scrapy源码阅读

cooolr 于 2021-06-29 发布
import logging
import requests
from twisted.internet import reactor, defer

# Configure the root logger to emit DEBUG-and-above records, each prefixed
# with a timestamp, so the order in which the callbacks fire is visible.
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(message)s")

def getErr(failure):
    """Errback: report a failed pipeline stage and shut the reactor down.

    Args:
        failure: the object Twisted passes to an errback describing the error.

    Returns:
        None. Because nothing is re-raised or returned, the Deferred treats
        the failure as handled.
    """
    # Use the logging setup shared by the rest of the script (instead of a
    # bare print) so the error carries the same timestamped format.
    logging.error(failure)
    # Schedule the stop on the next reactor iteration rather than stopping
    # from inside the errback itself.
    reactor.callLater(0, reactor.stop)

def saveData(result):
    """Callback: persist the downloaded bytes, then ask the reactor to stop.

    Args:
        result: bytes-like payload handed over by the previous callback.
    """
    logging.debug("call saveData...")
    with open("test.jpg", "wb") as image_file:
        image_file.write(result)
    # Stopping is scheduled for the next reactor iteration, not done inline.
    reactor.callLater(0, reactor.stop)

def getData(result):
    """Callback: download the resource at ``result`` and return its raw bytes.

    Args:
        result: URL string passed along by the previous stage of the chain.

    Returns:
        The response body as bytes; it becomes the input of the next callback.

    Raises:
        requests.RequestException: on connection problems, on exceeding the
            1-second timeout, or when the server answers with an HTTP error
            status. Raising here routes the chain to its errback.
    """
    logging.debug("call getData...")
    # NOTE: requests is synchronous, so this call runs to completion inside
    # the callback; the timeout bounds how long it can take.
    response = requests.get(result, timeout=1)
    # Without this check a 4xx/5xx error page would be returned and later
    # written to disk as if it were the image.
    response.raise_for_status()
    return response.content

def start():
    """Create the pipeline's Deferred and schedule it to fire with the image URL.

    Returns:
        A Deferred that has not fired yet; its ``callback`` is scheduled on
        the reactor with a zero delay and will receive the image URL.
    """
    logging.debug("call start...")
    pending = defer.Deferred()
    reactor.callLater(
        0,
        pending.callback,
        "https://oss4.baidu.com/tnaot/image/2021/06/15/4e1b8590e98242449569ac9236553522.jpg",
    )
    return pending

deferred = start()
# Attach getErr as the final errback instead of pairing it with each callback
# via addCallbacks(): an errback passed alongside a callback in the same
# addCallbacks() call does not see exceptions raised by that callback, so a
# failure inside saveData used to go unhandled and the reactor never stopped.
# With getErr last, a failure in any stage reaches it.
deferred.addCallback(getData).addCallback(saveData).addErrback(getErr)
# Blocks until one of the callbacks schedules reactor.stop.
reactor.run()

inlineCallbacks实现

import time
import logging
import requests
from twisted.internet import reactor, defer
from twisted.internet.defer import inlineCallbacks

# Configure the root logger to emit DEBUG-and-above records, each prefixed
# with a timestamp, so the order in which the callbacks fire is visible.
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(message)s")

@inlineCallbacks
def saveData(result):
    """Write the downloaded bytes to ``test.jpg`` and stop the reactor.

    Args:
        result: bytes-like payload produced by the previous callback.

    Raises:
        Whatever ``open``/``write`` raises (e.g. OSError, TypeError); the
        reactor is stopped first so the script still terminates.
    """
    logging.debug("call saveData...")
    try:
        with open("test.jpg", "wb") as f:
            f.write(result)
    finally:
        # Stop the reactor even when the write fails: no errback follows this
        # callback, so an exception here would otherwise leave reactor.run()
        # blocked forever. The yield keeps this function a generator, as
        # inlineCallbacks requires.
        yield reactor.stop()

@inlineCallbacks
def getData(result):
    """Download the resource at ``result`` and return its bytes (best effort).

    Args:
        result: URL string passed along by the previous stage of the chain.

    Returns:
        The response body as bytes, or ``b''`` when the download fails; the
        error is logged rather than propagated.
    """
    logging.debug("call getData...")
    try:
        # Same 1-second timeout as the plain-callback version of this script,
        # so an unresponsive server cannot stall the callback indefinitely.
        response = requests.get(result, timeout=1)
        # Treat HTTP error statuses as failures instead of returning the
        # error page's body as image data.
        response.raise_for_status()
        # The yielded value is plain bytes, not a Deferred; inlineCallbacks
        # sends it straight back into the generator.
        content = yield response.content
    except Exception as e:
        # Deliberate best-effort boundary: log and fall back to empty bytes.
        logging.error(e)
        content = b''
    return content

def start():
    """Build the Deferred that drives the chain and schedule its firing.

    Returns:
        An unfired Deferred. The reactor is asked (zero delay) to call its
        ``callback`` with the image URL.
    """
    logging.debug("call start...")
    target_url = "https://oss4.baidu.com/tnaot/image/2021/06/15/4e1b8590e98242449569ac9236553522.jpg"
    chain_head = defer.Deferred()
    # Firing is deferred to the reactor rather than done synchronously here.
    reactor.callLater(0, chain_head.callback, target_url)
    return chain_head

# Assemble the pipeline one stage at a time: fetch the bytes, then save them.
deferred = start()
deferred.addCallback(getData)
deferred.addCallback(saveData)

# reactor.run() blocks until the reactor is stopped; the final log line is
# emitted only after that.
reactor.run()
logging.debug('this is my time')