Scrapy修改中间件

cooolr 于 2022-09-15 发布
from scrapy.http import TextResponse

class ProxyMiddleware(object):
    """Scrapy middleware demonstrating several ways to alter a request.

    Depending on the spider name and the request URL, ``process_request``
    attaches a proxy, short-circuits the download with a canned response,
    rewrites the URL, or extends the ``Cookie`` header.
    """

    def process_request(self, request, spider):
        """Adjust ``request`` in place before it is downloaded.

        Only the first matching rule of the spider-name/URL chain is applied.

        Args:
            request: The outgoing request. Its ``url``, ``meta`` and
                ``headers`` are read and may be modified.
            spider: The spider that issued the request; only ``name`` is read.

        Returns:
            A ``TextResponse`` with body ``b"hello world"`` when the URL is
            exactly ``http://baidu.com`` (and the spider is not
            ``khmer_spider``); ``None`` in every other case.
        """
        if spider.name == "khmer_spider":
            # Attach a proxy.
            # NOTE(review): `proxies` is not defined in the visible code and
            # must exist at module level. Scrapy's HttpProxyMiddleware
            # presumably expects a single proxy URL string here -- confirm.
            request.meta["proxy"] = proxies
        elif request.url == "http://baidu.com":
            # Return a custom text response instead of downloading.
            return TextResponse(
                url=request.url, body=b"hello world", request=request
            )
        elif "/api/pc/list/feed" in request.url:
            # Rewrite the URL. Use "?" when the URL has no query string yet,
            # otherwise "&", so the appended parameter is always well-formed.
            separator = "&" if "?" in request.url else "?"
            new_url = request.url + separator + "hello=world"
            # NOTE(review): `_set_url` is a private Request API; the public
            # alternative is returning `request.replace(url=new_url)`.
            request._set_url(new_url)
        elif "www.toutiao.com/a" in request.url:
            # Extend the Cookie header. `.get` avoids a KeyError when the
            # request carries no Cookie header; in that case the cookie is
            # set on its own, without a leading "; ".
            cookie = request.headers.get('Cookie')
            if cookie:
                request.headers['Cookie'] = cookie + b"; a=123"
            else:
                request.headers['Cookie'] = b"a=123"

        # `.get` because most requests carry no 'type' entry in meta; indexing
        # directly would raise KeyError for every such request.
        if request.meta.get('type') in ('image', 'video'):
            # Placeholder for filtering resource requests: nothing is dropped
            # yet. TODO: raise scrapy.exceptions.IgnoreRequest here if these
            # requests should really be skipped.
            pass
        return None