scrapy 联合redis实现去重

2019-12-06

# 使用 Redis 对 URL 去重:每次爬取后需将 URL 存入 Redis 的 hash(键名 request_url,在 pipeline 中写入),下面的中间件据此忽略已爬取过的请求
'''
from redis import Redis
class IngoreRequestMiddleware(object):
    """Scrapy downloader middleware that drops requests whose URL was already crawled.

    A URL counts as already crawled when it exists as a field of the Redis
    hash ``request_url``. This class only reads that hash; something else
    (the accompanying note says an item pipeline) has to write each crawled
    URL into it.
    """

    # NOTE(review): ``IgnoreRequest`` is not imported in the visible part of
    # this file. It is presumably ``scrapy.exceptions.IgnoreRequest`` and must
    # be in scope at module level, otherwise the duplicate branch raises
    # NameError instead.

    def __init__(self):
        """Open the Redis connection used for the duplicate lookups."""
        # Fixed: the original call was missing the comma between ``host`` and
        # ``port``, which made the whole snippet a SyntaxError.
        self.Redis = Redis(host="127.0.0.1", port=6379, password="sunck")

    def process_request(self, request, spider):
        """Reject ``request`` if its URL is already recorded in Redis.

        Args:
            request: The outgoing request; only ``request.url`` is read.
            spider: The spider issuing the request (unused).

        Returns:
            None when the URL has not been seen, so processing continues.

        Raises:
            IgnoreRequest: If ``request.url`` is a field of the
                ``request_url`` hash.
        """
        # HEXISTS checks whether the field (the URL) exists in the hash.
        if self.Redis.hexists("request_url", request.url):
            raise IgnoreRequest("IgnoreRequest : %s" % request.url)
        return None

{/if}