Created
December 23, 2012 18:43
-
-
Save bcambel/4365185 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# You can use this middleware to send a random user agent with every request the spider makes.
# Define a USER_AGENT_LIST in your settings and the spider will choose a random user agent from that list for each request.
# | |
# You will have to disable the default user agent middleware and add this to your settings file. | |
# | |
# DOWNLOADER_MIDDLEWARES = { | |
# 'scraper.random_user_agent.RandomUserAgentMiddleware': 400, | |
# 'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, | |
# } | |
from scraper.settings import USER_AGENT_LIST | |
import random | |
from scrapy import log | |
class RandomUserAgentMiddleware(object):
    """Downloader middleware that assigns a random User-Agent to each request.

    The candidate agents are read from the module-level ``USER_AGENT_LIST``.
    """

    def process_request(self, request, spider):
        """Set a randomly chosen ``User-Agent`` header on ``request``.

        The header is written with ``setdefault``, so a ``User-Agent`` that
        is already present on the request is left untouched.

        Args:
            request: The outgoing request; only ``request.headers`` is used.
            spider: The spider issuing the request (unused).

        Returns:
            None in every case; the request is modified in place.
        """
        # random.choice() raises IndexError on an empty sequence, which would
        # fail every request. With no agents configured, leave the request
        # untouched instead.
        if not USER_AGENT_LIST:
            return None

        ua = random.choice(USER_AGENT_LIST)
        # Skip falsy entries (e.g. an empty string) rather than sending a
        # blank User-Agent header.
        if ua:
            request.headers.setdefault('User-Agent', ua)
        # Debug aid: uncomment to log the headers chosen for each request.
        # log.msg('>>>> UA %s' % request.headers)
        return None
# Snippet imported from snippets.scrapy.org (which no longer works) | |
# author: dushyant | |
# date : Sep 16, 2011 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment