Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Scrapy middlewares for random agent list and proxy server usage.
import os
import random
from scrapy.conf import settings
class RandomUserAgentMiddleware(object):
def process_request(self, request, spider):
ua = random.choice(settings.get('USER_AGENT_LIST'))
if ua:
request.headers.setdefault('User-Agent', ua)
class ProxyMiddleware(object):
def process_request(self, request, spider):
request.meta['proxy'] = settings.get('HTTP_PROXY')
# More comprehensive list can be found at
# http://techpatterns.com/forums/about304.html
USER_AGENT_LIST = [
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10'
]
DOWNLOADER_MIDDLEWARES = {
'myproject.middlewares.RandomUserAgentMiddleware': 400,
'myproject.middlewares.ProxyMiddleware': 410,
'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
# Disable compression middleware, so the actual HTML pages are cached
}
@Caner112

Hello,
Can i see what you have in from scrapy.conf import settings?
Thank you in advance.

Edit: Ignore this, my bad

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment