Skip to content

Instantly share code, notes, and snippets.

@ziplokk1
Created July 10, 2017 01:18
Show Gist options
  • Save ziplokk1/69926a037478675b0c26cdc8ded4c109 to your computer and use it in GitHub Desktop.
Save ziplokk1/69926a037478675b0c26cdc8ded4c109 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# Scrapy settings for ScrapyChip project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
#
# --- Project identity and spider discovery ---------------------------------
BOT_NAME = "amazonbot"
SPIDER_MODULES = ["project.spiders"]
NEWSPIDER_MODULE = "project.spiders"

# --- Request identity -------------------------------------------------------
# Browser-like User-Agent string (Firefox 36 on Windows 7, 64-bit).
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) "
    "Gecko/20100101 Firefox/36.0"
)

# Headers attached to every request by default. The User-Agent entry reuses
# USER_AGENT so the two values cannot drift apart.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": USER_AGENT,
    "Connection": "keep-alive",
}

# Pipelines Redacted

# --- Downloader middlewares -------------------------------------------------
# Maps each middleware's import path to its order value.
DOWNLOADER_MIDDLEWARES = {
    "captchabuster.RobotMiddleware": 90,
    "project.middleware.PauseOnStatusMiddleware": 501,
}

# Custom (non-core) setting. Presumably read by PauseOnStatusMiddleware to
# decide which HTTP statuses trigger a pause -- TODO confirm in the middleware.
TIMEOUT_STATUS_CODES = [503]

# --- Crawl rate -------------------------------------------------------------
CONCURRENT_REQUESTS = 4
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_START_DELAY = 5

# --- Logging ----------------------------------------------------------------
LOG_LEVEL = "INFO"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment