Created
April 5, 2016 22:17
-
-
Save redapple/e2b4ac795443de1f611216fa3c4fb681 to your computer and use it in GitHub Desktop.
Reproduction for Stack Overflow question 36391781: a Scrapy signal handler that always raises, producing repeated "Error caught on signal handler" tracebacks in the crawl log.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ scrapy crawl httpbin | |
2016-04-06 00:16:58 [scrapy] INFO: Scrapy 1.1.0rc3 started (bot: mwtest) | |
2016-04-06 00:16:58 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'mwtest.spiders', 'SPIDER_MODULES': ['mwtest.spiders'], 'BOT_NAME': 'mwtest'} | |
2016-04-06 00:16:58 [scrapy] INFO: Enabled extensions: | |
['scrapy.extensions.logstats.LogStats', | |
'scrapy.extensions.telnet.TelnetConsole', | |
'scrapy.extensions.corestats.CoreStats'] | |
2016-04-06 00:16:58 [py.warnings] WARNING: /home/paul/tmp/mwtest/mwtest/middlewares.py:1: ScrapyDeprecationWarning: Module `scrapy.log` has been deprecated, Scrapy now relies on the builtin Python library for logging. Read the updated logging entry in the documentation to learn more. | |
from scrapy import log, signals | |
2016-04-06 00:16:58 [scrapy] INFO: Enabled downloader middlewares: | |
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', | |
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', | |
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', | |
'scrapy.downloadermiddlewares.retry.RetryMiddleware', | |
'mwtest.middlewares.TestMiddleware', | |
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', | |
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', | |
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', | |
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', | |
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', | |
'scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware', | |
'scrapy.downloadermiddlewares.stats.DownloaderStats'] | |
2016-04-06 00:16:58 [scrapy] INFO: Enabled spider middlewares: | |
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', | |
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', | |
'scrapy.spidermiddlewares.referer.RefererMiddleware', | |
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', | |
'scrapy.spidermiddlewares.depth.DepthMiddleware'] | |
2016-04-06 00:16:58 [scrapy] INFO: Enabled item pipelines: | |
[] | |
2016-04-06 00:16:58 [scrapy] INFO: Spider opened | |
2016-04-06 00:16:58 [scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.open_spider() | |
2016-04-06 00:16:58 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023 | |
2016-04-06 00:16:58 [scrapy] ERROR: Error caught on signal handler: <bound method ?.process of <mwtest.middlewares.TestMiddleware object at 0x7f83d4a73f50>> | |
Traceback (most recent call last): | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/scrapy/utils/signal.py", line 30, in send_catch_log | |
*arguments, **named) | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply | |
return receiver(*arguments, **named) | |
File "/home/paul/tmp/mwtest/mwtest/middlewares.py", line 26, in process | |
raise Exception | |
Exception | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.process_request() | |
2016-04-06 00:16:58 [scrapy] ERROR: Error caught on signal handler: <bound method ?.process of <mwtest.middlewares.TestMiddleware object at 0x7f83d4a73f50>> | |
Traceback (most recent call last): | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/scrapy/utils/signal.py", line 30, in send_catch_log | |
*arguments, **named) | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply | |
return receiver(*arguments, **named) | |
File "/home/paul/tmp/mwtest/mwtest/middlewares.py", line 26, in process | |
raise Exception | |
Exception | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.process_request() | |
2016-04-06 00:16:58 [scrapy] ERROR: Error caught on signal handler: <bound method ?.process of <mwtest.middlewares.TestMiddleware object at 0x7f83d4a73f50>> | |
Traceback (most recent call last): | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/scrapy/utils/signal.py", line 30, in send_catch_log | |
*arguments, **named) | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply | |
return receiver(*arguments, **named) | |
File "/home/paul/tmp/mwtest/mwtest/middlewares.py", line 26, in process | |
raise Exception | |
Exception | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.process_request() | |
2016-04-06 00:16:58 [scrapy] ERROR: Error caught on signal handler: <bound method ?.process of <mwtest.middlewares.TestMiddleware object at 0x7f83d4a73f50>> | |
Traceback (most recent call last): | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/scrapy/utils/signal.py", line 30, in send_catch_log | |
*arguments, **named) | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply | |
return receiver(*arguments, **named) | |
File "/home/paul/tmp/mwtest/mwtest/middlewares.py", line 26, in process | |
raise Exception | |
Exception | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.process_request() | |
2016-04-06 00:16:58 [scrapy] ERROR: Error caught on signal handler: <bound method ?.process of <mwtest.middlewares.TestMiddleware object at 0x7f83d4a73f50>> | |
Traceback (most recent call last): | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/scrapy/utils/signal.py", line 30, in send_catch_log | |
*arguments, **named) | |
File "/home/paul/.virtualenvs/scrapy11rc3.py27/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply | |
return receiver(*arguments, **named) | |
File "/home/paul/tmp/mwtest/mwtest/middlewares.py", line 26, in process | |
raise Exception | |
Exception | |
2016-04-06 00:16:58 [httpbin] INFO: TestMiddleware.process_request() | |
2016-04-06 00:16:58 [scrapy] DEBUG: Crawled (200) <GET http://www.httpbin.org/user-agent> (referer: None) | |
2016-04-06 00:16:58 [scrapy] DEBUG: Crawled (200) <GET http://www.httpbin.org/headers> (referer: None) | |
2016-04-06 00:16:58 [scrapy] DEBUG: Crawled (200) <GET http://www.httpbin.org/> (referer: None) | |
2016-04-06 00:16:58 [scrapy] INFO: Closing spider (finished) | |
2016-04-06 00:16:58 [scrapy] INFO: Dumping Scrapy stats: | |
{'downloader/exception_count': 2, | |
'downloader/exception_type_count/scrapy.exceptions.IgnoreRequest': 2, | |
'downloader/request_bytes': 665, | |
'downloader/request_count': 3, | |
'downloader/request_method_count/GET': 3, | |
'downloader/response_bytes': 13006, | |
'downloader/response_count': 3, | |
'downloader/response_status_count/200': 3, | |
'finish_reason': 'finished', | |
'finish_time': datetime.datetime(2016, 4, 5, 22, 16, 58, 641313), | |
'log_count/DEBUG': 4, | |
'log_count/ERROR': 5, | |
'log_count/INFO': 13, | |
'log_count/WARNING': 1, | |
'response_received_count': 3, | |
'scheduler/dequeued': 5, | |
'scheduler/dequeued/memory': 5, | |
'scheduler/enqueued': 5, | |
'scheduler/enqueued/memory': 5, | |
'start_time': datetime.datetime(2016, 4, 5, 22, 16, 58, 221894)} | |
2016-04-06 00:16:58 [scrapy] INFO: Spider closed (finished) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import scrapy


class HttpbinSpider(scrapy.Spider):
    """Minimal spider that fetches a handful of httpbin.org endpoints.

    It exists only to generate requests so the downloader middleware under
    test gets exercised; responses are deliberately discarded.
    """

    allowed_domains = ["httpbin.org"]
    name = "httpbin"
    start_urls = (
        'http://www.httpbin.org/',
        'http://www.httpbin.org/ip',
        'http://www.httpbin.org/user-agent',
        'http://www.httpbin.org/get',
        'http://www.httpbin.org/headers',
    )

    def parse(self, response):
        # Intentionally a no-op: nothing is scraped from the responses.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy import log, signals
from scrapy.exceptions import IgnoreRequest


class TestMiddleware(object):
    """Downloader middleware demonstrating errors from a failing signal handler.

    Two behaviors are wired up deliberately:
    - ``process`` is connected to ``request_scheduled`` and always raises,
      so every scheduled request logs an "Error caught on signal handler"
      traceback on the console.
    - ``process_request`` raises ``IgnoreRequest`` for every second request
      it sees, dropping half of the scheduled requests.
    """

    def __init__(self, crawler):
        # Count of requests seen by process_request() so far.
        self.counter = 0

    @classmethod
    def from_crawler(cls, crawler):
        mw = cls(crawler)
        crawler.signals.connect(mw.open_spider, signals.spider_opened)
        # this raise an exception always and will trigger errors in the console
        crawler.signals.connect(mw.process, signals.request_scheduled)
        return mw

    def open_spider(self, spider):
        # Announce via the spider's logger that the middleware saw spider_opened.
        spider.logger.info('TestMiddleware.open_spider()')

    def process_request(self, request, spider):
        spider.logger.info('TestMiddleware.process_request()')
        self.counter += 1
        # Every even-numbered request is dropped on purpose.
        if not self.counter % 2:
            raise IgnoreRequest("ignoring request %d" % self.counter)

    def process(self, *args, **kwargs):
        # Signal handler that always fails -- this is the error the gist
        # exists to reproduce.
        raise Exception
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment