item_scraped signal
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Connecting the item_scraped signal with the legacy (pre-1.0) Crawler API:
# Crawler(settings), crawler.install()/configure(), scrapy.log and
# scrapy.xlib.pydispatch. Python 2 / old Scrapy only.
import scrapy
from scrapy import signals, log
from scrapy.crawler import Crawler
from scrapy.xlib.pydispatch import dispatcher
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor


class TestItem(scrapy.Item):
    """Test item"""


class TestSpider(scrapy.Spider):
    name = 'testspider'
    start_urls = ['https://github.com']

    def parse(self, response):
        return TestItem()


class TestCase(object):

    def _item_passed(self, item, response, spider):
        print "PASSED"

    def _item_passed_2(self, item, response, spider):
        print "PASSED-DISP"

    def _spider_opened(self, spider):
        print "spider opened"

    def run(self):
        spider = TestSpider()
        settings = get_project_settings()
        crawler = Crawler(settings)
        # stop the reactor once the spider closes
        crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
        # connect handlers through the crawler's SignalManager...
        crawler.signals.connect(self._item_passed, signal=signals.item_scraped)
        crawler.signals.connect(self._spider_opened, signal=signals.spider_opened)
        # ...or through pydispatch, the older global dispatcher
        dispatcher.connect(self._item_passed_2, signals.item_scraped)
        crawler.install()
        crawler.configure()
        crawler.crawl(spider)
        crawler.start()
        # log.start(loglevel=log.INFO, logstdout=False)
        log.start(loglevel=log.INFO)
        reactor.run()


if __name__ == '__main__':
    TestCase().run()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# The same signal hookup using the CrawlerRunner API (Scrapy 1.x style),
# still under Python 2 with scrapy.xlib.pydispatch available.
import scrapy
from scrapy import signals
from scrapy.crawler import CrawlerRunner
from scrapy.settings import Settings
from scrapy.xlib.pydispatch import dispatcher
from twisted.internet import reactor


class TestItem(scrapy.Item):
    """Test item"""


class TestSpider(scrapy.Spider):
    name = 'testspider'
    start_urls = ['https://github.com']

    def parse(self, response):
        return TestItem()


class TestCase(object):

    def _item_passed(self, item, response, spider):
        print "PASSED"

    def _item_passed_2(self, item, response, spider):
        print "PASSED-DISP"

    def _spider_opened(self, spider):
        print "spider opened"

    def run(self):
        runner = CrawlerRunner(Settings())
        d = runner.crawl(TestSpider)
        d.addBoth(lambda _: reactor.stop())
        # runner.crawl() already created the Crawler; grab it to hook up signals
        crawler = list(runner.crawlers)[0]
        crawler.signals.connect(self._item_passed, signal=signals.item_scraped)
        crawler.signals.connect(self._spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self._item_passed_2, signals.item_scraped)
        reactor.run()


if __name__ == '__main__':
    TestCase().run()
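
For reference, newer Scrapy releases drop scrapy.log and scrapy.xlib.pydispatch, so neither script above runs unmodified there. Below is a minimal sketch of the same item_scraped hookup for a current Scrapy / Python 3 install, using CrawlerProcess and create_crawler() from the documented crawler API; the spider and handler names simply mirror the scripts above. Connecting the signal on the Crawler returned by create_crawler(), before passing it to crawl(), ensures the handler is wired up before the crawl starts.

#!/usr/bin/env python3
# Sketch: connecting item_scraped on a modern Scrapy (1.1+ / 2.x) install.
import scrapy
from scrapy import signals
from scrapy.crawler import CrawlerProcess


class TestItem(scrapy.Item):
    """Test item"""


class TestSpider(scrapy.Spider):
    name = 'testspider'
    start_urls = ['https://github.com']

    def parse(self, response):
        return TestItem()


def item_passed(item, response, spider):
    # item_scraped handlers receive (item, response, spider)
    print("PASSED")


if __name__ == '__main__':
    process = CrawlerProcess()                    # manages the Twisted reactor for us
    crawler = process.create_crawler(TestSpider)  # build the Crawler first...
    crawler.signals.connect(item_passed, signal=signals.item_scraped)
    process.crawl(crawler)                        # ...so the signal is connected before crawling
    process.start()                               # blocks until the crawl finishes

Running this should print PASSED once per scraped item, i.e. once for the single empty TestItem returned by parse().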