Skip to content

Instantly share code, notes, and snippets.

@nillia
Created March 21, 2024 14:51
Show Gist options
  • Save nillia/3adcbc093071dc90b95752f1375c2e8f to your computer and use it in GitHub Desktop.
Save nillia/3adcbc093071dc90b95752f1375c2e8f to your computer and use it in GitHub Desktop.
from billiard.context import Process
from scrapy.crawler import Crawler
from scrapy import signals
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor
from celery_app import app
class CrawlerProcess(Process):
    """Process subclass that runs one Scrapy crawl inside the Twisted reactor.

    A ``Crawler`` is built from the class of the given spider and the
    project settings, and ``reactor.stop`` is hooked to the
    ``spider_closed`` signal so the reactor loop ends when the spider closes.
    """

    def __init__(self, spider):
        """Prepare the crawler for *spider*.

        Args:
            spider: Spider instance. Its class is handed to ``Crawler`` and
                the instance itself is later passed to ``Crawler.crawl``.
        """
        super().__init__()
        self.spider = spider
        self.crawler = Crawler(spider.__class__, get_project_settings())
        # Stop the reactor when the spider closes; otherwise reactor.run()
        # in run() would keep blocking.
        self.crawler.signals.connect(
            reactor.stop,
            signal=signals.spider_closed,
        )

    def run(self):
        """Start the crawl, then block in the reactor loop until it stops."""
        # NOTE(review): presumably recent Scrapy versions build a new spider
        # from the arguments given to crawl() — confirm that passing an
        # existing instance is what the targeted Scrapy version expects.
        self.crawler.crawl(self.spider)
        # NOTE(review): the only visible stop trigger is spider_closed; if
        # the crawl fails before that signal fires this may block forever —
        # verify.
        reactor.run()
@app.task
def crawl(spider_cls, *args, **kwargs):
    """Run a spider in a separate ``CrawlerProcess`` and wait for it to end.

    Args:
        spider_cls: Callable (normally a spider class) used to build the
            spider.
        *args: Positional arguments forwarded to ``spider_cls``.
        **kwargs: Keyword arguments forwarded to ``spider_cls``.
    """
    # The crawl runs in its own process, presumably so each task gets a
    # fresh Twisted reactor (TODO confirm).
    worker = CrawlerProcess(spider_cls(*args, **kwargs))
    worker.start()
    # Block the task until the child process has exited.
    worker.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment