Skip to content

Instantly share code, notes, and snippets.

@siddhantkushwaha
Created June 18, 2019 06:53
Show Gist options
  • Save siddhantkushwaha/24f38bf6e8024aeccba4e2fb1a5ac338 to your computer and use it in GitHub Desktop.
Save siddhantkushwaha/24f38bf6e8024aeccba4e2fb1a5ac338 to your computer and use it in GitHub Desktop.
This gist contains a code snippet that allows a scrapy spider to be invoked via a function call and return its scraped data.
from scrapy.crawler import CrawlerProcess
from scrapy_tasks.spiders.trial import *
class CustomCrawler:
    """Wrap a scrapy CrawlerProcess so a spider run can hand data back.

    The spider is expected to invoke the callback passed via the ``args``
    kwarg (see ``yield_output``); whatever it sends is stored on
    ``self.output`` for the caller to read after the crawl finishes.

    NOTE(review): CrawlerProcess starts a Twisted reactor, which cannot be
    restarted — each CustomCrawler instance supports one crawl per process.
    """

    def __init__(self):
        # Filled in by the spider through the yield_output callback.
        self.output = None
        # Logging disabled so the blocking crawl stays quiet.
        self.process = CrawlerProcess(settings={'LOG_ENABLED': False})

    def yield_output(self, data):
        """Callback handed to the spider; records its final data."""
        self.output = data

    def crawl(self, cls):
        """Run spider class *cls* to completion (blocks until done)."""
        self.process.crawl(cls, args={'callback': self.yield_output})
        self.process.start()
def crawl_static(cls):
    """Run spider class *cls* in a fresh CustomCrawler and return its output.

    Convenience wrapper: builds a throwaway crawler, blocks until the
    crawl completes, then hands back whatever the spider reported.
    """
    runner = CustomCrawler()
    runner.crawl(cls)
    return runner.output
if __name__ == '__main__':
    # Demo: run the Trial spider synchronously and show what it returned.
    result = crawl_static(Trial)
    print(result)
from scrapy import Spider
class Trial(Spider):
    """Minimal demo spider that reports a result through a callback.

    The callback is delivered via the ``args`` kwarg (a dict with a
    ``'callback'`` key), as supplied by ``CustomCrawler.crawl``; it is
    invoked with the spider's output when the crawl closes.
    """

    name = 'trial'
    # Placeholder URL from the original gist — replace with real targets.
    start_urls = ['']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Fix: default to {} so constructing the spider without the 'args'
        # kwarg (scrapy's normal instantiation path) no longer raises
        # AttributeError on NoneType; output_callback is then None.
        self.output_callback = kwargs.get('args', {}).get('callback')

    def parse(self, response):
        # Demo spider: no parsing performed.
        pass

    def close(self, spider, reason):
        # Hand the collected output back to the caller, if a callback
        # was provided; otherwise close silently.
        if self.output_callback is not None:
            self.output_callback(['Hi, This is the output.'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment