import scrapy
from scrapy import signals
from scrapy.exceptions import DontCloseSpider


@classmethod
def from_crawler(cls, crawler, *args, **kwargs):
    """Register to receive the idle event."""
    spider = super(SecureSosStateOrUsSpider, cls).from_crawler(
        crawler, *args, **kwargs
    )
    crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
    return spider

def spider_idle(self, spider):
    """Schedule a simple request in order to return the collected data."""
    if self.data_submitted:
        return
    # This is a hack: I don't yet know how to schedule a request that just
    # submits data _without_ also triggering a scrape, so I request a simple
    # site whose response we ignore.
    # (Note: engine.schedule() has since been removed from Scrapy; on recent
    # releases, self.crawler.engine.crawl(null_request) is the equivalent.)
    null_request = scrapy.Request("http://neverssl.com/", callback=self.submit_data)
    self.crawler.engine.schedule(null_request, spider)
    raise DontCloseSpider

def submit_data(self, _):
    """Return the collection of all the scraped data, ignoring the content
    actually scraped by this request. I haven't figured out another way to
    submit the merged results.

    To be used as a callback when the spider is idle (i.e., has finished
    scraping).
    """
    self.data_submitted = True
    return self.sportsInventory
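
# For context, a minimal sketch (not part of the original snippet) of the
# enclosing spider class these methods assume. The spider name, start URL,
# and parse() body are hypothetical; only the `data_submitted` flag and the
# `sportsInventory` collection are actually required by the methods above.
class SecureSosStateOrUsSpider(scrapy.Spider):
    name = "secure_sos_state_or_us"                    # hypothetical
    start_urls = ["https://secure.sos.state.or.us/"]   # hypothetical

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.data_submitted = False  # keeps spider_idle from re-scheduling
        self.sportsInventory = {}    # merged results returned by submit_data()

    def parse(self, response):
        # Hypothetical accumulation: stash per-page results in the shared
        # collection instead of yielding items page by page.
        self.sportsInventory[response.url] = response.css("title::text").get()

    # from_crawler, spider_idle, and submit_data (defined above) complete the
    # class. If sportsInventory is a dict, Scrapy treats the value returned by
    # submit_data() as a single scraped item, so the merged data flows out
    # through the usual item machinery (pipelines, feed exports, etc.).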