Skip to content

Instantly share code, notes, and snippets.

@rennerocha
Created April 2, 2019 15:29
Show Gist options
  • Save rennerocha/a0dd2fd98fa3f74715247236e6a23c87 to your computer and use it in GitHub Desktop.
Save rennerocha/a0dd2fd98fa3f74715247236e6a23c87 to your computer and use it in GitHub Desktop.
import datetime
import json
import os

from scrapy import signals
from scrapy.exceptions import NotConfigured
from scrapy.utils.project import data_path
def json_serial(obj):
    """Serialize date/time objects that the stock ``json`` encoder rejects.

    Meant to be passed as the ``default=`` hook of ``json.dumps`` /
    ``json.dump``, which call it for every value they cannot encode
    themselves.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        The string produced by ``obj.isoformat()`` when *obj* is a
        ``datetime.datetime``, ``datetime.date`` or ``datetime.time``.

    Raises:
        TypeError: If *obj* is of any other type.
    """
    # All three datetime value types expose isoformat(); anything else is
    # reported to the encoder as unserializable.
    if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
        return obj.isoformat()
    raise TypeError("Type %s not serializable" % type(obj))
class StatsStore:
    """Scrapy extension that writes a spider's stats to a JSON file on close.

    One file is written per ``spider_closed`` signal into the directory
    obtained from ``data_path('stats', createdir=True)``. Each file is named
    after the Unix timestamp (whole seconds) at which the spider closed.
    """

    def __init__(self, stats):
        """Keep the stats collector and resolve the output directory.

        Args:
            stats: Object exposing ``get_stats(spider)``; ``from_crawler``
                passes ``crawler.stats``.
        """
        self.stats = stats
        self.statsdir = data_path('stats', createdir=True)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension and subscribe it to ``spider_closed``.

        Args:
            crawler: The crawler providing ``stats`` and ``signals``.

        Returns:
            The newly created ``StatsStore`` instance.
        """
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_closed(self, spider):
        """Dump the stats collected for *spider* to a timestamp-named file.

        Args:
            spider: The spider that was closed; forwarded to
                ``self.stats.get_stats``.

        Raises:
            TypeError: If the stats contain a value that ``json_serial``
                cannot serialize.
        """
        spider_stats = self.stats.get_stats(spider)
        # Serialize before touching the filesystem so that a serialization
        # failure cannot leave an empty file behind or truncate an existing one.
        payload = json.dumps(spider_stats, default=json_serial)
        # NOTE: the file name has one-second resolution, so two spiders
        # closing within the same second write to the same path and the
        # later one overwrites the earlier one.
        key = int(datetime.datetime.now().timestamp())
        stats_path = os.path.join(self.statsdir, str(key))
        with open(stats_path, "w", encoding="utf-8") as stats_file:
            stats_file.write(payload)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment