Custom item exporter for Scrapy
from os.path import join

from scrapy.contrib.exporter import BaseItemExporter
from scrapy import signals

from pipeline_base import StorePipeline


class CustomItemExporter(BaseItemExporter):
    """Item exporter that writes one formatted line per exported item."""

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file

    @staticmethod
    def format_output(item):
        # Add code here to build the output string you want for each item.
        pass

    def export_item(self, item):
        output = '%s\n' % (self.format_output(item))
        self.file.write(output)
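
A concrete exporter would override format_output to serialize each item. As a minimal sketch (the TabDelimitedItemExporter class and the 'title'/'url' field names are illustrative, not part of the original gist), it could join a couple of fields with tabs:

class TabDelimitedItemExporter(CustomItemExporter):
    """Illustrative subclass: emits one tab-separated line per item."""

    @staticmethod
    def format_output(item):
        # 'title' and 'url' are placeholder field names for this example.
        return '\t'.join([item.get('title', ''), item.get('url', '')])
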
class FsLinesPipeline(StorePipeline):
    """Pipeline that exports each spider's items, one line per item, to a file on disk."""

    def __init__(self, data_path):
        self.files = {}
        self.data_path = data_path

    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        pipeline = cls(data_path=settings.get('DATA_PATH'))
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # One output file per spider, named by the spider's output_file attribute.
        file = open(join(self.data_path, spider.output_file), 'w+b')
        self.files[spider] = file
        self.exporter = CustomItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
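
To enable the pipeline, the project settings would need to register it and define the DATA_PATH setting that from_crawler reads, and each spider needs an output_file attribute. The module path and values below are placeholders, not taken from the original gist:

# settings.py (illustrative values)
DATA_PATH = '/tmp/exports'
ITEM_PIPELINES = {
    # 'myproject.pipelines' is a placeholder module path for wherever FsLinesPipeline lives.
    'myproject.pipelines.FsLinesPipeline': 300,
}

# Each spider then declares the file its items should be exported to, e.g.:
#
#     class ExampleSpider(scrapy.Spider):
#         name = 'example'
#         output_file = 'example.txt'   # written under DATA_PATH by FsLinesPipeline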