Skip to content

Instantly share code, notes, and snippets.

@kezhenxu94
Created March 30, 2018 08:38
Show Gist options
  • Save kezhenxu94/9da7e8b84055e146a4d8eb836ea2154b to your computer and use it in GitHub Desktop.
Save kezhenxu94/9da7e8b84055e146a4d8eb836ea2154b to your computer and use it in GitHub Desktop.
pipelines.py
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline
class MmonlyCcPipeline(object):
    """Pass-through item pipeline that returns every item unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as-is.

        Args:
            item: The item handed to this pipeline.
            spider: The spider associated with the item (unused).

        Returns:
            The very same ``item`` object, unmodified.
        """
        return item
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that stores downloads under ``<category>/<title>/``.

    Every download request carries its originating item in ``request.meta``
    so that ``file_path`` can derive the target directory from the item's
    ``category`` and ``title`` fields.
    """

    @staticmethod
    def _safe_component(value):
        """Turn ``value`` into a single, safe path component.

        Path separators are replaced so that scraped text cannot introduce
        extra directory levels, and empty / ``.`` / ``..`` results are mapped
        to ``_`` so the path cannot climb out of the storage directory.
        """
        # '%s' formatting (with a 1-tuple) stringifies any value the same way
        # the final path formatting would.
        text = ('%s' % (value,)).replace('/', '_').replace('\\', '_').strip()
        if text in ('', '.', '..'):
            return '_'
        return text

    def get_media_requests(self, item, info):
        """Build one download request per image URL found on ``item``.

        Args:
            item: The scraped item; image URLs are read from the field named
                by ``self.images_urls_field``.
            info: Supplied by the caller; unused here.

        Returns:
            A list of ``Request`` objects, each with the item attached as
            ``meta['item']``. Empty when the item has no image URLs.
        """
        return [
            Request(url, meta={'item': item})
            for url in item.get(self.images_urls_field, [])
        ]

    def file_path(self, request, response=None, info=None, item=None):
        """Return the storage path for the image fetched by ``request``.

        Args:
            request: The download request; its ``meta['item']`` (set by
                ``get_media_requests``) is the preferred source of the item.
            response: Forwarded to the parent implementation.
            info: Forwarded to the parent implementation.
            item: Optional item supplied by the caller (newer Scrapy releases
                pass it as a keyword argument). Used only when the request
                carries no item in its meta.

        Returns:
            ``<category>/<title>/<file name>`` when an item with both
            ``category`` and ``title`` is available; otherwise the parent
            pipeline's default path.
        """
        default_path = super(MyImagesPipeline, self).file_path(request, response, info)

        source_item = request.meta.get('item', item)
        if source_item is None:
            return default_path
        if 'title' not in source_item or 'category' not in source_item:
            return default_path

        category = self._safe_component(source_item['category'])
        title = self._safe_component(source_item['title'])
        # Drop the parent's 'full/' directory so the file sits directly
        # inside the per-title folder.
        return '%s/%s/%s' % (category, title, default_path.replace('full/', ''))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment