Create a gist now

Instantly share code, notes, and snippets.

@nahali /
Last active Dec 22, 2015

from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MyImagesPipeline(ImagesPipeline):
    """Image pipeline that records downloaded image paths on each item.

    Items for which no image could be downloaded are dropped.
    """

    def get_media_requests(self, item, info):
        """Yield one download ``Request`` per URL in ``item['image_urls']``."""
        for image_url in item['image_urls']:
            yield Request(image_url)

    def item_completed(self, results, item, info):
        """Store the paths of the successful downloads on the item.

        ``results`` is iterated as ``(ok, value)`` pairs; for every pair
        whose ``ok`` flag is truthy, ``value['path']`` is collected.

        Returns the item with ``item['image_paths']`` set.
        Raises ``DropItem`` when no download succeeded.
        """
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        item['image_paths'] = image_paths
        return item

    def process_item(self, item, spider):
        """Emit a debug trace for one spider, then run the normal pipeline.

        The result of the superclass' ``process_item`` must be returned;
        an override that returns nothing discards every item.
        """
        # NOTE(review): the left operand of this comparison was missing in
        # the original text; ``spider.name`` is the presumed intent -- confirm.
        if spider.name == 'vinNico3':
            # Parenthesized form prints the same text on Python 2 and 3.
            print("inside process_item")
        return super(MyImagesPipeline, self).process_item(item, spider)
#class Projetvinnicolas3Pipeline(object):
# def process_item(self, item, spider):
# return item

You have to call the superclass's method and return its result;
otherwise, you are discarding all items:

    def process_item(self, item, spider):
        """Emit a debug trace for one spider, then delegate to the superclass.

        Returning the superclass' result is required; without it the
        method returns ``None`` and every item is discarded.
        """
        # NOTE(review): the left operand of this comparison was missing in
        # the original snippet; ``spider.name`` is the presumed intent -- confirm.
        if spider.name == 'vinNico3':
            # Parenthesized form prints the same text on Python 2 and 3.
            print("inside process_item")
        return super(MyImagesPipeline, self).process_item(item, spider)

Alternatively, you can simply not define the method at all, so the superclass's process_item is used as-is.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment