public
Last active

  • Download Gist
pipelines.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
 
class MyImagesPipeline(ImagesPipeline):
    """Image pipeline that records the downloaded image paths on each item.

    Items for which no image download succeeded are dropped.
    """

    def get_media_requests(self, item, info):
        """Yield one download ``Request`` per URL in ``item['image_urls']``."""
        for image_url in item['image_urls']:
            yield Request(image_url)

    def item_completed(self, results, item, info):
        """Store the paths of the successful downloads in ``item['image_paths']``.

        ``results`` is iterated as ``(ok, x)`` pairs; ``x['path']`` is kept
        for every pair whose ``ok`` flag is truthy.

        Returns:
            The same ``item``, with ``item['image_paths']`` set.

        Raises:
            DropItem: if no download succeeded.
        """
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        item['image_paths'] = image_paths
        return item

    def process_item(self, item, spider):
        """Print a debug line for the 'vinNico3' spider, then defer to the parent.

        Returns:
            Whatever the inherited ``process_item`` returns for this item.
        """
        if spider.name == 'vinNico3':
            # Single-argument parenthesised form prints the same text on
            # Python 2 and also parses on Python 3.
            print("inside process_item")
        # The override must hand the item to the inherited implementation and
        # return its result; without this the method returns None and every
        # item is discarded.
        return super(MyImagesPipeline, self).process_item(item, spider)
#class Projetvinnicolas3Pipeline(object):
# def process_item(self, item, spider):
# return item

You have to call the superclass's process_item method and return its result;
otherwise your override returns None and every item is discarded.

    def process_item(self, item, spider):
        """Print a debug line for the 'vinNico3' spider, then defer to the parent.

        Returns:
            Whatever the inherited ``process_item`` returns for this item.
        """
        if spider.name == 'vinNico3':
            # Single-argument parenthesised form prints the same text on
            # Python 2 and also parses on Python 3.
            print("inside process_item")
        # Returning the parent's result keeps the item flowing through the
        # pipeline instead of being replaced by None.
        return super(MyImagesPipeline, self).process_item(item, spider)

Alternatively, do not define process_item at all; the inherited implementation is then used unchanged.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.