Created
February 15, 2021 08:23
-
-
Save Mifody/e5f46ad99686b3d27783bee282c1f0ec to your computer and use it in GitHub Desktop.
scrapy Сохранение изображений с кастомным именем
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

from itemadapter import ItemAdapter
from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline

from shutterstock.db_utils import db_handle, CatalogModel
class ShutterstockImagePipeline(ImagesPipeline):
    """Images pipeline that stores each download under a name built from the item's ``str_id``.

    The thumbnail (``thumb_url``) is saved as ``<str_id>_small.jpg`` and the
    full image (``image_url``) as ``<str_id>_large.jpg``. After the downloads
    finish, the stored paths are written back to the item as ``thumb_file``
    and ``image_file``.
    """

    # Suffixes appended to ``str_id`` to form the stored file name; they are
    # also used to tell the two downloads apart in ``item_completed``.
    _THUMB_SUFFIX = '_small.jpg'
    _IMAGE_SUFFIX = '_large.jpg'

    def get_media_requests(self, item, info):
        """Yield one download request per non-empty image URL on the item.

        Each request carries the item's ``str_id`` and the target file-name
        suffix in ``meta`` so that ``file_path`` can build the custom name.
        """
        # Read every field through the adapter so that non-dict item types
        # are handled the same way as the URL fields.
        adapter = ItemAdapter(item)
        str_id = adapter.get('str_id')
        url_fields = (
            ('thumb_url', self._THUMB_SUFFIX),
            ('image_url', self._IMAGE_SUFFIX),
        )
        for field, suffix in url_fields:
            url = adapter.get(field)
            if url:
                yield Request(url, meta={'str_id': str_id, 'type': suffix})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path ``<str_id><suffix>`` taken from the request meta."""
        str_id = request.meta.get('str_id')
        suffix = request.meta.get('type')
        return str(str_id) + suffix

    def item_completed(self, results, item, info):
        """Record the stored path of each successful download on the item.

        ``results`` is iterated as ``(ok, info)`` pairs; for successful
        entries ``info['path']`` is the path returned by ``file_path``.
        Failed downloads are skipped and leave the item untouched.
        """
        adapter = ItemAdapter(item)
        for ok, result in results:
            if not ok:
                continue
            path = result['path']
            # Match on the exact suffix rather than a substring so that a
            # ``str_id`` containing "_small" or "_large" cannot be
            # misclassified.
            if path.endswith(self._THUMB_SUFFIX):
                adapter['thumb_file'] = path
            elif path.endswith(self._IMAGE_SUFFIX):
                adapter['image_file'] = path
        return item
class ShutterstockPipeline:
    """Item pipeline that buffers scraped items and writes them to the catalog in batches.

    Items are collected in ``self.items`` as plain dicts. The buffer is
    flushed once it holds more than ``BATCH_SIZE`` items, and once more when
    the spider closes if anything is still pending.
    """

    # The buffer is flushed when it holds MORE than this many items.
    BATCH_SIZE = 1000

    def __init__(self):
        # Pending items (as dicts) waiting to be written by ``flush_data``.
        self.items = []
        super().__init__()

    def process_item(self, item, spider):
        """Buffer a dict copy of ``item``, flushing if the batch is full; return ``item`` unchanged."""
        self.items.append(dict(item))
        if len(self.items) > self.BATCH_SIZE:
            self.flush_data()
        return item

    def close_spider(self, spider):
        """Flush whatever is still buffered when the spider finishes."""
        if self.items:
            self.flush_data()

    def flush_data(self):
        """Write every buffered item to ``CatalogModel`` inside one atomic block, then empty the buffer.

        For each item, the row whose ``id`` equals ``item['id']`` gets its
        ``status`` set to ``item['status']`` and its ``data`` set to the
        whole item dict.
        """
        # NOTE(review): the nesting of the commit and of the buffer reset was
        # reconstructed (commit once after the loop, reset after the atomic
        # block exits cleanly) - confirm against the intended behavior.
        with db_handle.atomic() as transaction:
            for item in self.items:
                rec = {
                    'status': item['status'],
                    'data': item,
                }
                CatalogModel.update(**rec).where(CatalogModel.id == item['id']).execute()
            # NOTE(review): explicit commit kept from the original; presumably
            # redundant with leaving the atomic() block - confirm.
            transaction.commit()
        # Only reached when the block above did not raise, so a failed flush
        # keeps its items buffered.
        self.items = []
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment