Skip to content

Instantly share code, notes, and snippets.

@Mifody
Created February 15, 2021 08:23
Show Gist options
  • Save Mifody/e5f46ad99686b3d27783bee282c1f0ec to your computer and use it in GitHub Desktop.
Save Mifody/e5f46ad99686b3d27783bee282c1f0ec to your computer and use it in GitHub Desktop.
scrapy: saving downloaded images under a custom file name
from itemadapter import ItemAdapter
from shutterstock.db_utils import db_handle, CatalogModel
from scrapy.pipelines.images import ImagesPipeline
from scrapy import Request
import json
class ShutterstockImagePipeline(ImagesPipeline):
    """Download an item's thumbnail and full-size image under deterministic names.

    For each item, up to two requests are issued (``thumb_url`` and
    ``image_url``); the files are stored as ``<str_id>_small.jpg`` and
    ``<str_id>_large.jpg`` respectively, and the resulting storage paths are
    written back onto the item as ``thumb_file`` / ``image_file``.
    """

    def get_media_requests(self, item, info):
        """Yield one download Request per available image URL on *item*.

        The item's ``str_id`` and the filename suffix travel in
        ``request.meta`` so that :meth:`file_path` can build the target name.
        """
        # Single adapter for all field access (the original mixed
        # ItemAdapter(...).get with raw item.get for 'str_id').
        adapter = ItemAdapter(item)
        for url_field, suffix in (('thumb_url', '_small.jpg'),
                                  ('image_url', '_large.jpg')):
            url = adapter.get(url_field)
            if url:
                yield Request(url, meta={'str_id': adapter.get('str_id'),
                                         'type': suffix})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage filename ``<str_id><suffix>`` for *request*.

        Both meta keys are always set by :meth:`get_media_requests`; ``str()``
        guards against a non-string ``str_id``.
        """
        _id = request.meta.get('str_id')
        _type = request.meta.get('type')
        return str(_id) + _type

    def item_completed(self, results, item, info):
        """Copy the stored paths of successful downloads back onto *item*."""
        for ok, result in results:
            if not ok:
                # Failed download: leave the corresponding field untouched.
                continue
            path = result['path']
            if '_small' in path:
                item['thumb_file'] = path
            elif '_large' in path:
                item['image_file'] = path
        return item
class ShutterstockPipeline:
    """Buffer scraped items in memory and persist them to the catalog in batches.

    Items accumulate in ``self.items``; once the buffer grows past
    ``BATCH_SIZE`` entries it is flushed to the database in a single
    transaction, and any remainder is flushed when the spider closes.
    """

    # Flush the buffer once it holds more than this many items.
    BATCH_SIZE = 1000

    def __init__(self):
        # Pending item dicts awaiting a database flush.
        self.items = []
        super().__init__()

    def process_item(self, item, spider):
        """Queue *item* for persistence; flush when the batch is full."""
        self.items.append(dict(item))
        if len(self.items) > self.BATCH_SIZE:
            self.flush_data()
        return item

    def close_spider(self, spider):
        """Persist whatever is still buffered when the spider finishes."""
        if self.items:
            self.flush_data()

    def flush_data(self):
        """Write every buffered item to ``CatalogModel`` inside one transaction.

        Each item updates the catalog row matching its ``id``; the raw item
        dict is stored in the ``data`` column alongside its ``status``.
        """
        with db_handle.atomic() as transaction:
            for item in self.items:
                rec = {
                    'status': item['status'],
                    'data': item,
                }
                CatalogModel.update(**rec).where(
                    CatalogModel.id == item['id']
                ).execute()
            transaction.commit()
        self.items = []
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment