Skip to content

Instantly share code, notes, and snippets.

<!DOCTYPE html><html itemscope="" itemtype="http://schema.org/Place" lang="en-RU"> <head> <link href="/maps/_/js/k=maps.m.en.GBzd5ZChrUs.es5.O/m=sc2,per,mo,lp,ep,ti,ds,stx,dwi,enr,pwd,dw,plm,log,b/am=BgBGAxI/rt=j/d=1/rs=ACT90oEgzA4NjNg7VA4qKaAf-t7UWDH1Ng?wli=m.vOb3cCZHKaI.loadSv.O%3A%3B" as="script" rel="preload" type="application/javascript" nonce="U_IB5RFP8128Yb-UrmQ6Dg"> <link href="/maps/preview/opensearch.xml?hl=en" title="Google Maps" rel="search" type="application/opensearchdescription+xml"> <title> Google Maps </title> <meta content="Find local businesses, view maps and get driving directions in Google Maps." name="Description"> <meta content="Anm+hhtuh7NJguqSnXHEAIqqMaV+GXCks8WYXHJKF7l6AeYMj+wO+fi9OdDqFnJTg9t0492DykVxx4jpvFbxnA8AAABseyJvcmlnaW4iOiJodHRwczovL2dvb2dsZS5jb206NDQzIiwiZmVhdHVyZSI6IlByaXZhY3lTYW5kYm94QWRzQVBJcyIsImV4cGlyeSI6MTY5NTE2Nzk5OSwiaXNTdWJkb21haW4iOnRydWV9" http-equiv="origin-trial"> <meta content="initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name=
@Mifody
Mifody / tmp1.md
Last active March 26, 2024 18:19

Вытаскивание продавца

  1. запрос на https://api.ozon.ru/composer-api.bx/page/json/v1?url=/products/[тут ID продукта]/?layout_container=appPdpPage3&layout_page_index=3
  2. в узле "layout" найти узел с параметром component=seller
  3. из найденного узла взять stateId (например, seller-814807-appPdpPage3-3)
  4. в узле pdp найти узел со следующим полным xpath: /pdp/seller/seller-814807-appPdpPage3-3 (последняя ветка — это stateId из предыдущего пункта)
  5. найденный узел и будет информацией о продавце

Описание и характеристики товара

curl --location --request POST 'https://kiev.prom.ua/graphql' \
--header 'Content-Type: application/json' \
--data-raw '[{"variables":{"categoryId":0,"target":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"PromoPanelQuery","query":"query PromoPanelQuery($categoryId: Int\u0021, $target: String\u0021, $path: String\u0021) {\n promoPanel(category_id: $categoryId, target: $target, path: $path) {\n isAvailable\n url\n data {\n id\n image\n text\n textColor\n gradientStart\n gradientStop\n url\n urlTarget\n __typename\n }\n __typename\n }\n}\n"},{"variables":{"pageName":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"ListingBannerQuery","query":"query ListingBannerQuery($currentCategoryId: Int, $pageName: String\u0021, $path: String\u0021) {\n listingBanner(category_id: $currentCategoryId, target: $pageName, path: $path) {\n isAvailable\n url\

(scrapy) developer@ip-172-31-36-21:~/cosmetics_catalogs$ python test.py
Traceback (most recent call last):
File "test.py", line 13, in <module>
asyncio.run(main())
File "/usr/lib/python3.8/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "test.py", line 7, in main
browser = await browser_type.launch()
def process_request(self, request, spider):
cookiejarkey = request.meta.get('proxy') or request.meta.get("cookiejar")
proxy = request.meta.get('proxy')
token = self.tokens.get(cookiejarkey)
# req = request.copy()
if token:
s = ''
session = requests.Session()
headers = self.headers
headers['User-Agent'] = self.ua
import os
import logging
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from notifiers.logging import NotificationHandler
from os import getenv
from dotenv import load_dotenv
from datetime import datetime
load_dotenv()
@Mifody
Mifody / pipelines.py
Created February 15, 2021 08:23
scrapy Сохранение изображений с кастомным именем
from itemadapter import ItemAdapter
from shutterstock.db_utils import db_handle, CatalogModel
from scrapy.pipelines.images import ImagesPipeline
from scrapy import Request
import json
class ShutterstockImagePipeline(ImagesPipeline):
def get_media_requests(self, item, info):
url = ItemAdapter(item).get('thumb_url')
@Mifody
Mifody / pipelines.py
Last active February 5, 2021 11:39
scrapy запись в базу чанками
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from rssparser.db_utils import *
from rssparser.items import *
import logging