- запрос на
https://api.ozon.ru/composer-api.bx/page/json/v1?url=/products/[тут ID продукта]/?layout_container=appPdpPage3&layout_page_index=3
- в узле "layout" найти узел с параметром component=seller
- из найденного узла взять stateId (например, seller-814807-appPdpPage3-3)
- в узле pdp найти узел со следующим полным xpath: /pdp/seller/seller-814807-appPdpPage3-3 (последняя ветка — это stateId из предыдущего пункта)
- найденный узел и будет информацией о продавце
<!DOCTYPE html><html itemscope="" itemtype="http://schema.org/Place" lang="en-RU"> <head> <link href="/maps/_/js/k=maps.m.en.GBzd5ZChrUs.es5.O/m=sc2,per,mo,lp,ep,ti,ds,stx,dwi,enr,pwd,dw,plm,log,b/am=BgBGAxI/rt=j/d=1/rs=ACT90oEgzA4NjNg7VA4qKaAf-t7UWDH1Ng?wli=m.vOb3cCZHKaI.loadSv.O%3A%3B" as="script" rel="preload" type="application/javascript" nonce="U_IB5RFP8128Yb-UrmQ6Dg"> <link href="/maps/preview/opensearch.xml?hl=en" title="Google Maps" rel="search" type="application/opensearchdescription+xml"> <title> Google Maps </title> <meta content="Find local businesses, view maps and get driving directions in Google Maps." name="Description"> <meta content="Anm+hhtuh7NJguqSnXHEAIqqMaV+GXCks8WYXHJKF7l6AeYMj+wO+fi9OdDqFnJTg9t0492DykVxx4jpvFbxnA8AAABseyJvcmlnaW4iOiJodHRwczovL2dvb2dsZS5jb206NDQzIiwiZmVhdHVyZSI6IlByaXZhY3lTYW5kYm94QWRzQVBJcyIsImV4cGlyeSI6MTY5NTE2Nzk5OSwiaXNTdWJkb21haW4iOnRydWV9" http-equiv="origin-trial"> <meta content="initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name= |
curl --location --request POST 'https://kiev.prom.ua/graphql' \
--header 'Content-Type: application/json' \
--data-raw '[{"variables":{"categoryId":0,"target":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"PromoPanelQuery","query":"query PromoPanelQuery($categoryId: Int\u0021, $target: String\u0021, $path: String\u0021) {\n promoPanel(category_id: $categoryId, target: $target, path: $path) {\n isAvailable\n url\n data {\n id\n image\n text\n textColor\n gradientStart\n gradientStop\n url\n urlTarget\n __typename\n }\n __typename\n }\n}\n"},{"variables":{"pageName":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"ListingBannerQuery","query":"query ListingBannerQuery($currentCategoryId: Int, $pageName: String\u0021, $path: String\u0021) {\n listingBanner(category_id: $currentCategoryId, target: $pageName, path: $path) {\n isAvailable\n url\
(scrapy) developer@ip-172-31-36-21:~/cosmetics_catalogs$ python test.py | |
Traceback (most recent call last): | |
File "test.py", line 13, in <module> | |
asyncio.run(main()) | |
File "/usr/lib/python3.8/asyncio/runners.py", line 44, in run | |
return loop.run_until_complete(main) | |
File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete | |
return future.result() | |
File "test.py", line 7, in main | |
browser = await browser_type.launch() |
def process_request(self, request, spider): | |
cookiejarkey = request.meta.get('proxy') or request.meta.get("cookiejar") | |
proxy = request.meta.get('proxy') | |
token = self.tokens.get(cookiejarkey) | |
# req = request.copy() | |
if token: | |
s = '' | |
session = requests.Session() | |
headers = self.headers | |
headers['User-Agent'] = self.ua |
import os | |
import logging | |
from scrapy.utils.project import get_project_settings | |
from scrapy.utils.log import configure_logging | |
from notifiers.logging import NotificationHandler | |
from os import getenv | |
from dotenv import load_dotenv | |
from datetime import datetime | |
load_dotenv() |
from itemadapter import ItemAdapter | |
from shutterstock.db_utils import db_handle, CatalogModel | |
from scrapy.pipelines.images import ImagesPipeline | |
from scrapy import Request | |
import json | |
class ShutterstockImagePipeline(ImagesPipeline): | |
def get_media_requests(self, item, info): | |
url = ItemAdapter(item).get('thumb_url') |
# -*- coding: utf-8 -*- | |
# Define your item pipelines here | |
# | |
# Don't forget to add your pipeline to the ITEM_PIPELINES setting | |
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html | |
from rssparser.db_utils import * | |
from rssparser.items import * | |
import logging |