curl --location --request POST 'https://kiev.prom.ua/graphql'
--header 'Content-Type: application/json'
--data-raw '[{"variables":{"categoryId":0,"target":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"PromoPanelQuery","query":"query PromoPanelQuery($categoryId: Intu0021, $target: Stringu0021, $path: Stringu0021) {\n promoPanel(category_id: $categoryId, target: $target, path: $path) {\n isAvailable\n url\n data {\n id\n image\n text\n textColor\n gradientStart\n gradientStop\n url\n urlTarget\n __typename\n }\n __typename\n }\n}\n"},{"variables":{"pageName":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"ListingBannerQuery","query":"query ListingBannerQuery($currentCategoryId: Int, $pageName: Stringu0021, $path: Stringu0021) {\n listingBanner(category_id: $currentCategoryId, target: $pageName, path: $path) {\n isAvailable\n url\
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Define your item pipelines here | |
# | |
# Don't forget to add your pipeline to the ITEM_PIPELINES setting | |
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html | |
from rssparser.db_utils import * | |
from rssparser.items import * | |
import logging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itemadapter import ItemAdapter | |
from shutterstock.db_utils import db_handle, CatalogModel | |
from scrapy.pipelines.images import ImagesPipeline | |
from scrapy import Request | |
import json | |
class ShutterstockImagePipeline(ImagesPipeline): | |
def get_media_requests(self, item, info): | |
url = ItemAdapter(item).get('thumb_url') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import logging | |
from scrapy.utils.project import get_project_settings | |
from scrapy.utils.log import configure_logging | |
from notifiers.logging import NotificationHandler | |
from os import getenv | |
from dotenv import load_dotenv | |
from datetime import datetime | |
load_dotenv() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def process_request(self, request, spider): | |
cookiejarkey = request.meta.get('proxy') or request.meta.get("cookiejar") | |
proxy = request.meta.get('proxy') | |
token = self.tokens.get(cookiejarkey) | |
# req = request.copy() | |
if token: | |
s = '' | |
session = requests.Session() | |
headers = self.headers | |
headers['User-Agent'] = self.ua |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(scrapy) developer@ip-172-31-36-21:~/cosmetics_catalogs$ python test.py | |
Traceback (most recent call last): | |
File "test.py", line 13, in <module> | |
asyncio.run(main()) | |
File "/usr/lib/python3.8/asyncio/runners.py", line 44, in run | |
return loop.run_until_complete(main) | |
File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete | |
return future.result() | |
File "test.py", line 7, in main | |
browser = await browser_type.launch() |
- запрос на
https://api.ozon.ru/composer-api.bx/page/json/v1?url=/products/[тут ID продукта]/?layout_container=appPdpPage3&layout_page_index=3
- в узле "layout" найти узел с параметром component=seller
- из найденного узла взять stateId (например seller-814807-appPdpPage3-3)
- в узле pdp найти узел со следующим полным xpath /pdp/seller/seller-814807-appPdpPage3-3 (последняя ветка — это stateId из предыдущего пункта)
- найденный узел и будет информацией о продавце
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html><html itemscope="" itemtype="http://schema.org/Place" lang="en-RU"> <head> <link href="/maps/_/js/k=maps.m.en.GBzd5ZChrUs.es5.O/m=sc2,per,mo,lp,ep,ti,ds,stx,dwi,enr,pwd,dw,plm,log,b/am=BgBGAxI/rt=j/d=1/rs=ACT90oEgzA4NjNg7VA4qKaAf-t7UWDH1Ng?wli=m.vOb3cCZHKaI.loadSv.O%3A%3B" as="script" rel="preload" type="application/javascript" nonce="U_IB5RFP8128Yb-UrmQ6Dg"> <link href="/maps/preview/opensearch.xml?hl=en" title="Google Maps" rel="search" type="application/opensearchdescription+xml"> <title> Google Maps </title> <meta content="Find local businesses, view maps and get driving directions in Google Maps." name="Description"> <meta content="Anm+hhtuh7NJguqSnXHEAIqqMaV+GXCks8WYXHJKF7l6AeYMj+wO+fi9OdDqFnJTg9t0492DykVxx4jpvFbxnA8AAABseyJvcmlnaW4iOiJodHRwczovL2dvb2dsZS5jb206NDQzIiwiZmVhdHVyZSI6IlByaXZhY3lTYW5kYm94QWRzQVBJcyIsImV4cGlyeSI6MTY5NTE2Nzk5OSwiaXNTdWJkb21haW4iOnRydWV9" http-equiv="origin-trial"> <meta content="initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name= |