@cpouldev
Created December 27, 2023 07:34
Masoutis
# -*- coding: utf-8 -*-
import json

from scrapy import Request

from src.scraper.shops.spiders.base import SupermarketSpider

FORM_URL = 'https://eshop.masoutis.gr/WcfScanNShopForWeb/OrdersService.svc/GetPromoItemWithListCouponsSubCategories/'
HEADERS = {'Content-Type': 'application/json; charset=utf-8', 'Host': 'eshop.masoutis.gr',
           'Origin': 'https://eshop.masoutis.gr/', 'X-Requested-With': 'XMLHttpRequest',
           'Referer': 'https://eshop.masoutis.gr/'}


def format_price(p):
    """Coerce an API price field to float; return None for missing or malformed values."""
    try:
        return float(p)
    except (TypeError, ValueError):
        return None
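

# Crawl flow, as implemented by MasoutisSpider below:
#   1. POST to /Home/GetGred to obtain the Key/Uid/Usl session tokens.
#   2. Fetch the category menu via GetScanNShopMenuAllLevels.
#   3. Page through each category with GetPromoItemWithListCouponsSubCategories.
# The payload field names (PassKey, Itemcode, IfWeight, ...) belong to the
# eshop's private API; their meaning here is inferred from how the spider
# uses them (Itemcode carries the category id, IfWeight the page number).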
class MasoutisSpider(SupermarketSpider):
    name = 'masoutis'
    allowed_domains = ['eshop.masoutis.gr']

    # Session tokens returned by /Home/GetGred; filled in request_categories.
    app_key = ''
    app_uid = ''
    app_usl = ''

    # Seconds to wait between consecutive requests to the site.
    download_delay = 15

    def authed_request(self, **kwargs):
        # Attach the session tokens obtained in request_categories to every API call.
        return Request(headers={**HEADERS,
                                'Key': self.app_key,
                                'Uid': self.app_uid,
                                'Usl': self.app_usl},
                       **kwargs)

    def make_cat_request(self, cat_id, page):
        # The API reuses generic field names: here Itemcode carries the
        # category id and IfWeight the page number (see parse_catalog).
        req_data = {
            "PassKey": "Sc@NnSh0p",
            "Itemcode": cat_id,
            "ItemDescr": "0",
            "IfWeight": page
        }
        return self.authed_request(
            url=FORM_URL,
            method='POST',
            callback=self.parse_catalog,
            body=json.dumps(req_data),
            meta={'page': page, 'cat_id': cat_id})

    def start_requests(self):
        # First obtain the Key/Uid/Usl session tokens from /Home/GetGred.
        yield Request(url='https://eshop.masoutis.gr/Home/GetGred',
                      headers=HEADERS,
                      method='POST',
                      body=json.dumps({'PassTemp': 'ScanNShop$c@nNSh0p'}),
                      callback=self.request_categories)

    def request_categories(self, response):
        # Store the session tokens, then fetch the full category menu.
        data = json.loads(response.body)
        self.app_key = data['Key']
        self.app_uid = data['Uid']
        self.app_usl = data['Usl']
        yield self.authed_request(
            url='https://eshop.masoutis.gr/WcfScanNShopForWeb/OrdersService.svc/GetScanNShopMenuAllLevels/',
            method='POST',
            body=json.dumps({'PassKey': 'Sc@NnSh0p'}),
            callback=self.parse_categories)
def parse_categories(self, response):
data = json.loads(response.body)
cats = set([c['HeaderMenuItem'] for c in data if c['HeaderMenuItemLinkDescr'] != 'oi-sunergates-mas'])
for cat_id in cats:
yield self.make_cat_request(cat_id=cat_id,
page=1)
def parse_catalog(self, response):
data = json.loads(response.body)
cat_id = response.meta['cat_id']
if len(data) <= 0:
return None
for item in data:
img_url = item['PhotoData']
offer = item['Discount']
url = item['ItemDescrLink']
if offer:
offer = offer.strip()
image_item, image_hash = self.get_image_item(img_url)
yield image_item
self.insert_item(
item=item.get('ItemDescr', None),
key=url,
offer=offer,
url=url,
sale_price=format_price(item.get('PosPrice', None)),
price=format_price(item.get('StartPrice', None)),
image=image_hash
)
page = response.meta['page']
next_page = page + 1
yield self.make_cat_request(cat_id=cat_id, page=next_page)
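
The SupermarketSpider base class is not included in this gist. A minimal sketch of the interface this spider appears to rely on, inferred only from the call sites above (names and signatures are assumptions, not the actual project code):

import scrapy

# Hypothetical stand-in for src.scraper.shops.spiders.base.SupermarketSpider,
# reconstructed from how MasoutisSpider calls it; the real class may differ.
class SupermarketSpider(scrapy.Spider):

    def get_image_item(self, img_url):
        # Expected to return an (image_item, image_hash) pair: an item the
        # image pipeline can download plus a stable hash used as its key.
        raise NotImplementedError

    def insert_item(self, item, key, offer, url, sale_price, price, image):
        # Expected to persist one normalized product record (name, offer text,
        # regular and sale price, image hash) keyed by its product URL.
        raise NotImplementedError

Assuming the spider file lives in the project's spiders package with such a base class in place, the crawl would be started with "scrapy crawl masoutis", matching the spider's name attribute.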