Skip to content

Instantly share code, notes, and snippets.

@cpouldev
Created December 27, 2023 07:36
Show Gist options
  • Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.
Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.
xalkiadakis
# -*- coding: utf-8 -*-
from scrapy import Request
from src.scraper.shops.spiders.base import SupermarketSpider
def format_price(p):
    """Convert scraped price text to a float.

    Surrounding whitespace is stripped before the conversion.

    Args:
        p: Price text, or None when the selector matched nothing.

    Returns:
        The parsed float, or None when *p* has no ``strip()`` method
        (e.g. it is None) or the stripped text is not a valid float literal.
    """
    try:
        return float(p.strip())
    except (AttributeError, TypeError, ValueError):
        # Only conversion failures are treated as "no price"; interrupts and
        # other unrelated errors are allowed to propagate.
        return None
class XalkiadakisSpider(SupermarketSpider):
    """Spider for the xalkiadakis.gr e-shop.

    Starts at the shop home page, follows the category links found in the
    primary mega menu, and walks each category's paginated product list.
    """

    name = 'xalkiadakis'
    allowed_domains = ['xalkiadakis.gr']
    start_urls = ['https://eshop.xalkiadakis.gr/']

    def parse(self, response):
        """Yield one catalog request for every category link in the mega menu."""
        cats = response.css(
            '#mega-menu-primary li.mega-proiontamenu > ul > li > a.mega-menu-link'
        ).xpath('@href').getall()
        for cat in cats:
            # urljoin leaves absolute links unchanged and resolves relative
            # ones, which Request would otherwise reject (missing scheme).
            yield Request(url=response.urljoin(cat), callback=self.parse_catalog)

    def parse_catalog(self, response):
        """Extract the products of one catalog page and follow pagination.

        For each ``li.product`` element the title, image URL, prices and
        product link are read. Products for which neither price could be
        parsed are skipped. For the rest, the image item is yielded and the
        product data is handed to ``self.insert_item``. Finally, a request
        for the next page is yielded when a "next" link is present.
        """
        items = response.css('li.product')
        next_page = response.css('.page-numbers a.next').xpath('@href').get()

        for item in items:
            title = item.css('.woocommerce-loop-product__title::text').get()
            image_url = item.css('img').xpath('@src').get()
            sale_price = format_price(item.css('.sale_price bdi::text').get())
            price = format_price(item.css('.price del bdi::text').get())
            url = item.css(
                'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
            ).xpath('@href').get()

            # NOTE(review): get_image_item is inherited and not visible here;
            # it is invoked before the skip check below, as in the original,
            # in case it has side effects — confirm whether it can be deferred.
            image_item, image_hash = self.get_image_item(image_url)

            # No separate sale price: fall back to the regular price.
            if price and not sale_price:
                sale_price = price

            # Nothing usable was parsed for this product.
            if not price and not sale_price:
                continue

            yield image_item
            self.insert_item(
                item=title,
                key=url,
                sale_price=sale_price,
                price=price,
                url=url,
                image=image_hash
            )

        if next_page:
            # Resolve a possibly relative pagination link against this page.
            yield Request(url=response.urljoin(next_page), callback=self.parse_catalog)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment