# scdekov/a.py

## a.py
import re

import scrapy


# The product page whose URL the spider requests (read as product['url']).
product = dict(
    url='https://www.amazon.co.uk/Asmodee-ASMDOBB01EN-Dobble-Card-Game/dp/B0031QBHMA/',
)


# Maps each output field name to a callable that takes a Scrapy response and
# returns the extracted text.  Selectors that slice or index the extracted text
# fall back to '' when the page lacks the element, mirroring the `or ['']`
# guard that 'other_category_rank' already uses.
data_selectors_map = {
    # `.get()` is required here: the SelectorList returned by xpath() has no
    # strip() method of its own.
    'title': lambda response: (
        response.xpath('//*[@id="productTitle"]/text()').get() or ''
    ).strip(),
    # TODO: validate if all group results are the same
    # The captured id is restricted to [A-Za-z0-9]; the range A-z would also
    # admit the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
    'parent_asin': lambda response: (
        re.findall(r'["\']parentAsin["\']\s*:\s*["\']([A-Za-z0-9]{10})',
                   response.body.decode('utf8'))
        or ['']
    )[0],
    # Every text node under the feature bullets, stripped, one per line.
    'bullets': lambda response: "\n".join(
        map(str.strip,
            response.xpath('//*[@id="feature-bullets"]//li//text()').getall())
    ),
    # [1:] drops the first character (presumably the currency symbol - confirm).
    'price': lambda response: (
        response.xpath('//*[@id="priceblock_ourprice"]//text()').get() or ''
    )[1:],
    # TODO
    # 'stock': lambda response:
    # 'is_prime': ''
    # Unlike the other fields, a missing brand yields None rather than ''.
    'brand': lambda response: response.xpath('//*[@id="bylineInfo"]/text()').get(),
    # First run of digits found in the sales-rank cell.
    # NOTE(review): a rank printed with thousands separators ("1,234") would be
    # cut to "1" - confirm the page format.
    'seller_rank': lambda response: (
        re.findall(
            r'(\d+)',
            (response.xpath('//*[@id="SalesRank"]//td[@class="value"]/text()').get() or '').strip(),
        )
        or ['']
    )[0],
    # First category rank; [1:] drops its first character (presumably '#').
    'category_rank': lambda response: (
        response.xpath('//*[@id="SalesRank"]//td[@class="value"]//*[@class="zg_hrsr_rank"]/text()').get()
        or ''
    )[1:],
    # Second category rank, returned as extracted (no leading character removed).
    'other_category_rank': lambda response: (
        response.xpath('//*[@id="SalesRank"]//td[@class="value"]//*[@class="zg_hrsr_rank"]/text()')
        .getall()[1:2]
        or ['']
    )[0],
    # TODO: placeholder - not a callable yet, so it cannot be invoked like the
    # other entries.
    'description_length': ''
}


class ProductDataSpidere(scrapy.Spider):
    """Spider that requests the single page stored in ``product['url']``."""

    name = 'product-data'

    def start_requests(self):
        """Yield one request for the product URL, with ``parse`` as callback."""
        target = product['url']
        yield scrapy.Request(url=target, callback=self.parse)

    def parse(self, response):
        """Placeholder callback: the response is currently ignored."""
        return None
	import re

	import scrapy


	# The product page whose URL the spider requests (read as product['url']).
	# NOTE(review): this rebinds the `product` already defined earlier in the
	# file; the two copies look like an accidental duplicate - consider
	# removing one.
	product = dict(
		url='https://www.amazon.co.uk/Asmodee-ASMDOBB01EN-Dobble-Card-Game/dp/B0031QBHMA/',
	)


	# Maps each output field name to a callable that takes a Scrapy response and
	# returns the extracted text.  Selectors that slice or index the extracted
	# text fall back to '' when the page lacks the element, mirroring the
	# `or ['']` guard that 'other_category_rank' already uses.
	# NOTE(review): this rebinds the `data_selectors_map` already defined earlier
	# in the file; the two copies look like an accidental duplicate - consider
	# removing one.
	data_selectors_map = {
		# `.get()` is required here: the SelectorList returned by xpath() has no
		# strip() method of its own.
		'title': lambda response: (
			response.xpath('//*[@id="productTitle"]/text()').get() or ''
		).strip(),
		# TODO: validate if all group results are the same
		# `\s*` allows any amount of whitespace around the colon.  The captured
		# id is restricted to [A-Za-z0-9]; the range A-z would also admit the
		# punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
		'parent_asin': lambda response: (
			re.findall(r'["\']parentAsin["\']\s*:\s*["\']([A-Za-z0-9]{10})',
				response.body.decode('utf8'))
			or ['']
		)[0],
		# Every text node under the feature bullets, stripped, one per line.
		'bullets': lambda response: "\n".join(
			map(str.strip,
				response.xpath('//*[@id="feature-bullets"]//li//text()').getall())
		),
		# [1:] drops the first character (presumably the currency symbol - confirm).
		'price': lambda response: (
			response.xpath('//*[@id="priceblock_ourprice"]//text()').get() or ''
		)[1:],
		# TODO
		# 'stock': lambda response:
		# 'is_prime': ''
		# Unlike the other fields, a missing brand yields None rather than ''.
		'brand': lambda response: response.xpath('//*[@id="bylineInfo"]/text()').get(),
		# First run of digits found in the sales-rank cell.
		# NOTE(review): a rank printed with thousands separators ("1,234") would
		# be cut to "1" - confirm the page format.
		'seller_rank': lambda response: (
			re.findall(
				r'(\d+)',
				(response.xpath('//*[@id="SalesRank"]//td[@class="value"]/text()').get() or '').strip(),
			)
			or ['']
		)[0],
		# `//*[...]` needs the `*` node test: `//[...]` is not valid XPath.
		# First category rank; [1:] drops its first character (presumably '#').
		'category_rank': lambda response: (
			response.xpath('//*[@id="SalesRank"]//td[@class="value"]//*[@class="zg_hrsr_rank"]/text()').get()
			or ''
		)[1:],
		# Second category rank, returned as extracted (no leading character removed).
		'other_category_rank': lambda response: (
			response.xpath('//*[@id="SalesRank"]//td[@class="value"]//*[@class="zg_hrsr_rank"]/text()')
			.getall()[1:2]
			or ['']
		)[0],
		# TODO: placeholder - not a callable yet, so it cannot be invoked like
		# the other entries.
		'description_length': ''
	}


	# NOTE(review): this redefines the `ProductDataSpidere` class already present
	# earlier in the file; the two copies look like an accidental duplicate -
	# consider removing one.
	class ProductDataSpidere(scrapy.Spider):
		"""Spider that requests the single page stored in ``product['url']``."""

		name = 'product-data'

		def start_requests(self):
			"""Yield one request for the product URL, with ``parse`` as callback."""
			yield scrapy.Request(url=product['url'], callback=self.parse)

		def parse(self, response):
			"""Placeholder callback: the response is currently ignored."""
			pass