Skip to content

Instantly share code, notes, and snippets.

@amarynets
Created July 26, 2017 21:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amarynets/0f8379939d5f3c9fe485c8a6ac19b219 to your computer and use it in GitHub Desktop.
Save amarynets/0f8379939d5f3c9fe485c8a6ac19b219 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
class LfrSpider(scrapy.Spider):
    """Spider for the ``ventes_immobilieres`` listing on www.leboncoin.fr.

    ``parse`` follows every ad link found on the listing page and
    ``parse_detail`` turns each ad page into a flat ``dict`` holding the
    page URL plus one entry per label/value row of the properties section.
    """

    name = "lfr"
    allowed_domains = ["www.leboncoin.fr"]
    start_urls = ['https://www.leboncoin.fr/ventes_immobilieres/']

    @staticmethod
    def _clean_text(fragments):
        """Join extracted text fragments, drop newlines, and trim the ends."""
        return ''.join(fragments).replace('\n', '').strip()

    def parse(self, response):
        """Yield one request per ad link found on the listing page.

        Args:
            response: Scrapy response for a listing page.

        Yields:
            scrapy.Request: request for an ad page, handled by
            ``parse_detail``.
        """
        hrefs = response.xpath(
            ".//section[@class='tabsContent block-white dontSwitch']/ul/li/a/@href"
        ).extract()
        for href in hrefs:
            # urljoin resolves protocol-relative ("//host/path"), relative and
            # absolute hrefs against the page URL; blindly prefixing "https:"
            # only produced a valid URL for the protocol-relative form.
            yield scrapy.Request(response.urljoin(href), callback=self.parse_detail)

    def parse_detail(self, response):
        """Collect the label/value rows of an ad page into a single item.

        Args:
            response: Scrapy response for an ad detail page.

        Yields:
            dict: ``{'url': <page url>, <label>: <value>, ...}`` where each
            label/value pair comes from the first and second ``<span>`` of an
            ``<h2>`` inside the properties section.
        """
        item = {'url': response.url}
        rows = response.xpath(".//section[@class='properties lineNegative']//div//h2")
        for row in rows:
            prop = self._clean_text(row.xpath(".//span[1]/text()").extract())
            val = self._clean_text(row.xpath(".//span[2]//text()").extract())
            if not prop:
                # A row without a label would otherwise be stored under the
                # empty-string key, each one overwriting the previous.
                continue
            item[prop] = val
        # Kept so existing runs still see the item on stdout.
        print(item)
        # Hand the item to Scrapy so pipelines and feed exports receive it;
        # printing alone left the crawl with no scraped output.
        yield item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment