Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Collect BMW vehicle data on lacentrale.fr
# -*- coding: utf-8 -*-
# Copyright(C) 2021 Sasha Bouloudnine
import requests
from lxml import html
import csv
class CrawlerLaCentrale():
    """Query the lacentrale.fr search endpoint and export vehicle rows to CSV.

    One `requests.Session` is created per crawler and reused for every call.
    """

    # Keys of every dict yielded by get_cars(), and the CSV column order.
    CSV_FIELDS = ('year', 'model', 'brand', 'version', 'mileage', 'price', 'image_url')

    # Seconds to wait on the HTTP call; without a timeout a stalled
    # connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.s = requests.Session()

    def iter_cars(self, page, brand='BMW'):
        """Fetch one page of search results and return the decoded JSON body.

        Args:
            page: Page index interpolated into the query string.
            brand: Value for the `makesModelsCommercialNames` filter.

        Returns:
            The parsed JSON document returned by the endpoint.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        # NOTE(review): the bearer token and API key below are hard-coded
        # credentials that look like they were captured from a browser
        # session; they may have expired -- confirm, and load them from
        # configuration instead of source.
        headers = {
            'Connection': 'keep-alive',
            'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
            'Accept': 'application/json',
            'Authorization': 'Bearer eyJhbGciOiJSUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MTg0OTQ0NDEsInZlcnNpb24iOiIyMDE4LTA3LTE2IiwidXNlckNvcnJlbGF0aW9uSWQiOm51bGwsInVzZXJfY29ycmVsYXRpb25faWQiOm51bGwsImxvZ2dlZFVzZXIiOnsiY29ycmVsYXRpb25JZCI6bnVsbCwicmVmcmVzaFRva2VuVFRMIjoxNjE4NTgwNTQxfSwibW9kZU1hc3F1ZXJhZGUiOmZhbHNlLCJhdXRob3JpemF0aW9ucyI6eyJ2ZXJzaW9uIjoiMjAxOC0wNy0xNiIsInN0YXRlbWVudHMiOlt7InNpZCI6IioiLCJlZmZlY3QiOiJEZW55IiwiYWN0aW9ucyI6WyIqIl0sInJlc291cmNlcyI6WyIqIl19XX0sInNzciI6Ii0xIiwiaWF0IjoxNjE4NDk0MTQxfQ.Ws_SU4NJ_1wd096AoGNPWtffpzX0V3kLQ3VKTmwlbsi_SPMsPOixTSpriL3hbATa_s2WUUbNQzXXs22wSyQgNDaKlhKIXBjs3wLsB5cq7eFVEdUyhk720JEwLC8xOOfORpi4r7R_F1wIXN1Re7m3pONp7Cn82ie4k4V9r2Wshd2pt_sphBpkQQvLXcYTO7uWtpfeTmIBQkkQMDHywHzuWegCHcrJ95uFTCBPn40EV9Mfhh--W1Gc6npcL0QiLlhyy5rEQl6WegcHdetZEXKG2-Du3sRq-u4rdwGrotUsRfGjgDfbc0Xy1Xtq1v6GnXvTpjl4QVu50dTtIJWkKhnZlQ',
            'X-Client-Source': 'classified:lcpab:recherche-react',
            'sec-ch-ua-mobile': '?0',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36',
            'x-api-key': '2vHD2GjDJ07RpNvbGYpJG7s6bQNwRNkI9SEkgQnR',
            'Origin': 'https://www.lacentrale.fr',
            'Sec-Fetch-Site': 'same-site',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Dest': 'empty',
            'Referer': 'https://www.lacentrale.fr/listing?makesModelsCommercialNames=BMW&options=&page=2&regions=FR-IDF',
            'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
        }
        # The page index is sent as `page`. It used to be sent as `pageSize`,
        # which made successive calls ask for 1, 2, 3... results instead of
        # successive pages, so the collected rows overlapped.
        # NOTE(review): the parameter name mirrors the `page=` visible in the
        # Referer URL above -- confirm against the API.
        url = f'https://recherche.lacentrale.fr/v3/search?boostVo=true&clientGeopoints=45.0393%2C3.8371&families=AUTO%2CUTILITY&makesModelsCommercialNames={brand}&page={page}&regions=FR-IDF'
        print(url)
        response = self.s.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        # Fail here on an HTTP error rather than later on a confusing JSON or
        # KeyError when the error payload is parsed as search results.
        response.raise_for_status()
        return response.json()

    def get_cars(self, doc):
        """Yield one flat dict per entry of ``doc['hits']``.

        Each yielded dict has exactly the keys listed in `CSV_FIELDS`; a field
        absent from the payload is reported as None. Every dict is also
        printed before being yielded.

        Args:
            doc: Mapping with a 'hits' list; each hit holds an 'item' mapping
                that in turn holds a 'vehicle' mapping.

        Raises:
            KeyError: if 'hits', 'item' or 'vehicle' is missing.
            AssertionError: if a hit carries an empty 'vehicle' entry.
        """
        for hit in doc['hits']:
            item = hit['item']
            vehicle = item['vehicle']
            if not vehicle:
                # Raised explicitly (rather than via `assert`) so the check
                # is not stripped under `python -O`; the exception type is
                # kept so existing callers see the same failure.
                raise AssertionError('search hit has an empty "vehicle" entry')
            vehicle_dict = {
                'year': vehicle.get('year'),
                'model': vehicle.get('model'),
                'brand': vehicle.get('make'),
                'version': vehicle.get('version'),
                'mileage': vehicle.get('mileage'),
                'price': item.get('price'),
                'image_url': item.get('photoUrl'),
            }
            print(vehicle_dict)
            yield vehicle_dict

    def main(self, write, brand='BMW', pages=range(1, 10),
             output_path='lacentrale_22010415.csv'):
        """Collect the cars of every requested page and optionally dump them to CSV.

        Args:
            write: When truthy, write the collected rows to `output_path`.
            brand: Brand forwarded to iter_cars().
            pages: Iterable of page indexes to request, in order.
            output_path: Destination of the tab-separated export.
        """
        rows = []
        for page in pages:
            doc = self.iter_cars(page, brand)
            rows.extend(self.get_cars(doc))
        if write:
            # newline='' is what the csv module requires of the file object
            # (otherwise rows gain an extra '\r' on Windows); the explicit
            # encoding keeps accented text independent of the locale default.
            with open(output_path, mode='w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, delimiter='\t', fieldnames=self.CSV_FIELDS)
                writer.writeheader()
                writer.writerows(rows)
if __name__ == '__main__':
    # Script entry point: build a crawler and run the crawl with the
    # export enabled.
    crawler = CrawlerLaCentrale()
    crawler.main(write=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment