Created
August 2, 2018 16:10
-
-
Save lobstrio/4b00512ecc37c2df4ece0b5b2c9773a4 to your computer and use it in GitHub Desktop.
Dynamically compute the average price of an item on Leboncoin.fr, based on the first 100 listings, using Python 3 and Requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# coding: utf-8 | |
import requests | |
from bs4 import BeautifulSoup | |
from scrapy import Selector | |
import datetime | |
import argparse | |
def _parse_price(raw_text):
    """Convert one scraped price text node to a float.

    Returns None when the text does not contain a parseable number.
    """
    # str.split() with no argument splits on any Unicode whitespace, so the
    # join removes whitespace *inside* the number too (float() only tolerates
    # leading/trailing whitespace).
    cleaned = ''.join(raw_text.split()).replace('€', '').replace(',', '.')
    try:
        return float(cleaned)
    except ValueError:
        return None


def _extract_prices(html):
    """Return the list of parseable prices found in one results page."""
    soup = BeautifulSoup(html, features='html.parser')
    sel = Selector(text=soup.prettify())
    raw_prices = sel.xpath("//span[@itemprop='price']/text()").extract()
    parsed = (_parse_price(text) for text in raw_prices)
    return [price for price in parsed if price is not None]


def compute_avg(keyword, max_items=100, timeout=10):
    """
    Collect prices for a keyword on leboncoin.fr, then compute their average.

    Result pages are fetched one after another until ``max_items`` prices
    have been collected, a page is inaccessible, or a page yields no price.

    Arguments:
        keyword (str):
            keyword you would type in the leboncoin search box
        max_items (int):
            maximum number of prices taken into account (default: 100)
        timeout (float):
            per-request timeout in seconds (default: 10)

    Return:
        average price (€) over at most ``max_items`` prices,
        or None when no price could be collected
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import quote

    # INITIALISATION
    print('-- INITIALISATION --')
    print('KEYWORD: {}'.format(keyword))
    print('SITE: LEBONCOIN.FR')

    start = datetime.datetime.now()
    price_table = []
    base_url = 'https://www.leboncoin.fr/recherche/?text='
    # Percent-encode the whole keyword (spaces become %20, as before, and
    # characters such as '&' or accents no longer break the query string).
    search_url = base_url + quote(keyword, safe='')
    page_nb = 1

    # CRAWLING AND PARSING
    # The session is closed on exit, even if a request raises.
    with requests.Session() as session:
        while len(price_table) < max_items:
            page_url = search_url + "&page={}".format(page_nb)
            if page_nb > 1:
                print(page_url)

            try:
                response = session.get(url=page_url, timeout=timeout)
            except requests.RequestException:
                # Network failure: treated like an inaccessible page.
                response = None

            if response is None or response.status_code != 200:
                print('Page #{} : inaccessible'.format(page_nb))
                break

            print('PAGE: {}'.format(page_nb))
            # Parse the page that was just fetched (not only the first one).
            prices = _extract_prices(response.text)
            price_table.extend(prices)
            print(price_table)

            if not prices:
                # No price on this page: further pages would not help, and
                # continuing could loop forever.
                break
            page_nb += 1

    # TIME ELAPSED
    end = datetime.datetime.now()
    time_elapsed = str(end - start)
    print('\n')
    print('-- TIME ELAPSED --')
    print(time_elapsed)

    # Only the first ``max_items`` prices count; the last page may overshoot.
    sample = price_table[:max(max_items, 0)]
    if not sample:
        return None
    return sum(sample) / len(sample)
if __name__ == "__main__":
    # Command-line entry point: the only argument is the search keyword.
    cli = argparse.ArgumentParser()
    cli.add_argument('keyword', help='Search Keyword on Leboncoin.fr')
    keyword = cli.parse_args().keyword

    avg_price = compute_avg(keyword)

    if not avg_price:
        # A falsy result means no usable average was obtained.
        print('Prix indisponible, veuillez réessayer')
    else:
        headline = "Le prix moyen pour le mot-clé '{}' sur leboncoin.fr est de: ".format(keyword)
        amount = "{:.2f}".format(avg_price)
        print(headline + '\n' + amount + ' €')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment