Skip to content

Instantly share code, notes, and snippets.

@rhanka
Created January 26, 2019 18:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rhanka/b119417b51e2cf1a1a36d1d5ecb5fa14 to your computer and use it in GitHub Desktop.
Save rhanka/b119417b51e2cf1a1a36d1d5ecb5fa14 to your computer and use it in GitHub Desktop.
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import hashlib
import json
import re
import time

import dataiku
import pandas as pd, numpy as np
import requests
from dataiku import pandasutils as pdu
from pandas.io.json import json_normalize
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# --- Search configuration -------------------------------------------------
# Everything below is sent to the leboncoin "finder/search" endpoint.
limit = 100        # value of the "limit" field in the request payload
maxtries = 3       # upper bound on the number of request attempts
text_query = 'my_query'
category = '2'     # category id sent as a string -- presumably cars, given the Referer; confirm
# A free key can be obtained by inspecting, in Firefox, the requests made
# while browsing leboncoin.fr.
api_key = 'my_api_key'
url = 'https://api.leboncoin.fr/finder/search'

# JSON body of the search request: offers only, filtered by category and keywords.
data = {
    "limit": limit,
    "limit_alu": 3,
    "filters": {
        "category": {"id": category},
        "enums": {"ad_type": ["offer"]},
        "location": {},
        "keywords": {"text": text_query},
        "ranges": {},
    },
}

# HTTP headers imitating a Firefox session on the leboncoin website.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'api_key': api_key,
    'origin': 'https://www.leboncoin.fr',
    'Referer': 'https://www.leboncoin.fr/voitures/offres/',
}
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# POST the search request, retrying up to `maxtries` times with an
# exponential back-off (3s, 9s, ...) between attempts.
#
# Reads:  url, headers, data, maxtries
# Writes: response (last HTTP response), status_code, tries, failed
# Raises: RuntimeError when no attempt returned HTTP 200.
request_timeout = 30  # seconds; requests waits forever when no timeout is given

tries = 1
failed = True
response = None
while failed and tries <= maxtries:
    try:
        response = requests.post(url, headers=headers, json=data,
                                 timeout=request_timeout)
        status_code = response.status_code
    except requests.exceptions.Timeout:
        # Covers both connect and read timeouts.
        status_code = "timeout"
    except requests.exceptions.ConnectionError:
        # Transient network failures are retried like any non-200 answer.
        status_code = "connection_error"
    if status_code == 200:
        failed = False
    else:
        #logging.warning("{}".format(tries))
        tries += 1
        if tries <= maxtries:
            # Back off before the next attempt; no sleep after the last one.
            time.sleep(3 ** (tries - 1))

if failed:
    # Stop here with a clear message instead of letting the following cells
    # fail on a missing or non-200 response.
    raise RuntimeError(
        "leboncoin search failed after {} attempts (last status: {})".format(
            maxtries, status_code))
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build a DataFrame with one row per ad from the "ads" list of the JSON
# response body, then preview the first rows.
# Raises KeyError if the response body has no "ads" key.
payload = response.json()
df = pd.DataFrame(payload['ads'])
print(df.head())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Replace the values of these columns by their JSON text representation,
# then preview the result.
# NOTE(review): presumably these columns hold nested dicts/lists that the
# output dataset cannot store as-is -- confirm against the API response.
# Raises KeyError if one of the columns is absent from `df`.
cols = ['attributes', 'images', 'location', 'options', 'owner']
for col in cols:
    df[col] = df[col].apply(json.dumps)
print(df.head())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe output: write `df` to the Dataiku dataset "leboncoin_scrap" via
# write_with_schema.
leboncoin_scrap = dataiku.Dataset("leboncoin_scrap")
leboncoin_scrap.write_with_schema(
    df
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment