Skip to content

Instantly share code, notes, and snippets.

@SiD3W4y
Last active March 7, 2017 08:25
Show Gist options
  • Save SiD3W4y/15faee45d9d5771c16eb098e49574b24 to your computer and use it in GitHub Desktop.
Save SiD3W4y/15faee45d9d5771c16eb098e49574b24 to your computer and use it in GitHub Desktop.
Simple CLI for PagesJaunes
import requests
import urllib
import bs4
import json
import base64
def parse_card(elem):
    """Build a contact record for one search-result card.

    Finds the card's link span, reads the JSON stored in its ``data-pjlb``
    attribute, base64-decodes the ``url`` entry of that JSON, downloads the
    page it points to and extracts the contact fields from it.

    Args:
        elem: Parsed result-card element; it must provide ``findAll``.

    Returns:
        dict: Keys ``name``, ``phone``, ``street``, ``town`` and
        ``postal-code``. Every value is an empty string when the card has no
        usable link span, when the decoded URL contains ``":80"`` (treated as
        a company entry), or when the detail request does not answer with
        HTTP 200. Otherwise each field found on the detail page holds its
        stripped, UTF-8 encoded text and fields that are absent stay empty.
    """
    result = {
        "name": "",
        "phone": "",
        "street": "",
        "town": "",
        "postal-code": "",
    }

    # Attribute filters must be dicts: a set literal would be read as a list
    # of alternative class names and would also match the class "class".
    links = elem.findAll("span", {"class": "bi-pos-links pj-lb pj-link"})
    if not links:
        # Card without a link span: nothing to follow.
        return result

    data = links[0].attrs.get("data-pjlb")
    if data is None:
        # Link span without the encoded payload: nothing to follow.
        return result

    info = json.loads(data)
    # Decode to text so the substring test below compares text with text
    # whether b64decode hands back a byte string or not.
    url = base64.b64decode(info["url"]).decode("utf-8")
    if ":80" in url:
        # We do not care about companies
        return result

    r = requests.get(url)
    if r.status_code != 200:
        print("Error while requesting data")
        return result

    document = bs4.BeautifulSoup(r.text, "lxml")
    found = {
        "name": document.find(itemprop="name"),
        "phone": document.find("span", {"class": "coord-numero"}),
        "street": document.find(itemprop="streetAddress"),
        "town": document.find(itemprop="addressLocality"),
        "postal-code": document.find(itemprop="postalCode"),
    }
    for key, node in found.items():
        # find() returns None for fields the page does not carry; keep the
        # empty default in that case.
        if node is not None:
            result[key] = node.text.strip().encode("UTF-8")
    return result
def search(name, town):
    """Search the directory for *name* in *town* and print every match.

    Posts the criteria to the search endpoint, walks the ``<header>``
    elements of the returned page, resolves each one whose first CSS class is
    ``v-card`` through ``parse_card`` and prints the records that carry a
    name.

    Args:
        name: Value sent as the ``quoiqui`` search field.
        town: Value sent as the ``ou`` search field.

    Returns:
        None. Results are written to standard output; ``"Error"`` is printed
        and nothing else happens when the search request does not answer with
        HTTP 200.
    """
    endpoint = "http://www.pagesjaunes.fr/pagesblanches/recherche?"
    form_data = {
        "quoiqui": name,
        "ou": town,
    }

    # The criteria are sent twice: in the query string and as the POST body.
    r = requests.post(endpoint + urllib.urlencode(form_data), data=form_data)
    if r.status_code != 200:
        print("Error")
        return

    document = bs4.BeautifulSoup(r.text, "lxml")
    results = []
    for elem in document.find_all("header"):
        # Headers without any class attribute are not result cards; looking
        # the attribute up with .get() keeps them from aborting the search.
        classes = elem.attrs.get("class")
        if not classes or classes[0] != "v-card":
            continue
        card = parse_card(elem)
        # parse_card leaves the name empty for entries it skipped or could
        # not load.
        if card["name"] != "":
            results.append(card)

    print("\n----- Found {} result(s) -----".format(len(results)))
    for record in results:
        for key in record:
            print("[{}] --> {}".format(key, record[key]))
        print("----------\n")
# Script entry: ask for the two search criteria, then run the lookup.
name = raw_input("Enter name : ")
town = raw_input("Enter town : ")
search(name=name, town=town)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment