Last active
March 7, 2017 08:25
-
-
Save SiD3W4y/15faee45d9d5771c16eb098e49574b24 to your computer and use it in GitHub Desktop.
Simple CLI for PagesJaunes (pagesjaunes.fr white-pages person lookup)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import urllib | |
import bs4 | |
import json | |
import base64 | |
def parse_card(elem):
    """Extract one person's contact details from a search-result card.

    The card's link span carries a JSON payload in its ``data-pjlb``
    attribute; the payload's ``url`` field is the base64-encoded address of
    the detail page. That page is downloaded and the contact fields are read
    from it.

    Args:
        elem: BeautifulSoup tag of one result card.

    Returns:
        dict with the keys ``name``, ``phone``, ``street``, ``town`` and
        ``postal-code``. All values are empty strings when the card has no
        link span, its payload cannot be decoded, the URL contains ``:80``
        (treated as a company listing), or the detail page cannot be
        fetched. Otherwise each value is the stripped text of the matching
        element, or ``""`` when the page lacks that element.
    """
    result = {
        "name": "",
        "phone": "",
        "street": "",
        "town": "",
        "postal-code": "",
    }

    # ``attrs`` has to be a dict; find() returns None instead of raising when
    # the card has no link span.
    person = elem.find("span", {"class": "bi-pos-links pj-lb pj-link"})
    if person is None:
        return result

    try:
        info = json.loads(person.attrs["data-pjlb"])
        # b64decode yields bytes; decode so the substring test below and the
        # URL handed to requests are text on both Python 2 and 3.
        url = base64.b64decode(info["url"]).decode("utf-8")
    except (KeyError, TypeError, ValueError):
        # Missing attribute/field, or payload that is not valid JSON/base64:
        # skip this card rather than abort the whole search.
        return result

    if ":80" in url:
        # We do not care about companies
        return result

    try:
        # A timeout keeps one stalled detail page from hanging the program.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print("Error while requesting data")
        return result
    if response.status_code != 200:
        print("Error while requesting data")
        return result

    document = bs4.BeautifulSoup(response.text, "lxml")
    found = {
        "name": document.find(itemprop="name"),
        "phone": document.find("span", {"class": "coord-numero"}),
        "street": document.find(itemprop="streetAddress"),
        "town": document.find(itemprop="addressLocality"),
        "postal-code": document.find(itemprop="postalCode"),
    }

    for key, node in found.items():
        # Some pages lack some fields; find() returns None for those.
        if node is not None:
            text = node.text.strip()
            # On Python 2 (where str is bytes) keep returning UTF-8 byte
            # strings as before; on Python 3 return text unchanged.
            result[key] = text.encode("UTF-8") if str is bytes else text

    return result
def search(name, town):
    """Search the pagesjaunes.fr white pages and print every person found.

    Posts the query to the search endpoint, passes each ``<header>`` whose
    first CSS class is ``v-card`` to ``parse_card``, keeps the cards that
    came back with a non-empty name, and prints them.

    Args:
        name: Value sent as the ``quoiqui`` field (who to look for).
        town: Value sent as the ``ou`` field (where to look).

    Returns:
        None. Results are printed; ``Error`` is printed instead when the
        request fails or the response status is not 200.
    """
    endpoint = "http://www.pagesjaunes.fr/pagesblanches/recherche"
    form_data = {
        "quoiqui": name,
        "ou": town,
    }

    try:
        # The fields go both in the query string and in the POST body, as
        # before; ``params=`` lets requests build the query string, which
        # avoids the Python-2-only urllib.urlencode.
        response = requests.post(
            endpoint, params=form_data, data=form_data, timeout=10
        )
    except requests.RequestException:
        print("Error")
        return
    if response.status_code != 200:
        print("Error")
        return

    document = bs4.BeautifulSoup(response.text, "lxml")
    results = []
    for elem in document.find_all("header"):
        # Headers without a class attribute are skipped instead of raising
        # KeyError.
        classes = elem.get("class") or []
        if not classes or classes[0] != "v-card":
            continue
        card = parse_card(elem)
        # parse_card leaves the name empty for cards it could not resolve.
        if card["name"] != "":
            results.append(card)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("\n----- Found {} result(s) -----".format(len(results)))
    for record in results:
        for key in record:
            print("[{}] --> {}".format(key, record[key]))
        print("----------\n")
# Command-line entry point: ask for a name and a town, then run the search.
# Guarded so that importing this module does not prompt or touch the network.
if __name__ == "__main__":
    try:
        prompt = raw_input  # Python 2
    except NameError:
        prompt = input  # Python 3: raw_input was renamed to input

    name = prompt("Enter name : ")
    town = prompt("Enter town : ")
    search(name, town)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment