Last active
July 4, 2018 10:35
-
-
Save parvathysarat/12988d9458f7eb1be3a3b6725163a646 to your computer and use it in GitHub Desktop.
Using the Google Maps API to scrape details of safe spots in cities. Google only allows us to scrape results from the first 3 pages of its search results. The number of call requests is also restricted per key.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

import requests
import pandas as pd
# API key for the Google Maps Places web service (replace with a real key).
key= " #key "

# Google's Places Text Search returns at most 20 results per page and only
# exposes the first 3 pages (60 results total) via next_page_token, so we
# fetch at most 3 pages.
place_ids = []
url = ("https://maps.googleapis.com/maps/api/place/textsearch/json"
       "?query=mumbai+police+station+india&key=" + key)
for page in range(3):
    r = requests.get(url)
    out = r.json()
    # Each result's 'place_id' is the handle used by the Place Details call below.
    place_ids.extend(d['place_id'] for d in out['results'])
    if page < 2:
        # Fewer than 60 results -> no token; stop instead of raising KeyError.
        nextpagetoken = out.get('next_page_token')
        if nextpagetoken is None:
            break
        # The token takes a short time to become valid on Google's side;
        # requesting it immediately returns INVALID_REQUEST with no results.
        time.sleep(2)
        # NOTE: the original switched the query to 'mumbai+mall+india' here;
        # all params except key are ignored when pagetoken is set, so keep
        # one consistent query.
        url = ("https://maps.googleapis.com/maps/api/place/textsearch/json"
               "?query=mumbai+police+station+india&key=" + key
               + "&pagetoken=" + nextpagetoken)

# Keep the name 'id' (it shadows the builtin) because the details loop
# below iterates over it.
id = place_ids
# Parallel lists holding one extracted feature per place, aligned by index.
contact=[]
website=[]
rating=[]
address=[]
name=[]
lat=[]
lng=[]
for d in id:
    # One Place Details request per place_id collected above.
    url="https://maps.googleapis.com/maps/api/place/details/json?placeid="+d+"&key="+key
    r=requests.get(url)
    output=r.json()
    # Hoist the 'result' payload: every field below is read from it.
    result = output['result']
    # These fields are always present in a successful Details response.
    name.append(result['name'])
    address.append(result['formatted_address'])
    lat.append(result['geometry']['location']['lat'])
    lng.append(result['geometry']['location']['lng'])
    # Optional fields: fall back to a placeholder instead of raising KeyError.
    contact.append(result.get('international_phone_number', "Not Available"))
    website.append(result.get('website', "Not Available"))
    rating.append(result.get('rating', "Not Available"))
# Assemble the scraped feature lists into one table and persist it to CSV.
extracted_columns = [
    ("NAME", name),
    ("ADDRESS", address),
    ("LATITUDE", lat),
    ("LONGITUDE", lng),
    ("CONTACT No.", contact),
    ("WEBSITE", website),
    ("GOOGLE RATING", rating),
]
df = pd.DataFrame()
for label, values in extracted_columns:
    df[label] = values
# Write the table without the numeric row index.
df.to_csv("SafeSpots.csv", index=None)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment