Skip to content

Instantly share code, notes, and snippets.

@sanand0
Created July 15, 2019 16:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sanand0/43c0127a97201533fb642fa37d80f9be to your computer and use it in GitHub Desktop.
Save sanand0/43c0127a97201533fb642fa37d80f9be to your computer and use it in GitHub Desktop.
Manhattan Coffee Shop location scraper
import geopandas
import os
import pandas as pd
import requests
def mcdonalds():
    '''Return a DataFrame of McDonald's restaurants around Manhattan.

    Queries the McDonald's restaurant locator for up to 1000 results around
    latitude 40.78, longitude -73.95 (``radius`` parameter 50) and keeps one
    row per returned feature.

    Returns:
        pandas.DataFrame with columns ``store``, ``lat``, ``lng``, ``address``
        (the columns are present even when no feature is returned).

    Raises:
        requests.HTTPError: if the locator answers with an error status.
        requests.Timeout: if the locator does not answer within 30 seconds.
    '''
    url = 'https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do'
    # A timeout keeps the scraper from hanging forever on a stalled connection.
    response = requests.get(url, params={
        'method': 'searchLocation',
        'latitude': '40.78',
        'longitude': '-73.95',
        'radius': '50',
        'maxResults': '1000',
        'country': 'us',
        'language': 'en-us',
    }, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    result = [
        {
            'store': 'McDonalds',
            # Coordinates are read as [lng, lat]: index 1 is lat, index 0 is lng.
            'lat': row['geometry']['coordinates'][1],
            'lng': row['geometry']['coordinates'][0],
            'address': row['properties']['addressLine1'],
        }
        for row in response.json()['features']
    ]
    # Explicit columns keep the schema stable when the result list is empty.
    return pd.DataFrame(result, columns=['store', 'lat', 'lng', 'address'])
def starbucks():
    '''Return a DataFrame with Manhattan Starbucks (store, lat, lng, address).

    The Starbucks locator is queried once per map centre point. Results of
    neighbouring queries overlap, so exact duplicate rows are dropped.

    Returns:
        pandas.DataFrame with columns ``store``, ``lat``, ``lng``, ``address``
        (the columns are present even when no store is returned).

    Raises:
        requests.HTTPError: if the locator answers with an error status.
        requests.Timeout: if a request is not answered within 30 seconds.
    '''
    # I went to https://www.starbucks.com/store-locator and
    # moved the map around to get center points that seem to cover Manhattan.
    map_points = (
        [40.709353, -74.002588],
        [40.72899, -73.982323],
        [40.741837, -74.008072],
        [40.741317, -73.972179],
        [40.761994, -73.959133],
        [40.763489, -73.989517],
        [40.763814, -74.01415],
        [40.781312, -73.98617],
        [40.773145, -73.960995],
        [40.79807, -73.977686],
        [40.786317, -73.928566],
        [40.818219, -73.943928],
    )
    # Loop-invariant request headers, built once.
    headers = {'x-requested-with': 'XMLHttpRequest'}
    result = []
    for lat, lng in map_points:
        url = f'https://www.starbucks.com/bff/locations?lat={lat}&lng={lng}'
        # A timeout keeps one stalled request from hanging the whole scrape.
        response = requests.get(url, headers=headers, timeout=30)
        # Fail loudly on 4xx/5xx instead of parsing an error page as JSON.
        response.raise_for_status()
        result.extend(
            {
                'store': 'Starbucks',
                'lat': row['coordinates']['latitude'],
                'lng': row['coordinates']['longitude'],
                'address': ', '.join(row['addressLines']),
            }
            for row in response.json()['stores']
        )
    # Explicit columns keep the schema stable when no store was returned.
    stores = pd.DataFrame(result, columns=['store', 'lat', 'lng', 'address'])
    return stores.drop_duplicates()
def scrape_stores(target, overwrite=False):
    '''Return the combined store locations, scraping only when needed.

    Args:
        target: path of the Excel file used as a cache for the scraped data.
        overwrite: when False (default) and ``target`` already exists, the
            file is read back instead of scraping again.

    Returns:
        pandas.DataFrame holding the McDonald's rows followed by the
        Starbucks rows, indexed 0..n-1.
    '''
    if not overwrite and os.path.exists(target):
        print(target, 'already exists. Not scraping')
        return pd.read_excel(target)
    # ignore_index gives the fresh result the same clean 0..n-1 index that the
    # cached path gets from read_excel, instead of duplicate labels carried
    # over from the individual frames.
    data = pd.concat([mcdonalds(), starbucks()], ignore_index=True)
    data.to_excel(target, index=False)
    return data
def manhattan_map():
    '''Build Manhattan census-tract shapes with 2010 population and save them.

    Reads the population workbook ``t_pl_p5_ct.xlsx`` and the census tract
    shapefile ``nyct2010_19b/nyct2010.shp``, keeps the Manhattan rows of both,
    joins them on ``CT2010`` and writes two shapefiles into ``map/``:

    - ``map/population.shp``: one shape per tract with its ``Pop2010``
    - ``map/single.shp``: all tracts dissolved by ``BoroName``

    Returns:
        The merged GeoDataFrame, reprojected to EPSG:4326.
    '''
    # Load population and census shape file data.
    # The first 7 rows of the workbook are skipped and column names are
    # supplied here; dtype=object stops pandas from converting cell values.
    cols = ['BoroName', 'FIPSCode', 'BoroCode', 'CT2010', 'Pop2000', 'Pop2010',
            'Change', 'ChangePC', 'Acres', 'Density']
    pop = pd.read_excel('t_pl_p5_ct.xlsx', skiprows=range(0, 7), header=None,
                        names=cols, dtype=object)
    shape = geopandas.read_file('nyct2010_19b/nyct2010.shp')

    # Filter for Manhattan
    pop = pop[pop.BoroName == 'Manhattan']
    shape = shape[shape.BoroName == 'Manhattan']

    # Attach the 2010 population to each tract shape
    shape = shape.merge(pop[['CT2010', 'Pop2010']], on='CT2010')

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs('map', exist_ok=True)

    # Convert to WGS84 (lat-long) coordinate system and save.
    # epsg=4326 replaces the deprecated {'init': 'epsg:4326'} CRS syntax.
    shape = shape.to_crs(epsg=4326)
    shape['Pop2010'] = shape['Pop2010'].astype(int)
    shape.to_file('map/population.shp')

    # Save as a single blob for use by reshaper
    shape.dissolve(by='BoroName')[['BoroCode', 'geometry']].to_file('map/single.shp')
    return shape
def stores_map(stores, shape):
    '''Save the stores that fall inside ``shape`` as a CSV and a shapefile.

    Args:
        stores: table of stores with ``lat``, ``lng``, ``store`` and
            ``address`` columns.
        shape: geopandas object whose ``unary_union`` is the area to keep.

    Writes ``manhattan-stores.csv`` and ``map/points.shp``; returns nothing.
    '''
    # One point per store, x = longitude, y = latitude
    located = geopandas.GeoDataFrame(
        stores, geometry=geopandas.points_from_xy(stores.lng, stores.lat))

    # Keep only the stores that touch the merged outline of all shapes
    inside = located[located.intersects(shape.unary_union)]

    csv_columns = ['lat', 'lng', 'store', 'address']
    inside[csv_columns].to_csv(
        'manhattan-stores.csv', index=False, encoding='utf-8')
    inside.to_file('map/points.shp')
if __name__ == '__main__':
    # Scrape (or reuse the cached) store list first, then build the Manhattan
    # shapes, and finally keep and save only the stores inside those shapes.
    stores_map(
        scrape_stores('stores.xlsx', overwrite=False),
        manhattan_map(),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment