Created
July 15, 2019 16:09
-
-
Save sanand0/43c0127a97201533fb642fa37d80f9be to your computer and use it in GitHub Desktop.
Manhattan Coffee Shop location scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas | |
import os | |
import pandas as pd | |
import requests | |
def mcdonalds():
    '''Return a DataFrame of McDonalds outlets (store, lat, lng, address).

    Calls the McDonalds restaurant-locator endpoint once, asking for up to
    1000 results around latitude 40.78, longitude -73.95 with radius 50
    (units are whatever the endpoint uses -- not visible here).

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server does not answer in time.
    '''
    url = 'https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do'
    # A timeout prevents the scraper from hanging forever on a stalled server.
    response = requests.get(url, params={
        'method': 'searchLocation',
        'latitude': '40.78',
        'longitude': '-73.95',
        'radius': '50',
        'maxResults': '1000',
        'country': 'us',
        'language': 'en-us',
    }, timeout=30)
    # Fail loudly on an error page instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    data = response.json()
    # Each feature's coordinates are read as [lng, lat].
    result = [
        {
            'store': 'McDonalds',
            'lat': row['geometry']['coordinates'][1],
            'lng': row['geometry']['coordinates'][0],
            'address': row['properties']['addressLine1'],
        }
        for row in data['features']
    ]
    # Explicit columns keep the schema stable even when no outlets come back.
    return pd.DataFrame(result, columns=['store', 'lat', 'lng', 'address'])
def starbucks():
    '''Return a DataFrame with Manhattan Starbucks (store, address, lat, lng)

    Queries the Starbucks store locator once per map centre point and
    combines the results, dropping rows that are exact duplicates.

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server does not answer in time.
    '''
    # I went to https://www.starbucks.com/store-locator and
    # moved the map around to get center points that seem to cover Manhattan.
    map_points = (
        [40.709353, -74.002588],
        [40.72899, -73.982323],
        [40.741837, -74.008072],
        [40.741317, -73.972179],
        [40.761994, -73.959133],
        [40.763489, -73.989517],
        [40.763814, -74.01415],
        [40.781312, -73.98617],
        [40.773145, -73.960995],
        [40.79807, -73.977686],
        [40.786317, -73.928566],
        [40.818219, -73.943928],
    )
    result = []
    for lat, lng in map_points:
        url = f'https://www.starbucks.com/bff/locations?lat={lat}&lng={lng}'
        # A timeout prevents one stalled request from hanging the whole scrape.
        response = requests.get(
            url, headers={'x-requested-with': 'XMLHttpRequest'}, timeout=30)
        # Fail loudly on an error page instead of a confusing KeyError later.
        response.raise_for_status()
        data = response.json()
        result.extend(
            {
                'store': 'Starbucks',
                'lat': row['coordinates']['latitude'],
                'lng': row['coordinates']['longitude'],
                'address': ', '.join(row['addressLines']),
            }
            for row in data['stores']
        )
    # Explicit columns keep the schema stable even when no stores come back.
    # Presumably neighbouring centre points return overlapping stores, hence
    # the de-duplication.
    stores = pd.DataFrame(result, columns=['store', 'lat', 'lng', 'address'])
    return stores.drop_duplicates()
def scrape_stores(target, overwrite=False):
    '''Return a DataFrame of all scraped stores, cached in an Excel file.

    target: path of the Excel file used as a cache.
    overwrite: if False and target already exists, the file is loaded and
        nothing is scraped; otherwise the stores are scraped afresh and
        target is (re)written.
    '''
    if not overwrite and os.path.exists(target):
        print(target, 'already exists. Not scraping')
        return pd.read_excel(target)
    # ignore_index gives the fresh result a clean 0..n-1 index, the same as
    # the frame that read_excel returns on the cached path. Without it the
    # per-chain indices repeat.
    data = pd.concat([
        mcdonalds(),
        starbucks(),
    ], ignore_index=True)
    data.to_excel(target, index=False)
    return data
def manhattan_map():
    '''Build Manhattan census-tract shapes with 2010 population attached.

    Reads the population workbook 't_pl_p5_ct.xlsx' and the census tract
    shapefile 'nyct2010_19b/nyct2010.shp', keeps the Manhattan rows of both,
    and joins Pop2010 onto the shapes by CT2010.

    Side effects: creates the 'map' directory if needed and writes
    'map/population.shp' (tract shapes with population) and
    'map/single.shp' (all tracts dissolved into one shape).

    Returns the merged GeoDataFrame, reprojected to EPSG:4326.
    '''
    # Load population and census shape file data
    cols = ['BoroName', 'FIPSCode', 'BoroCode', 'CT2010', 'Pop2000', 'Pop2010',
            'Change', 'ChangePC', 'Acres', 'Density']
    # The first 7 rows are skipped and column names are supplied by hand.
    pop = pd.read_excel('t_pl_p5_ct.xlsx', skiprows=range(0, 7), header=None,
                        names=cols, dtype=object)
    shape = geopandas.read_file('nyct2010_19b/nyct2010.shp')
    # Filter for Manhattan
    pop = pop[pop.BoroName == 'Manhattan']
    shape = shape[shape.BoroName == 'Manhattan']
    # Merge shapes
    shape = shape.merge(pop[['CT2010', 'Pop2010']], on='CT2010')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('map', exist_ok=True)
    # Convert to WGS84 (lat-long) coordinate system and save.
    # epsg=4326 replaces the deprecated {'init': 'epsg:4326'} dict syntax.
    shape = shape.to_crs(epsg=4326)
    # Pop2010 was read as object dtype; cast it so it is saved as a number.
    shape['Pop2010'] = shape['Pop2010'].astype(int)
    shape.to_file('map/population.shp')
    # Save as a single blob for use by reshaper
    shape.dissolve(by='BoroName')[['BoroCode', 'geometry']].to_file('map/single.shp')
    return shape
def stores_map(stores, shape):
    '''Save the stores that fall inside shape as a CSV and a shapefile.

    stores: DataFrame with lat, lng, store and address columns.
    shape: GeoDataFrame whose combined geometry is the area of interest.
        Its coordinates are assumed to be in the same lat-long system as
        the store coordinates.

    Side effects: writes 'manhattan-stores.csv' and 'map/points.shp'
    (creating the 'map' directory if it does not exist).
    '''
    geom = geopandas.points_from_xy(stores.lng, stores.lat)
    # Tag the points with the shape's CRS so points.shp is written with
    # projection information instead of none.
    points = geopandas.GeoDataFrame(stores, geometry=geom, crs=shape.crs)
    # Keep only the stores that touch the union of all the shapes.
    points = points[points.intersects(shape.unary_union)]
    points[['lat', 'lng', 'store', 'address']].to_csv(
        'manhattan-stores.csv', index=False, encoding='utf-8')
    # Do not rely on another function having created the output directory.
    os.makedirs('map', exist_ok=True)
    points.to_file('map/points.shp')
if __name__ == '__main__':
    # Step 1: get the store locations, reusing stores.xlsx when it exists.
    store_table = scrape_stores('stores.xlsx', overwrite=False)
    # Step 2: build the Manhattan census-tract map with population.
    census_tracts = manhattan_map()
    # Step 3: keep the stores inside Manhattan and save them.
    stores_map(store_table, census_tracts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment