Created
January 27, 2019 04:32
-
-
Save jsundram/055d0118721953eca31888786fcb13ad to your computer and use it in GitHub Desktop.
Ingest some addresses from a CSV, geocode them, and make a map.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import io | |
import json | |
import os | |
import re | |
import time | |
from itertools import groupby | |
import folium | |
import geocoder | |
from attrdict import AttrDict as attrdict | |
""" | |
Read data | |
Geocode addresses | |
Plot on map, color-coded by agency | |
tooltips for # of apartments. | |
change glyph for presence of 1-bed | |
add icon for accessibility | |
""" | |
# Rate Limits:
# 10000 requests/ day
# 60 requests/ minute
# 2 requests / second
# LocationIQ API key (uses Nominatim under the hood). Set the
# LOCATIONIQ_API_KEY environment variable, or paste your key in place of the
# empty fallback below. Reading it from the environment keeps the secret out
# of the source file.
api_key = os.environ.get('LOCATIONIQ_API_KEY', '')
def normalize(s):
    """Return address string *s* with common spelling variants collapsed.

    Doing this helps minimize geocoder calls by avoiding calling variants
    of the same address.

    Rewrites applied, in order:
      * Street / St.  -> St       (case-insensitive)
      * Avenue / Ave. -> Ave      (case-insensitive)
      * Road / Rd.    -> Rd       (case-insensitive)
      * East / E.     -> E        (case-insensitive)
      * apt / Apt.    -> APT      (case-insensitive)
      * a leading lowercase " st", " rd", " ave" is capitalized
      * a standalone lowercase "e" becomes "E"
      * runs of spaces collapse to one space, and "#" is removed
    """
    # The patterns are anchored on word boundaries so they only rewrite whole
    # words: without the anchors "Broadway" (contains "road") and
    # "Eastchester" (contains "East") were being mangled.
    s = re.sub(r'\b(?:Street\b|St\.)', 'St', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:Avenue\b|Ave\.)', 'Ave', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:Road\b|Rd\.)', 'Rd', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:East\b|E\.)', 'E', s, flags=re.IGNORECASE)
    # "apt" not followed by another letter, with an optional trailing dot.
    # (The old 'apt|Apt\.' alternation always matched the dotless branch
    # first, leaving "APT." behind.) A digit may follow directly ("Apt3B").
    s = re.sub(r'\bapt(?![a-z])\.?', 'APT', s, flags=re.IGNORECASE)
    s = re.sub(' st', ' St', s)
    s = re.sub(' rd', ' Rd', s)
    s = re.sub(' ave', ' Ave', s)
    # Raw string is required here: in a plain string '\b' is a backspace
    # character, so the previous pattern could never match.
    s = re.sub(r'\be\b', 'E', s)
    s = re.sub(' +', ' ', s)
    return s.replace('#', '')
def read_pibly(filename='pibly.csv'):
    """Read the pibly CSV, geocode its addresses and return the points.

    If ``pibly_points.json`` exists in the working directory it is loaded
    and returned instead, and *filename* is not read at all. Otherwise each
    row's ``Address`` column is normalized and split into a street address
    and an apartment label, the addresses are geocoded via geocode_list(),
    and the resulting points are written to the cache file.

    Returns a list of attrdict points.
    Raises ValueError if a row's address does not contain 'Bronx'.
    """
    # note: I fixed the data where it was missing apartment numbers or was otherwise broken.
    cachefile = 'pibly_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Build a real list: on Python 3 map() is lazy, and the result
            # is later concatenated and iterated more than once.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        for r in map(attrdict, reader):
            s = normalize(r.Address)
            i = s.find('Bronx')
            if i < 0:
                raise ValueError("Address does not contain 'Bronx': %r" % s)
            # Everything before 'Bronx' is street + apartment; drop the
            # separator (space and/or comma) that preceded the city name.
            start, end = s[:i].rstrip(' ,'), s[i:]
            start, _, apt = start.partition(' APT')
            address = start + ', ' + end
            # strip() so the label reads 'APT 3B' rather than 'APT  3B'.
            apartment = 'APT ' + apt.strip()
            addresses.append((address, apartment, dict(
                bedrooms=r.Bedrooms,
                apartment=apartment,
            )))

    points = geocode_list(addresses)
    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points
def read_foo(filename='foo.csv'):
    """Read the foo CSV, geocode its addresses and return the points.

    If ``foo_points.json`` exists in the working directory it is loaded and
    returned instead, and *filename* is not read at all. Otherwise the CSV
    must have exactly five columns, taken in order as location, city,
    state, zipcode and consumer. The location column is normalized and
    split on ' APT ' and ' Bed ' into street address, apartment and bedroom
    count. The addresses are geocoded via geocode_list() and the resulting
    points are written to the cache file.

    Returns a list of attrdict points.
    """
    # I went through and heavily edited the original foo.csv export; sorting it
    # and normalizing the data (Apt 1 Bed 2), then deleting the Bed 1 rows for
    # apartments with multiple beds so that there was only 1 entry per apartment that
    # indicated the total number of bedrooms in the apartment.
    cachefile = 'foo_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Build a real list: on Python 3 map() is lazy, and the result
            # is later concatenated and iterated more than once.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        # Columns are identified by position, not by header text.
        location, city, state, zipcode, consumer = reader.fieldnames
        for r in reader:
            loc = normalize(r[location])
            address, _, apartment = loc.partition(' APT ')
            apartment, _, beds = apartment.partition(' Bed ')
            address_str = ', '.join([address, r[city], r[state], r[zipcode]])
            addresses.append((address_str, apartment, dict(
                consumer=r[consumer].strip(),
                bedrooms=beds,
                apartment='APT ' + apartment,
            )))

    points = geocode_list(addresses)
    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points
def get_centroid(points):
    """Return the (lat, lng) arithmetic mean of the points' ``latlng`` pairs.

    *points* is an iterable of objects exposing a ``latlng`` attribute that
    holds a (lat, lng) pair.

    Raises ValueError if *points* is empty (previously this surfaced as an
    opaque tuple-unpacking error).
    """
    coords = [p.latlng for p in points]
    if not coords:
        raise ValueError("get_centroid() requires at least one point")
    lats, lngs = zip(*coords)
    count = float(len(coords))
    return sum(lats) / count, sum(lngs) / count
def get_extents(points):
    """Return ((min_lat, min_lng), (max_lat, max_lng)) over the points.

    *points* is an iterable of objects exposing a ``latlng`` attribute that
    holds a (lat, lng) pair.
    """
    coords = [pt.latlng for pt in points]
    latitudes, longitudes = zip(*coords)
    lower_corner = (min(latitudes), min(longitudes))
    upper_corner = (max(latitudes), max(longitudes))
    return lower_corner, upper_corner
def make_map(point_dict, colors):
    """Build a folium map with one marker per point, colored per dataset.

    point_dict: mapping of dataset name -> iterable of points. Each point
        must expose ``latlng``, ``address``, ``apartments``, ``bedrooms``
        and ``contains_onebed``.
    colors: mapping of dataset name -> marker color name; every color must
        be one of the names in ``valid_colors`` below.

    Returns the folium.Map, centered on the centroid of all points and
    fitted to their bounding box.

    Raises ValueError for an unknown color, for a dataset without a color,
    or when there are no points at all.
    """
    valid_colors = set(['red', 'blue', 'green', 'purple', 'orange',
        'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
        'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray',
        'black', 'lightgray'])
    for color in colors.values():
        if color not in valid_colors:
            raise ValueError("Invalid color specified: %s" % color)
    for name in point_dict:
        if name not in colors:
            raise ValueError("No color specified for dataset: %s" % name)

    # Materialize every dataset once: the points are walked twice below
    # (for the bounds, then for the markers), which would silently yield
    # nothing the second time for a lazy iterable such as a Python 3 map().
    datasets = dict((name, list(points)) for name, points in point_dict.items())
    # Flatten without reduce(), which is not a builtin on Python 3.
    all_points = [p for points in datasets.values() for p in points]
    if not all_points:
        raise ValueError("No points to plot")

    clat, clng = get_centroid(all_points)
    # NOTE(review): 'Stamen Toner' may not be a built-in tileset in newer
    # folium releases -- confirm against the installed version.
    m = folium.Map(
        location=[clat, clng],
        zoom_start=12,
        max_zoom=20,
        tiles='Stamen Toner'
    )
    m.fit_bounds(get_extents(all_points))

    # icons:
    # fa: https://fontawesome.com/icons/accessible-icon?style=brands
    # glyphicon: https://getbootstrap.com/docs/3.3/components/#glyphicons-glyphs
    for name, points in datasets.items():
        clr = colors[name]
        for p in points:
            folium.Marker(
                location=p.latlng,
                popup=p.address,
                tooltip='%s Apartment(s), %s Total Bedroom(s)' % (p.apartments, p.bedrooms),
                icon=folium.Icon(
                    color=clr,
                    # star marks buildings with at least one 1-bedroom unit
                    icon='star' if p.contains_onebed else 'home',
                    prefix='fa',  # fa for font-awesome, glyphicon for bootstrap 3
                )
            ).add_to(m)
    # TODO: could add a legend like: https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373
    return m
def geocode(address):
    """Geocode *address* with LocationIQ and return ``(lat, lng)``.

    Best-effort: any failure (request error, no features in the response,
    unexpected response shape) is printed and ``None`` is returned instead
    of raising, so one bad address does not abort a whole batch.
    """
    # Bound before the try so the handler can report it even when the
    # request itself raises; previously that case hit an unbound `result`
    # inside the except block and crashed.
    result = None
    try:
        result = geocoder.locationiq(address, key=api_key)
        d = attrdict(result.geojson)
        lng, lat = d.features[0].geometry.coordinates  # x, y
        return lat, lng
    except Exception as e:
        print("geocoding failed for %r: %s (result: %s)" % (address, e, result))
        return None
def geocode_list(addresses):
    """Geocode each distinct address and summarize the apartments at it.

    addresses: iterable of ``(address, apartment, extra)`` tuples, where
        ``extra`` is a dict whose 'bedrooms' value is convertible to int.

    Entries are grouped by address so each distinct address is geocoded
    once. Addresses that fail to geocode are skipped.

    Returns a list of attrdict points with ``address``, ``apartments``
    (number of entries at that address), ``bedrooms`` (their total),
    ``contains_onebed`` and ``latlng``.
    """
    def by_address(entry):
        # Plain indexing instead of a tuple-unpacking lambda, which is a
        # SyntaxError on Python 3.
        return entry[0]

    points = []
    # groupby() only merges adjacent items, so sort by the same key first.
    addresses = sorted(addresses, key=by_address)
    for address, values in groupby(addresses, by_address):
        print(address)
        latlng = geocode(address)
        time.sleep(1)  # rate limit
        if not latlng:
            continue
        print("\t%2.5f, %2.5f" % (latlng))

        # compute total number of beds and apartments
        bedrooms = [int(extra['bedrooms']) for _address, _apt, extra in values]
        apartments = len(bedrooms)

        # mark if there are any 1-bedrooms
        contains_1br = any(i == 1 for i in bedrooms)
        points.append(attrdict(
            address=address,
            apartments=apartments,
            bedrooms=sum(bedrooms),
            contains_onebed=contains_1br,
            latlng=latlng,
        ))
    return points
def main():
    """Load both datasets, plot them on one map and save it as map.html."""
    foo_points = read_foo()
    pibly_points = read_pibly()
    datasets = dict(foo=foo_points, pibly=pibly_points)
    palette = dict(foo='darkblue', pibly='darkgreen')
    rendered = make_map(datasets, palette)
    rendered.save('map.html')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment