Skip to content

Instantly share code, notes, and snippets.

@jsundram
Created January 27, 2019 04:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsundram/055d0118721953eca31888786fcb13ad to your computer and use it in GitHub Desktop.
Save jsundram/055d0118721953eca31888786fcb13ad to your computer and use it in GitHub Desktop.
ingest some addresses from a csv, geocode them and make a map.
import csv
import io
import json
import os
import re
import time
from itertools import groupby
import folium
import geocoder
from attrdict import AttrDict as attrdict
"""
Read data
Geocode addresses
Plot on map, color-coded by agency
tooltips for # of apartments.
change glyph for presence of 1-bed
add icon for accessibility
"""
# Rate limits (presumably those of the LocationIQ key configured below --
# confirm against your own plan):
#   10000 requests / day
#   60 requests / minute
#   2 requests / second
api_key='' # copy your LocationIQ API key here (uses Nominatim under the hood).
def normalize(s):
    """Return a canonical spelling of a street address.

    Doing this helps minimize geocoder calls by collapsing variants of the
    same address ("Street"/"St.", "Avenue"/"Ave.", "Road"/"Rd.",
    "East"/"E.", "apt"/"Apt.") into one form. Runs of spaces are squeezed
    and ``#`` characters are dropped.

    Abbreviation patterns are anchored on word boundaries so that they do
    not rewrite the inside of longer words (e.g. "Broadway",
    "Eastchester").

    Args:
        s: The raw address string.

    Returns:
        The normalized address string.
    """
    flags = re.IGNORECASE
    s = re.sub(r'\bStreet\b|\bSt\.', 'St', s, flags=flags)
    s = re.sub(r'\bAvenue\b|\bAve\.', 'Ave', s, flags=flags)
    s = re.sub(r'\bRoad\b|\bRd\.', 'Rd', s, flags=flags)
    s = re.sub(r'\bEast\b|\bE\.', 'E', s, flags=flags)
    # Consume an optional trailing dot so "Apt." does not become "APT.";
    # the lookahead still allows "Apt4" while leaving words like "Aptos".
    s = re.sub(r'\bApt(?![a-z])\.?', 'APT', s, flags=flags)
    # Capitalize abbreviations that were already written without a dot.
    s = re.sub(' st', ' St', s)
    s = re.sub(' rd', ' Rd', s)
    s = re.sub(' ave', ' Ave', s)
    # A standalone lowercase "e" is the compass direction.
    s = re.sub(r'\be\b', 'E', s)
    s = re.sub(' +', ' ', s)
    return s.replace('#', '')
def read_pibly(filename='pibly.csv'):
    """Load the pibly listings, geocode them and cache the result.

    If ``pibly_points.json`` exists in the working directory its contents
    are returned and ``filename`` is not read. Otherwise each row's
    ``Address`` column is normalized and split around the word "Bronx"
    into a street address and an apartment label, the addresses are
    geocoded with ``geocode_list``, and the points are written to the
    cache file.

    Note: the input data was fixed by hand where it was missing apartment
    numbers or was otherwise broken.

    Args:
        filename: Path of the CSV file; it must provide ``Address`` and
            ``Bedrooms`` columns.

    Returns:
        A list of point records, as produced by ``geocode_list``.

    Raises:
        ValueError: If an address does not contain "Bronx".
    """
    cachefile = 'pibly_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Return a real list: a lazy map() object (Python 3) cannot be
            # concatenated or iterated more than once by callers.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        for r in map(attrdict, reader):
            s = normalize(r.Address)
            i = s.index('Bronx')
            # Everything before "Bronx" is street + apartment; trim the
            # separator (space and/or comma) instead of blindly dropping
            # exactly one character.
            before, end = s[:i].rstrip(' ,'), s[i:]
            street, _, apt = before.partition(' APT')
            address = street + ', ' + end
            # strip() avoids the double space in "APT  4".
            apartment = 'APT ' + apt.strip()
            addresses.append((address, apartment, dict(
                bedrooms=r.Bedrooms,
                apartment=apartment,
            )))

    points = geocode_list(addresses)
    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points
def read_foo(filename='foo.csv'):
    """Load the foo listings, geocode them and cache the result.

    If ``foo_points.json`` exists in the working directory its contents
    are returned and ``filename`` is not read. Otherwise each row's
    location is normalized and split into street address, apartment and
    bedroom count ("<street> APT <apt> Bed <n>"), the addresses are
    geocoded with ``geocode_list``, and the points are written to the
    cache file.

    Note: the original foo.csv export was heavily edited by hand: sorted
    and normalized ("Apt 1 Bed 2"), with the "Bed 1" rows deleted for
    apartments with multiple beds, so that there is only one entry per
    apartment and it indicates the apartment's total number of bedrooms.

    Args:
        filename: Path of the CSV file. Its five columns are read
            positionally as location, city, state, zipcode, consumer.

    Returns:
        A list of point records, as produced by ``geocode_list``.

    Raises:
        ValueError: If the CSV does not have exactly five columns.
    """
    cachefile = 'foo_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Return a real list: a lazy map() object (Python 3) cannot be
            # concatenated or iterated more than once by callers.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        location, city, state, zipcode, consumer = reader.fieldnames
        for r in reader:
            loc = normalize(r[location])
            address, _, apartment = loc.partition(' APT ')
            apartment, _, beds = apartment.partition(' Bed ')
            address_str = ', '.join([address, r[city], r[state], r[zipcode]])
            addresses.append((address_str, apartment, dict(
                consumer=r[consumer].strip(),
                bedrooms=beds,
                apartment='APT ' + apartment,
            )))

    points = geocode_list(addresses)
    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points
def get_centroid(points):
    """Return the mean (lat, lng) of ``points``.

    Each point must expose a ``latlng`` pair.
    """
    coords = [pt.latlng for pt in points]
    lats, lngs = zip(*coords)
    mean_lat = sum(lats) / float(len(lats))
    mean_lng = sum(lngs) / float(len(lngs))
    return mean_lat, mean_lng
def get_extents(points):
    """Return the bounding box of ``points`` as two (lat, lng) corners.

    The first corner holds the minimum latitude and longitude, the second
    the maximum. Each point must expose a ``latlng`` pair.
    """
    coords = [pt.latlng for pt in points]
    lats, lngs = zip(*coords)
    lower = (min(lats), min(lngs))
    upper = (max(lats), max(lngs))
    return lower, upper
def make_map(point_dict, colors):
    """Build a folium map with one marker per geocoded address.

    Markers are colored per dataset. A marker shows a star icon when the
    address contains a one-bedroom apartment and a home icon otherwise;
    its tooltip gives the apartment and bedroom totals and its popup the
    address.

    Args:
        point_dict: Mapping of dataset name to an iterable of points. Each
            point must expose ``latlng``, ``address``, ``apartments``,
            ``bedrooms`` and ``contains_onebed``.
        colors: Mapping of dataset name to a marker color name.

    Returns:
        The populated ``folium.Map``, fitted to the extent of all points.

    Raises:
        ValueError: If a color is not in the accepted list, a dataset has
            no color, or there are no points at all.
    """
    valid_colors = {
        'red', 'blue', 'green', 'purple', 'orange',
        'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
        'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray',
        'black', 'lightgray',
    }
    for color in colors.values():
        if color not in valid_colors:
            raise ValueError("Invalid color specified: %s" % color)
    for name in point_dict:
        if name not in colors:
            raise ValueError("No color specified for dataset: %s" % name)

    # Materialize every dataset once so that one-shot iterables survive
    # both the flattening below and the marker loop.
    datasets = {name: list(pts) for name, pts in point_dict.items()}
    # Flatten without reduce(), which is not a builtin on Python 3.
    all_points = [p for pts in datasets.values() for p in pts]
    if not all_points:
        raise ValueError("No points to plot")

    clat, clng = get_centroid(all_points)
    m = folium.Map(
        location=[clat, clng],
        zoom_start=12,
        max_zoom=20,
        tiles='Stamen Toner'
    )
    m.fit_bounds(get_extents(all_points))

    # icons:
    # fa: https://fontawesome.com/icons/accessible-icon?style=brands
    # glyphicon: https://getbootstrap.com/docs/3.3/components/#glyphicons-glyphs
    for name, points in datasets.items():
        clr = colors[name]
        for p in points:
            folium.Marker(
                location=p.latlng,
                popup=p.address,
                tooltip='%s Apartment(s), %s Total Bedroom(s)' % (p.apartments, p.bedrooms),
                icon=folium.Icon(
                    color=clr,
                    icon='star' if p.contains_onebed else 'home',
                    prefix='fa',  # fa for font-awesome, glyphicon for bootstrap 3
                )
            ).add_to(m)
    # TODO: could add a legend like: https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373
    return m
def geocode(address):
    """Look up ``address`` with LocationIQ and return ``(lat, lng)``.

    This is best-effort: any failure (lookup error, empty or malformed
    response) is reported on stdout and ``None`` is returned instead of
    raising.

    Args:
        address: The address string to geocode.

    Returns:
        A ``(lat, lng)`` tuple, or ``None`` if the address could not be
        geocoded.
    """
    # Bind the name up front so the handler can report it even when the
    # lookup itself raises before assigning a result.
    result = None
    try:
        result = geocoder.locationiq(address, key=api_key)
        d = attrdict(result.geojson)
        lng, lat = d.features[0].geometry.coordinates  # x, y
        return lat, lng
    except Exception as e:
        print("geocoding failed for %r: %s (result: %s)" % (address, e, result))
        return None
def geocode_list(addresses):
    """Geocode each distinct address and summarize its apartments.

    Args:
        addresses: Iterable of ``(address, apartment, extra)`` tuples,
            where ``extra`` is a dict with a ``'bedrooms'`` entry that
            ``int()`` can parse.

    Returns:
        A list with one record per distinct address that could be
        geocoded, carrying ``address``, ``apartments`` (number of entries
        for that address), ``bedrooms`` (their total), ``contains_onebed``
        and ``latlng``. Addresses that fail to geocode are skipped.

    Raises:
        ValueError: If a ``'bedrooms'`` value is not an integer string.
    """
    # A plain function instead of ``lambda (a, d, e): a``: tuple parameter
    # unpacking is a SyntaxError on Python 3.
    def by_address(entry):
        return entry[0]

    points = []
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(addresses, key=by_address)
    for address, group in groupby(ordered, by_address):
        print(address)
        latlng = geocode(address)
        time.sleep(1)  # rate limit
        if not latlng:
            continue
        print("\t%2.5f, %2.5f" % latlng)
        # compute total number of beds and apartments
        bedrooms = [int(extra['bedrooms']) for _, _, extra in group]
        points.append(attrdict(
            address=address,
            apartments=len(bedrooms),
            bedrooms=sum(bedrooms),
            # mark if there are any 1-bedrooms
            contains_onebed=any(n == 1 for n in bedrooms),
            latlng=latlng,
        ))
    return points
def main():
    """Read both datasets, plot them on one map and save it as map.html."""
    datasets = {}
    datasets['foo'] = read_foo()
    datasets['pibly'] = read_pibly()

    palette = dict(foo='darkblue', pibly='darkgreen')

    make_map(datasets, palette).save('map.html')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment