jsundram/make_map.py

## make_map.py
import csv
import io
import json
import os
import re
import time
from itertools import groupby

import folium
import geocoder
from attrdict import AttrDict as attrdict

"""
Read data
Geocode addresses
Plot on map, color-coded by agency
tooltips for # of apartments.
change glyph for presence of 1-bed
add icon for accessibility
"""

# LocationIQ rate limits (as recorded by the original author):
#   10000 requests / day
#   60 requests / minute
#   2 requests / second
api_key='' # copy your LocationIQ API key here (uses Nominatim under the hood).


def normalize(s):
    """Canonicalize the spelling of a street address.

    Collapsing variants of the same address ("Street"/"St.", "Apt."/"apt",
    stray '#', repeated spaces) minimizes geocoder calls, because identical
    addresses are only looked up once.

    Args:
        s: raw address string.

    Returns:
        The address with street-type words abbreviated, the apartment
        marker spelled ``APT``, '#' removed and runs of spaces collapsed.
    """
    # Word boundaries keep the abbreviations from firing inside longer
    # names ("Broadway" must not become "BRdway", nor "Eastchester"
    # "Echester").
    s = re.sub(r'\b(?:Street\b|St\.)', 'St', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:Avenue\b|Ave\.)', 'Ave', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:Road\b|Rd\.)', 'Rd', s, flags=re.IGNORECASE)
    s = re.sub(r'\b(?:East\b|E\.)', 'E', s, flags=re.IGNORECASE)
    # "Apt", "apt", "Apt." and "Apt3B" all become "APT..."; the optional dot
    # is consumed so "Apt. 3" does not turn into "APT. 3".
    s = re.sub(r'\bApt(?![a-z])\.?', 'APT', s, flags=re.IGNORECASE)
    # Capitalize bare lowercase abbreviations (whole words only).
    s = re.sub(r' st\b', ' St', s)
    s = re.sub(r' rd\b', ' Rd', s)
    s = re.sub(r' ave\b', ' Ave', s)
    # A standalone lowercase "e" is the direction East. The pattern must be
    # a raw string, otherwise "\b" is a backspace character, not a boundary.
    s = re.sub(r'\be\b', 'E', s)
    # Drop '#' first so that "APT # 3" does not leave a double space behind.
    s = s.replace('#', '')
    return re.sub(' +', ' ', s)


def read_pibly(filename='pibly.csv'):
    """Load the pibly dataset and return one geocoded point per address.

    Results are cached in ``pibly_points.json``. When that file exists its
    contents are returned and neither ``filename`` nor the geocoder is used.

    Each CSV row needs an ``Address`` column containing the word ``Bronx``
    (street and apartment before it, the rest of the address from it
    onwards) and a ``Bedrooms`` column.

    Args:
        filename: path of the CSV export to read.

    Returns:
        A list of attrdict points as produced by ``geocode_list``.

    Raises:
        ValueError: if an address does not contain ``Bronx``.
    """
    # note: the data was hand-fixed where it was missing apartment numbers
    # or was otherwise broken.
    cachefile = 'pibly_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Return a real list: on Python 3 map() is a single-pass
            # iterator, but the points are walked several times downstream.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        for r in map(attrdict, reader):
            s = normalize(r.Address)
            i = s.find('Bronx')
            if i < 0:
                raise ValueError("Address has no 'Bronx' component: %r" % s)
            # Split "<street> APT <n> Bronx ..." around the borough name,
            # tolerating either a space or a comma before it.
            start, end = s[:i].rstrip(' ,'), s[i:]
            start, _, apt = start.partition(' APT')
            address = start + ', ' + end
            # strip() keeps the label as "APT 3B" (same shape as read_foo)
            # instead of "APT  3B".
            apartment = 'APT ' + apt.strip()

            addresses.append((address, apartment, dict(
                bedrooms=r.Bedrooms,
                apartment=apartment,
            )))
    points = geocode_list(addresses)

    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points


def read_foo(filename='foo.csv'):
    """Load the foo dataset and return one geocoded point per address.

    Results are cached in ``foo_points.json``. When that file exists its
    contents are returned and neither ``filename`` nor the geocoder is used.

    The CSV must have exactly five columns, in this order: location, city,
    state, zip code, consumer. The location is expected to look like
    ``<street> APT <n> Bed <k>`` after normalization.

    Args:
        filename: path of the CSV export to read.

    Returns:
        A list of attrdict points as produced by ``geocode_list``.
    """
    # The original foo.csv export was heavily hand-edited: sorted, normalized
    # to "Apt 1 Bed 2", and the "Bed 1" rows of multi-bed apartments removed,
    # so there is exactly one row per apartment carrying its total number of
    # bedrooms.
    cachefile = 'foo_points.json'
    if os.path.exists(cachefile):
        print("hitting cache: %s" % cachefile)
        with open(cachefile) as f:
            # Return a real list: on Python 3 map() is a single-pass
            # iterator, but the points are walked several times downstream.
            return [attrdict(p) for p in json.load(f)]

    addresses = []
    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f, dialect='excel')
        # Columns are addressed by position, whatever their header text is.
        location, city, state, zipcode, consumer = reader.fieldnames
        for r in reader:
            loc = normalize(r[location])
            address, _, apartment = loc.partition(' APT ')
            apartment, _, beds = apartment.partition(' Bed ')
            address_str = ', '.join([address, r[city], r[state], r[zipcode]])

            addresses.append((address_str, apartment, dict(
                consumer=r[consumer].strip(),
                bedrooms=beds,
                apartment='APT ' + apartment,
            )))

    points = geocode_list(addresses)

    with open(cachefile, 'w') as f:
        json.dump(points, f, indent=4)
    return points


def get_centroid(points):
    """Return the arithmetic mean (lat, lng) of the points' ``latlng`` pairs."""
    lats, lngs = zip(*(pt.latlng for pt in points))
    mean_lat = sum(lats) / float(len(lats))
    mean_lng = sum(lngs) / float(len(lngs))
    return mean_lat, mean_lng

def get_extents(points):
    """Return the bounding box of the points' ``latlng`` pairs.

    The result is ``((min_lat, min_lng), (max_lat, max_lng))``.
    """
    lats, lngs = zip(*(pt.latlng for pt in points))
    min_corner = (min(lats), min(lngs))
    max_corner = (max(lats), max(lngs))
    return min_corner, max_corner


def make_map(point_dict, colors):
    """Build a folium map with one marker per geocoded address.

    Args:
        point_dict: maps dataset name -> iterable of points. Each point
            exposes ``latlng``, ``address``, ``apartments``, ``bedrooms``
            and ``contains_onebed`` attributes.
        colors: maps dataset name -> marker color name.

    Returns:
        The populated ``folium.Map``, centered on the centroid of all points
        and fitted to their bounding box.

    Raises:
        ValueError: for a color outside the accepted set, a dataset without
            a color, or when there are no points to plot.
    """
    # Accepted marker colors (presumably the set folium.Icon supports --
    # confirm against the installed folium version).
    valid_colors = {
        'red', 'blue', 'green', 'purple', 'orange',
        'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
        'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray',
        'black', 'lightgray',
    }
    for color in colors.values():
        if color not in valid_colors:
            raise ValueError("Invalid color specified: %s" % color)

    for name in point_dict:
        if name not in colors:
            raise ValueError("No color specified for dataset: %s" % name)

    # Materialize every dataset once: the points are walked three times
    # (centroid, extents, markers), which a single-pass iterable such as a
    # Python 3 map() would not survive.
    point_dict = {name: list(points) for name, points in point_dict.items()}
    # Flatten without reduce(), which is not a builtin on Python 3.
    all_points = [p for points in point_dict.values() for p in points]
    if not all_points:
        raise ValueError("No points to plot")

    clat, clng = get_centroid(all_points)
    # NOTE(review): newer folium releases may no longer bundle the
    # 'Stamen Toner' tiles -- confirm against the installed version.
    m = folium.Map(
        location=[clat, clng],
        zoom_start=12,
        max_zoom=20,
        tiles='Stamen Toner'
    )
    m.fit_bounds(get_extents(all_points))

    # icons:
    #     fa: https://fontawesome.com/icons/accessible-icon?style=brands
    #     glyphicon: https://getbootstrap.com/docs/3.3/components/#glyphicons-glyphs
    for name, points in point_dict.items():
        clr = colors[name]
        for p in points:
            folium.Marker(
                location=p.latlng,
                popup=p.address,
                tooltip='%s Apartment(s), %s Total Bedroom(s)' % (p.apartments, p.bedrooms),
                icon=folium.Icon(
                    color=clr,
                    # A star flags addresses with at least one 1-bedroom.
                    icon='star' if p.contains_onebed else 'home',
                    prefix='fa', # fa for font-awesome, glyphicon for bootstrap 3
                )
            ).add_to(m)

    # TODO: could add a legend like: https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373
    return m


def geocode(address):
    """Look up ``address`` with LocationIQ and return ``(lat, lng)``.

    Args:
        address: the address string to geocode.

    Returns:
        A ``(lat, lng)`` tuple, or None (after printing a diagnostic) when
        the lookup raises or yields no usable coordinates, so that callers
        can simply skip the address.
    """
    # Bound up front: if the request itself raises, the handler below must
    # still be able to refer to `result`.
    result = None
    try:
        result = geocoder.locationiq(address, key=api_key)
        d = attrdict(result.geojson)
        lng, lat = d.features[0].geometry.coordinates # x, y
        return lat, lng
    except Exception as e:
        # Deliberately best-effort: report and carry on rather than abort a
        # whole batch over one bad address.
        print("geocode failed for %r: %s (result: %s)" % (address, e, result))
    return None


def geocode_list(addresses, delay=1):
    """Geocode each distinct address once and aggregate its apartments.

    Args:
        addresses: iterable of ``(address, apartment, extra)`` tuples, where
            ``extra`` is a dict whose ``'bedrooms'`` entry is convertible
            with ``int``.
        delay: seconds to sleep after every geocoder request (rate limit).

    Returns:
        A list of attrdicts with ``address``, ``apartments`` (count),
        ``bedrooms`` (total), ``contains_onebed`` and ``latlng``. Addresses
        that could not be geocoded are omitted.
    """
    points = []

    # A plain function instead of ``lambda (a, d, e): a``: tuple parameters
    # are a SyntaxError on Python 3.
    def by_address(item):
        return item[0]

    # groupby only merges adjacent items, hence the sort on the same key.
    ordered = sorted(addresses, key=by_address)
    for address, rows in groupby(ordered, by_address):
        print(address)
        latlng = geocode(address)
        time.sleep(delay) # rate limit
        if not latlng:
            continue
        print("\t%2.5f, %2.5f" % (latlng))

        # compute total number of beds and apartments
        bedrooms = [int(extra['bedrooms']) for _, _, extra in rows]
        apartments = len(bedrooms)

        # mark if there are any 1-bedrooms
        contains_1br = any(n == 1 for n in bedrooms)

        points.append(attrdict(
            address=address,
            apartments=apartments,
            bedrooms=sum(bedrooms),
            contains_onebed=contains_1br,
            latlng=latlng,
        ))

    return points


def main():
    """Geocode both datasets, plot them and write the map to map.html."""
    datasets = dict(foo=read_foo(), pibly=read_pibly())
    # Marker color per dataset; keys must match those of `datasets`.
    palette = dict(foo='darkblue', pibly='darkgreen')

    make_map(datasets, palette).save('map.html')


if __name__ == '__main__':
    main()
	import csv
	import io
	import json
	import os
	import re
	import time
	from itertools import groupby

	import folium
	import geocoder
	from attrdict import AttrDict as attrdict

	"""
	Read data
	Geocode addresses
	Plot on map, color-coded by agency
	tooltips for # of apartments.
	change glyph for presence of 1-bed
	add icon for accessibility
	"""

	# LocationIQ rate limits (as recorded by the original author):
	#   10000 requests / day
	#   60 requests / minute
	#   2 requests / second
	api_key='' # copy your LocationIQ API key here (uses Nominatim under the hood).


	def normalize(s):
	    """Canonicalize the spelling of a street address.

	    Collapsing variants of the same address ("Street"/"St.", "Apt."/"apt",
	    stray '#', repeated spaces) minimizes geocoder calls, because identical
	    addresses are only looked up once.

	    Args:
	        s: raw address string.

	    Returns:
	        The address with street-type words abbreviated, the apartment
	        marker spelled ``APT``, '#' removed and runs of spaces collapsed.
	    """
	    # The alternation must be a bare "|" (an escaped one only matches a
	    # literal pipe). Word boundaries keep the abbreviations from firing
	    # inside longer names such as "Broadway" or "Eastchester".
	    s = re.sub(r'\b(?:Street\b|St\.)', 'St', s, flags=re.IGNORECASE)
	    s = re.sub(r'\b(?:Avenue\b|Ave\.)', 'Ave', s, flags=re.IGNORECASE)
	    s = re.sub(r'\b(?:Road\b|Rd\.)', 'Rd', s, flags=re.IGNORECASE)
	    s = re.sub(r'\b(?:East\b|E\.)', 'E', s, flags=re.IGNORECASE)
	    # The optional dot is consumed so "Apt. 3" does not become "APT. 3".
	    s = re.sub(r'\bApt(?![a-z])\.?', 'APT', s, flags=re.IGNORECASE)
	    # Capitalize bare lowercase abbreviations (whole words only).
	    s = re.sub(r' st\b', ' St', s)
	    s = re.sub(r' rd\b', ' Rd', s)
	    s = re.sub(r' ave\b', ' Ave', s)
	    # Standalone lowercase "e" is the direction East. Raw string needed:
	    # otherwise "\b" is a backspace character, not a word boundary.
	    s = re.sub(r'\be\b', 'E', s)
	    # Drop '#' first so that "APT # 3" does not leave a double space.
	    s = s.replace('#', '')
	    return re.sub(' +', ' ', s)


	def read_pibly(filename='pibly.csv'):
	    """Load the pibly dataset and return one geocoded point per address.

	    Results are cached in ``pibly_points.json``. When that file exists its
	    contents are returned and neither ``filename`` nor the geocoder is used.

	    Each CSV row needs an ``Address`` column containing the word ``Bronx``
	    and a ``Bedrooms`` column.

	    Args:
	        filename: path of the CSV export to read.

	    Returns:
	        A list of attrdict points as produced by ``geocode_list``.

	    Raises:
	        ValueError: if an address does not contain ``Bronx``.
	    """
	    # note: the data was hand-fixed where it was missing apartment numbers
	    # or was otherwise broken.
	    cachefile = 'pibly_points.json'
	    if os.path.exists(cachefile):
	        print("hitting cache: %s" % cachefile)
	        with open(cachefile) as f:
	            # A real list, not a single-pass map() iterator (Python 3).
	            return [attrdict(p) for p in json.load(f)]

	    addresses = []
	    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
	    with io.open(filename, 'r', encoding='utf-8-sig') as f:
	        reader = csv.DictReader(f, dialect='excel')
	        for r in map(attrdict, reader):
	            s = normalize(r.Address)
	            i = s.find('Bronx')
	            if i < 0:
	                raise ValueError("Address has no 'Bronx' component: %r" % s)
	            # Split around the borough name, tolerating a space or a
	            # comma before it.
	            start, end = s[:i].rstrip(' ,'), s[i:]
	            start, _, apt = start.partition(' APT')
	            address = start + ', ' + end
	            # strip() yields "APT 3B" rather than "APT  3B".
	            apartment = 'APT ' + apt.strip()

	            addresses.append((address, apartment, dict(
	                bedrooms=r.Bedrooms,
	                apartment=apartment,
	            )))
	    points = geocode_list(addresses)

	    with open(cachefile, 'w') as f:
	        json.dump(points, f, indent=4)
	    return points


	def read_foo(filename='foo.csv'):
	    """Load the foo dataset and return one geocoded point per address.

	    Results are cached in ``foo_points.json``. When that file exists its
	    contents are returned and neither ``filename`` nor the geocoder is used.

	    The CSV must have exactly five columns, in this order: location, city,
	    state, zip code, consumer. The location is expected to look like
	    ``<street> APT <n> Bed <k>`` after normalization.

	    Args:
	        filename: path of the CSV export to read.

	    Returns:
	        A list of attrdict points as produced by ``geocode_list``.
	    """
	    # The original foo.csv export was heavily hand-edited: sorted,
	    # normalized to "Apt 1 Bed 2", and the "Bed 1" rows of multi-bed
	    # apartments removed, so there is one row per apartment carrying its
	    # total number of bedrooms.
	    cachefile = 'foo_points.json'
	    if os.path.exists(cachefile):
	        print("hitting cache: %s" % cachefile)
	        with open(cachefile) as f:
	            # A real list, not a single-pass map() iterator (Python 3).
	            return [attrdict(p) for p in json.load(f)]

	    addresses = []
	    # Need io.open & utf-8-sig to avoid BOM being prepended to 1st column name.
	    with io.open(filename, 'r', encoding='utf-8-sig') as f:
	        reader = csv.DictReader(f, dialect='excel')
	        # Columns are addressed by position, whatever their header text is.
	        location, city, state, zipcode, consumer = reader.fieldnames
	        for r in reader:
	            loc = normalize(r[location])
	            address, _, apartment = loc.partition(' APT ')
	            apartment, _, beds = apartment.partition(' Bed ')
	            address_str = ', '.join([address, r[city], r[state], r[zipcode]])

	            addresses.append((address_str, apartment, dict(
	                consumer=r[consumer].strip(),
	                bedrooms=beds,
	                apartment='APT ' + apartment,
	            )))

	    points = geocode_list(addresses)

	    with open(cachefile, 'w') as f:
	        json.dump(points, f, indent=4)
	    return points


	def get_centroid(points):
	    """Return the arithmetic mean (lat, lng) of the points' ``latlng`` pairs."""
	    lats, lngs = zip(*[p.latlng for p in points])
	    return (sum(lats) / float(len(lats))), (sum(lngs) / float(len(lngs)))

	def get_extents(points):
	    """Return the bounding box of the points' ``latlng`` pairs.

	    The result is ``((min_lat, min_lng), (max_lat, max_lng))``.
	    """
	    lats, lngs = zip(*[p.latlng for p in points])
	    return (min(lats), min(lngs)), (max(lats), max(lngs))


	def make_map(point_dict, colors):
	    """Build a folium map with one marker per geocoded address.

	    Args:
	        point_dict: maps dataset name -> iterable of points. Each point
	            exposes ``latlng``, ``address``, ``apartments``, ``bedrooms``
	            and ``contains_onebed`` attributes.
	        colors: maps dataset name -> marker color name.

	    Returns:
	        The populated ``folium.Map``, centered on the centroid of all
	        points and fitted to their bounding box.

	    Raises:
	        ValueError: for a color outside the accepted set, a dataset
	            without a color, or when there are no points to plot.
	    """
	    # Accepted marker colors (presumably the set folium.Icon supports --
	    # confirm against the installed folium version).
	    valid_colors = {
	        'red', 'blue', 'green', 'purple', 'orange',
	        'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
	        'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray',
	        'black', 'lightgray',
	    }
	    for color in colors.values():
	        if color not in valid_colors:
	            raise ValueError("Invalid color specified: %s" % color)

	    for name in point_dict:
	        if name not in colors:
	            raise ValueError("No color specified for dataset: %s" % name)

	    # Materialize every dataset once: the points are walked three times
	    # (centroid, extents, markers).
	    point_dict = {name: list(points) for name, points in point_dict.items()}
	    # Flatten without reduce(), which is not a builtin on Python 3.
	    all_points = [p for points in point_dict.values() for p in points]
	    if not all_points:
	        raise ValueError("No points to plot")

	    clat, clng = get_centroid(all_points)
	    m = folium.Map(
	        location=[clat, clng],
	        zoom_start=12,
	        max_zoom=20,
	        tiles='Stamen Toner'
	    )
	    m.fit_bounds(get_extents(all_points))

	    # icons:
	    #     fa: https://fontawesome.com/icons/accessible-icon?style=brands
	    #     glyphicon: https://getbootstrap.com/docs/3.3/components/#glyphicons-glyphs
	    for name, points in point_dict.items():
	        clr = colors[name]
	        for p in points:
	            folium.Marker(
	                location=p.latlng,
	                popup=p.address,
	                tooltip='%s Apartment(s), %s Total Bedroom(s)' % (p.apartments, p.bedrooms),
	                icon=folium.Icon(
	                    color=clr,
	                    # A star flags addresses with at least one 1-bedroom.
	                    icon='star' if p.contains_onebed else 'home',
	                    prefix='fa', # fa for font-awesome, glyphicon for bootstrap 3
	                )
	            ).add_to(m)

	    # TODO: could add a legend like: https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373
	    return m


	def geocode(address):
	    """Look up ``address`` with LocationIQ and return ``(lat, lng)``.

	    Args:
	        address: the address string to geocode.

	    Returns:
	        A ``(lat, lng)`` tuple, or None (after printing a diagnostic) when
	        the lookup raises or yields no usable coordinates.
	    """
	    # Bound up front: if the request itself raises, the handler below
	    # must still be able to refer to `result`.
	    result = None
	    try:
	        result = geocoder.locationiq(address, key=api_key)
	        d = attrdict(result.geojson)
	        lng, lat = d.features[0].geometry.coordinates # x, y
	        return lat, lng
	    except Exception as e:
	        # Deliberately best-effort: report and carry on rather than abort
	        # a whole batch over one bad address.
	        print("geocode failed for %r: %s (result: %s)" % (address, e, result))
	    return None


	def geocode_list(addresses, delay=1):
	    """Geocode each distinct address once and aggregate its apartments.

	    Args:
	        addresses: iterable of ``(address, apartment, extra)`` tuples,
	            where ``extra`` is a dict whose ``'bedrooms'`` entry is
	            convertible with ``int``.
	        delay: seconds to sleep after every geocoder request (rate limit).

	    Returns:
	        A list of attrdicts with ``address``, ``apartments`` (count),
	        ``bedrooms`` (total), ``contains_onebed`` and ``latlng``.
	        Addresses that could not be geocoded are omitted.
	    """
	    points = []

	    # A plain function instead of a tuple-parameter lambda, which is a
	    # SyntaxError on Python 3.
	    def by_address(item):
	        return item[0]

	    # groupby only merges adjacent items, hence the sort on the same key.
	    ordered = sorted(addresses, key=by_address)
	    for address, rows in groupby(ordered, by_address):
	        print(address)
	        latlng = geocode(address)
	        time.sleep(delay) # rate limit
	        if not latlng:
	            continue
	        print("\t%2.5f, %2.5f" % (latlng))

	        # compute total number of beds and apartments
	        bedrooms = [int(extra['bedrooms']) for _, _, extra in rows]
	        apartments = len(bedrooms)

	        # mark if there are any 1-bedrooms
	        contains_1br = any(n == 1 for n in bedrooms)

	        points.append(attrdict(
	            address=address,
	            apartments=apartments,
	            bedrooms=sum(bedrooms),
	            contains_onebed=contains_1br,
	            latlng=latlng,
	        ))

	    return points


	def main():
	    """Geocode both datasets, plot them and write the map to map.html."""
	    points = {
	        'foo': read_foo(),
	        'pibly': read_pibly(),
	    }
	    # Marker color per dataset; keys must match those of `points`.
	    colors = {
	        'foo': 'darkblue',
	        'pibly': 'darkgreen',
	    }

	    m = make_map(points, colors)
	    m.save('map.html')


	if __name__ == '__main__':
	    main()