Uses the gist https://gist.github.com/jsundram/1918435
on this data: https://data.sfgov.org/Public-Works/Street-Tree-List/tkzw-k3nq
with neighborhood shapefiles from http://www.zillow.com/howto/api/neighborhood-boundaries.htm.
Minor changes to the gist were required.
import csv | |
from collections import defaultdict | |
import json | |
import shape_index | |
def main():
    """Annotate each SF street-tree record with its Zillow neighborhood.

    Reads Street_Tree_List.csv, looks up each tree's (Latitude, Longitude)
    in a spatial index built from the CA neighborhood shapefiles, and writes
    Street_Tree_List_Neighborly.csv with an added 'Neighborhood' column.
    Rows whose coordinates fail to parse are counted and skipped.
    """
    # Record field 3 holds the neighborhood name; field 4 is a numeric id.
    name = lambda record: record[3]
    shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, name)
    idx = shape_index.make_index(shapes)
    # newline='' is the documented way to open files for the csv module.
    with open('Street_Tree_List.csv', newline='') as f, \
            open('Street_Tree_List_Neighborly.csv', 'w', newline='') as g:
        r = csv.DictReader(f)
        w = csv.DictWriter(g, r.fieldnames + ['Neighborhood'])
        w.writeheader()  # the original omitted the header row
        rows, parse_errors = 0, 0
        for row in r:
            try:
                lat = float(row['Latitude'])
                lng = float(row['Longitude'])
            except ValueError:
                parse_errors += 1
                continue
            rows += 1
            shape = shape_index.query_index(idx, shapes, lat, lng)
            # None when the point falls in no indexed neighborhood.
            row['Neighborhood'] = names.get(shape, None)
            w.writerow(row)
    print("%d rows succeeded, %d had errors" % (rows, parse_errors))
    print("Wrote Street_Tree_List_Neighborly.csv")


if __name__ == '__main__':
    main()
import json | |
from collections import defaultdict | |
from itertools import izip | |
# To get rtree: | |
# brew install spatialindex | |
# pip install rtree | |
from rtree import index | |
# to get shapefile: | |
# pip install pyshp | |
import shapefile | |
def read_shapefile(shape_dir, ixId=3, name=None):
    """Load shapes from a shapefile, keyed by a numeric record field.

    Args:
        shape_dir: path (without extension) passed to shapefile.Reader.
        ixId: index of a field in the shape record usable as a numeric id.
        name: optional callable mapping a record to a display name.

    Returns:
        (shapes, shape_names): both dicts keyed by the numeric id;
        shape_names is empty when no name function is given.
    """
    sf = shapefile.Reader(shape_dir)
    shapes = {}
    shape_names = {}
    # zip (not Py2-only izip) keeps this working on both Python 2 and 3.
    for s, sr in zip(sf.shapes(), sf.shapeRecords()):
        ix = int(sr.record[ixId])
        shapes[ix] = s
        if name:
            shape_names[ix] = name(sr.record)
    return shapes, shape_names
def make_index(county_shapes):
    """Build an rtree spatial index over the shapes' bounding boxes.

    Args:
        county_shapes: dict mapping integer id -> shape (anything with a
            .bbox usable by rtree's Index.insert).

    Returns:
        rtree.index.Index whose entries are keyed by the shape ids.
    """
    idx = index.Index()
    # .items() works on both Python 2 and 3; iteritems() is Py2-only.
    for shape_id, s in county_shapes.items():
        idx.insert(shape_id, s.bbox)
    return idx
def query_index(idx, shapes, lat, lng, e=.0000001):
    """Return the id of the shape containing (lat, lng), or None.

    A tiny box around the point is intersected with the spatial index
    to get candidate shapes; each candidate is then confirmed with an
    exact point-in-polygon test.
    """
    query_box = (lng - e, lat - e, lng + e, lat + e)
    for candidate in idx.intersection(query_box):
        candidate_points = shapes[candidate].points
        if hit_test(lng, lat, candidate_points):
            return candidate
    return None
def hit_test(x, y, polygon):
    """Ray-casting point-in-polygon test.

    Casts a horizontal ray from (x, y) and counts crossings with the
    polygon's edges; an odd crossing count means the point is inside.

    Args:
        x, y: coordinates of the query point.
        polygon: sequence of (x, y) vertex pairs; the ring is closed
            implicitly (the last vertex connects back to the first).

    Returns:
        True if the point is inside the polygon, False otherwise.
    """
    inside = False
    # Start from the last vertex so each iteration tests one closed edge.
    # (The original's `range(len(polygon)) + [0]` walked the same edges --
    # its first iteration was a degenerate no-op edge -- but that
    # expression is a TypeError on Python 3.)
    x1, y1 = polygon[-1]
    for x2, y2 in polygon:
        if min(y1, y2) < y <= max(y1, y2) and x <= max(x1, x2):
            # x-coordinate where this edge crosses the ray's height.
            xints = (y - y1) * (x2 - x1) / (y2 - y1) + x1
            if x1 == x2 or x <= xints:
                inside = not inside
        x1, y1 = x2, y2
    return inside
def process(your_data):
    """Tally how many (lat, lng) points fall in each US county.

    Args:
        your_data: iterable of (lat, lng) pairs.

    Writes data.json mapping county id -> point count.
    """
    # County shapefile: http://www2.census.gov/geo/tiger/TIGER2009/tl_2009_us_county.zip
    shapes, names = read_shapefile('./us_county')
    idx = make_index(shapes)
    summary = defaultdict(int)
    for lat, lng in your_data:
        county_id = query_index(idx, shapes, lat, lng)
        summary[county_id] += 1
    # with-block closes the file handle (the original leaked it).
    with open('data.json', 'w') as f:
        json.dump(summary, f)  # scale this later