Skip to content

Instantly share code, notes, and snippets.

@jsundram
Last active December 28, 2015 13:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsundram/7509686 to your computer and use it in GitHub Desktop.
Save jsundram/7509686 to your computer and use it in GitHub Desktop.
import csv
from collections import defaultdict
import json
import shape_index
def main():
name = lambda l: l[3]
shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, name)
idx = shape_index.make_index(shapes)
with open('Street_Tree_List.csv') as f, \
open('Street_Tree_List_Neighborly.csv', 'w') as g:
r = csv.DictReader(f)
w = csv.DictWriter(g, r.fieldnames + ['Neighborhood'])
rows, parse_errors = 0, 0
for row in r:
try:
lat = float(row['Latitude'])
lng = float(row['Longitude'])
except ValueError, e:
parse_errors += 1
continue
rows += 1
shape = shape_index.query_index(idx, shapes, lat, lng)
row['Neighborhood'] = names.get(shape, None)
w.writerow(row)
print "%d rows succeeded, %d had errors" % (rows, parse_errors)
print "Wrote Stree_Tree_List_Neighborly.csv"
if __name__ == '__main__':
main()
import json
from collections import defaultdict
from itertools import izip
# To get rtree:
# brew install spatialindex
# pip install rtree
from rtree import index
# to get shapefile:
# pip install pyshp
import shapefile
def read_shapefile(shape_dir, ixId=3, name=None):
"""ixId is the index of a field in the shaperecord that can be used as a numeric id."""
sf = shapefile.Reader(shape_dir)
shapes = {}
shape_names = {}
for (s, sr) in izip(sf.shapes(), sf.shapeRecords()):
ix = int(sr.record[ixId])
shapes[ix] = s
if name:
shape_names[ix] = name(sr.record)
return shapes, shape_names
def make_index(county_shapes):
idx = index.Index()
for (countyId, s) in county_shapes.iteritems():
idx.insert(countyId, s.bbox)
return idx
def query_index(idx, shapes, lat, lng, e=.0000001):
for hit in idx.intersection((lng - e, lat - e, lng + e, lat + e)):
if hit_test(lng, lat, shapes[hit].points):
return hit
def hit_test(x, y, polygon):
inside = False
x1, y1 = polygon[0]
for i in range(len(polygon)) + [0]:
x2, y2 = polygon[i]
if min(y1, y2) < y <= max(y1, y2) and x <= max(x1, x2):
xints = (y-y1) * (x2-x1) / (y2-y1) + x1
if x1 == x2 or x <= xints:
inside = not inside
x1, y1 = x2, y2
return inside
def process(your_data):
shapes, names = read_shapefile('./us_county') # http://www2.census.gov/geo/tiger/TIGER2009/tl_2009_us_county.zip
idx = make_index(shapes)
summary = defaultdict(int)
for (lat, lng) in your_data:
countyId = query_index(idx, shapes, lat, lng)
summary[countyId] += 1
json.dump(summary, open('data.json', 'w')) # scale this later
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment