Uses the gist https://gist.github.com/jsundram/1918435
on this data: https://data.sfgov.org/Public-Works/Street-Tree-List/tkzw-k3nq
with neighborhood shapefiles from http://www.zillow.com/howto/api/neighborhood-boundaries.htm.
Minor changes to the gist were required.
import csv | |
from collections import defaultdict | |
import json | |
import shape_index | |
def main():
    """Annotate each SF street-tree record with its Zillow neighborhood.

    Reads Street_Tree_List.csv, looks up each tree's (Latitude, Longitude)
    in a spatial index built from the CA neighborhood shapefiles, and writes
    Street_Tree_List_Neighborly.csv with an added 'Neighborhood' column.
    Rows whose coordinates fail to parse are counted and skipped.
    """
    # Record field 3 holds the neighborhood name; field 4 is a numeric id.
    name = lambda record: record[3]
    shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, name)
    idx = shape_index.make_index(shapes)
    # newline='' is the documented way to open files for the csv module.
    with open('Street_Tree_List.csv', newline='') as f, \
            open('Street_Tree_List_Neighborly.csv', 'w', newline='') as g:
        r = csv.DictReader(f)
        w = csv.DictWriter(g, r.fieldnames + ['Neighborhood'])
        w.writeheader()  # the original omitted the header row
        rows, parse_errors = 0, 0
        for row in r:
            try:
                lat = float(row['Latitude'])
                lng = float(row['Longitude'])
            except ValueError:
                parse_errors += 1
                continue
            rows += 1
            shape = shape_index.query_index(idx, shapes, lat, lng)
            # None when the point falls in no indexed neighborhood.
            row['Neighborhood'] = names.get(shape, None)
            w.writerow(row)
    print("%d rows succeeded, %d had errors" % (rows, parse_errors))
    print("Wrote Street_Tree_List_Neighborly.csv")


if __name__ == '__main__':
    main()
import json | |
from collections import defaultdict | |
from itertools import izip | |
# To get rtree: | |
# brew install spatialindex | |
# pip install rtree | |
from rtree import index | |
# to get shapefile: | |
# pip install pyshp | |
import shapefile | |
def read_shapefile(shape_dir, ixId=3, name=None):
    """Load shapes from a shapefile, keyed by a numeric record field.

    Args:
        shape_dir: path (without extension) passed to shapefile.Reader.
        ixId: index of a field in the shape record usable as a numeric id.
        name: optional callable mapping a record to a display name.

    Returns:
        (shapes, shape_names): both dicts keyed by the numeric id;
        shape_names is empty when no name function is given.
    """
    sf = shapefile.Reader(shape_dir)
    shapes = {}
    shape_names = {}
    # zip (not Py2-only izip) keeps this working on both Python 2 and 3.
    for s, sr in zip(sf.shapes(), sf.shapeRecords()):
        ix = int(sr.record[ixId])
        shapes[ix] = s
        if name:
            shape_names[ix] = name(sr.record)
    return shapes, shape_names
def make_index(county_shapes):
    """Build an rtree spatial index over the shapes' bounding boxes.

    Args:
        county_shapes: dict mapping integer id -> shape (anything with a
            .bbox usable by rtree's Index.insert).

    Returns:
        rtree.index.Index whose entries are keyed by the shape ids.
    """
    idx = index.Index()
    # .items() works on both Python 2 and 3; iteritems() is Py2-only.
    for shape_id, s in county_shapes.items():
        idx.insert(shape_id, s.bbox)
    return idx
def query_index(idx, shapes, lat, lng, e=.0000001):
    """Return the id of the shape containing (lat, lng), or None.

    A tiny box around the point is intersected with the spatial index
    to get candidate shapes; each candidate is then confirmed with an
    exact point-in-polygon test.
    """
    query_box = (lng - e, lat - e, lng + e, lat + e)
    for candidate in idx.intersection(query_box):
        candidate_points = shapes[candidate].points
        if hit_test(lng, lat, candidate_points):
            return candidate
    return None
def hit_test(x, y, polygon):
    """Ray-casting point-in-polygon test.

    Casts a horizontal ray from (x, y) and counts crossings with the
    polygon's edges; an odd crossing count means the point is inside.

    Args:
        x, y: coordinates of the query point.
        polygon: sequence of (x, y) vertex pairs; the ring is closed
            implicitly (the last vertex connects back to the first).

    Returns:
        True if the point is inside the polygon, False otherwise.
    """
    inside = False
    # Start from the last vertex so each iteration tests one closed edge.
    # (The original's `range(len(polygon)) + [0]` walked the same edges --
    # its first iteration was a degenerate no-op edge -- but that
    # expression is a TypeError on Python 3.)
    x1, y1 = polygon[-1]
    for x2, y2 in polygon:
        if min(y1, y2) < y <= max(y1, y2) and x <= max(x1, x2):
            # x-coordinate where this edge crosses the ray's height.
            xints = (y - y1) * (x2 - x1) / (y2 - y1) + x1
            if x1 == x2 or x <= xints:
                inside = not inside
        x1, y1 = x2, y2
    return inside
def process(your_data):
    """Tally how many (lat, lng) points fall in each US county.

    Args:
        your_data: iterable of (lat, lng) pairs.

    Writes data.json mapping county id -> point count.
    """
    # County shapefile: http://www2.census.gov/geo/tiger/TIGER2009/tl_2009_us_county.zip
    shapes, names = read_shapefile('./us_county')
    idx = make_index(shapes)
    summary = defaultdict(int)
    for lat, lng in your_data:
        county_id = query_index(idx, shapes, lat, lng)
        summary[county_id] += 1
    # with-block closes the file handle (the original leaked it).
    with open('data.json', 'w') as f:
        json.dump(summary, f)  # scale this later