Skip to content

Instantly share code, notes, and snippets.

@boundsj
Created April 30, 2012 22:48
Show Gist options
  • Save boundsj/2563386 to your computer and use it in GitHub Desktop.
Save boundsj/2563386 to your computer and use it in GitHub Desktop.
Find and count all the things near a place
import math
import pymongo
from pymongo import Connection
# --- Geometry constants used by the proximity queries below ---------------

# Unit conversions.
radians_to_degrees = 180.0 / math.pi
earth_radius_in_miles = 3959.0

# Radius of the "$center" search circle, in degrees.  0.0189393939 miles is
# 100 feet (100 / 5280); dividing by the earth's radius gives an angle in
# radians, which is then converted to degrees.
radius = radians_to_degrees * (0.0189393939 / earth_radius_in_miles)  # 100 foot radius

# Bounding-box search: distance from the centre point to each edge of the
# box, and the earth radius used to turn that distance into degrees.
bbox_dist_from_center = 0.1  # miles
bbox_earth_radius = 3959.0  # miles
def sum_by_distinct_type(distinct_type, distinct_key, lat, lon):
    """Count the records near a point that carry one particular value.

    Queries ``db.windygrid`` for documents that satisfy all of:

    * ``where.location`` lies within the ``$center`` circle centred on
      ``[lat, lon]`` with the module-level ``radius``;
    * ``type`` equals the module-level ``aggregate_type``;
    * the field named by ``distinct_key`` equals ``distinct_type``.

    Note that ``db``, ``radius`` and ``aggregate_type`` are read from module
    scope; they are not parameters of this function.

    Returns a one-entry dict mapping ``distinct_type`` to the match count.
    """
    circle = {"$center": [[lat, lon], radius]}
    criteria = {
        "where.location": {"$within": circle},
        "type": aggregate_type,
        distinct_key: distinct_type,
    }
    matches = db.windygrid.find(criteria)
    return {distinct_type: matches.count()}
def sum_types_by_locations_in_query(search_type_query, search_mask, mask_name_key, distinct_key, aggregate_type, db, table):
    """Count the ``aggregate_type`` records found near each queried location.

    Runs ``db.windygrid.find(search_type_query, search_mask)`` and, for every
    returned document with usable coordinates, records:

    * ``total_count`` -- how many ``aggregate_type`` documents fall inside a
      bounding box that extends ``bbox_dist_from_center`` miles from the
      document's location in each direction;
    * ``type_counts`` -- for each distinct value of ``distinct_key`` among
      ``aggregate_type`` documents, the non-zero count returned by
      ``sum_by_distinct_type`` for that value at this location.

    NOTE(review): ``total_count`` uses the bounding box while
    ``sum_by_distinct_type`` uses the (smaller) ``$center`` circle, so the
    per-type counts need not add up to ``total_count`` -- confirm intent.

    Parameters:
        search_type_query: filter selecting the documents to iterate over.
        search_mask: projection passed to ``find``; it must keep
            ``where.location`` and the ``mask_name_key`` field.
        mask_name_key: key of the document field reported as ``"what"``.
        distinct_key: field whose distinct values are counted separately.
        aggregate_type: value of ``type`` for the documents being counted.
        db: database handle exposing a ``windygrid`` collection.
        table: accepted for interface compatibility; not used here (the
            collection is always ``db.windygrid``).

    Returns:
        ``{"location_counts": [...]}`` with one dict per processed document
        holding the keys ``what``, ``location``, ``total_count`` and
        ``type_counts``.
    """
    cursor = db.windygrid.find(search_type_query, search_mask)
    print("running against query results of %d records" % cursor.count())

    # The set of distinct values does not depend on the document being
    # processed, so query it once instead of once per document.
    distinct_types = db.windygrid.find({"type": aggregate_type}) \
        .distinct(distinct_key)

    # Half-height of the bounding box in degrees of latitude; it is the same
    # for every document.
    lat_delta = math.degrees(bbox_dist_from_center / bbox_earth_radius)

    location_counts = []
    for doc in cursor:
        location = doc['where']['location']
        if location == [-1, 1]:
            # The projection may exclude "_id" (the mask used by this script
            # does), so fall back to the name field instead of raising
            # KeyError while reporting a skipped record.
            ident = doc.get('_id', doc.get(mask_name_key))
            print("invalid coordinates (-1, 1) for %s" % (ident,))
            continue
        lat = location[0]
        lon = location[1]

        # Half-width of the box in degrees of longitude; it widens with
        # latitude because meridians converge away from the equator.
        lon_delta = math.degrees(
            bbox_dist_from_center / bbox_earth_radius / math.cos(math.radians(lat)))
        ll_x = lon - lon_delta
        ll_y = lat - lat_delta
        ur_x = lon + lon_delta
        ur_y = lat + lat_delta
        print("bbox = lowerleft:[%f, %f], upperright[%f, %f]" % (ll_x, ll_y, ur_x, ur_y))

        # Box corners use the same [lat, lon] ordering as the stored locations.
        box = [[ll_y, ll_x], [ur_y, ur_x]]
        nearby_requests = db.windygrid.find(
            {"where.location": {"$within": {"$box": box}}, "type": aggregate_type})
        count = nearby_requests.count()

        # Keep only the distinct values that actually occur near this point.
        type_counts = []
        for distinct_type in distinct_types:
            tally = sum_by_distinct_type(distinct_type, distinct_key, lat, lon)
            for key, value in tally.items():
                if value > 0:
                    type_counts.append({key: value})

        print("processed %s; it had %d records near it" %
              (doc[mask_name_key], count))
        location_counts.append({"what": doc[mask_name_key],
                                "location": location,
                                "total_count": count,
                                "type_counts": type_counts})

    return {"location_counts": location_counts}
# --- Script driver ---------------------------------------------------------

# What to iterate over: business-license documents, projected down to the
# legal name and the location (and without "_id").
search_type_query = {"type": "business_licenses"}
search_mask = {"_id": 0, "what.legal_name": 1, "where.location": 1}
mask_name_key = "what"

# What to count around each of those locations: "311" documents, broken down
# by their "what.type_code" value.
aggregate_type = "311"
distinct_key = "what.type_code"
table = "windygrid"

# Connection() is called without arguments, so pymongo's own defaults decide
# which server is used; the "test" database is selected on it.  ``db`` and
# ``aggregate_type`` must stay module-level names: sum_by_distinct_type reads
# them as globals.
connection = Connection()
db = connection["test"]

result = sum_types_by_locations_in_query(
    search_type_query,
    search_mask,
    mask_name_key,
    distinct_key,
    aggregate_type,
    db,
    table)
#could save to a db here
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment