Created
July 26, 2012 00:54
-
-
Save boundsj/3179645 to your computer and use it in GitHub Desktop.
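Find all the records that fall inside a bounding box around each location, using plain latitude/longitude range queries instead of MongoDB geospatial queries.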
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import pymongo | |
from pymongo import Connection | |
# Constants used to size the bounding box drawn around each location.
bbox_earth_radius = 3959.0 # Earth radius, in miles
bbox_dist_from_center = 0.1 # offset, in miles, from the centre point to each edge of the box
def _bbox_query(ll_x, ll_y, ur_x, ur_y, record_type):
    """Build the range query selecting `record_type` records inside a box.

    The box is expressed as plain ``$gte``/``$lte`` comparisons on the
    ``where.latitude`` / ``where.longitude`` fields (no geo index involved).
    """
    return {
        "$and": [
            {"where.latitude": {"$gte": ll_y}},
            {"where.latitude": {"$lte": ur_y}},
            {"where.longitude": {"$gte": ll_x}},
            {"where.longitude": {"$lte": ur_x}},
        ],
        "type": record_type,
    }


def sum_by_distinct_type(dist_type, distinct_key, args, collection=None):
    """Count records of one distinct type inside a bounding box.

    Args:
        dist_type: Value that ``distinct_key`` must equal.
        distinct_key: (Dotted) field name holding the distinct type.
        args: Dict with the keys ``agr_type`` (value of the ``type`` field to
            match) and ``ll_x``, ``ll_y``, ``ur_x``, ``ur_y`` (lower-left and
            upper-right corners; x is longitude, y is latitude).
        collection: Collection to query.  When omitted, falls back to the
            module-level ``db.windygrid`` as the original code did.

    Returns:
        A one-entry dict ``{dist_type: count}``.
    """
    if collection is None:
        # Backward-compatible default: rely on the module-global handle.
        collection = db.windygrid
    query = _bbox_query(args["ll_x"], args["ll_y"],
                        args["ur_x"], args["ur_y"], args["agr_type"])
    query[distinct_key] = dist_type
    return {dist_type: collection.find(query).count()}


def sum_types_by_locations_in_query(search_type_query, search_mask, mask_name_key,
                                    distinct_key, aggregate_type, db, table,
                                    bbox_dist=None, earth_radius=None):
    """Count nearby records, in total and per distinct type, for each location.

    For every document matched by ``search_type_query`` a bounding box is
    built around its ``where.location`` (read as ``[latitude, longitude]``)
    and the records of type ``aggregate_type`` inside that box are counted.

    Args:
        search_type_query: Query selecting the centre-point documents.
        search_mask: Field projection passed as the second ``find`` argument.
        mask_name_key: Key of each centre document reported under ``"what"``.
        distinct_key: (Dotted) field whose distinct values split the counts.
        aggregate_type: Value of the ``type`` field of the records to count.
        db: Database handle.
        table: Name of the collection inside ``db`` to query.
        bbox_dist: Distance from the centre to each box edge, in the same
            unit as ``earth_radius``; defaults to ``bbox_dist_from_center``.
        earth_radius: Earth radius; defaults to ``bbox_earth_radius``.

    Returns:
        ``{"location_counts": [...]}`` where each entry holds ``what``,
        ``location``, ``total_count`` and ``type_counts`` (a list of
        one-entry ``{type: count}`` dicts, only for counts above zero).
    """
    if bbox_dist is None:
        bbox_dist = bbox_dist_from_center
    if earth_radius is None:
        earth_radius = bbox_earth_radius

    # Honour the caller-supplied collection name instead of a hard-coded one.
    collection = db[table]
    cursor = collection.find(search_type_query, search_mask)
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement used elsewhere in this file.
    print("running against query results of %d records" % cursor.count())

    # Neither of these depends on the current document, so compute them once.
    distinct_types = collection.find({"type": aggregate_type}).distinct(distinct_key)
    lat_delta = math.degrees(bbox_dist / earth_radius)

    # master collection
    location_counts = []
    for doc in cursor:
        location = doc['where']['location']
        if location == [-1, 1]:
            # The projection may exclude ``_id``; fall back to the name field
            # rather than raising KeyError while reporting a bad record.
            identifier = doc.get('_id', doc.get(mask_name_key))
            print("invalid coordinates (-1, 1) for %s" % identifier)
            continue

        lat, lon = location[0], location[1]
        # A degree of longitude shrinks with cos(latitude), so widen the
        # angular offset accordingly.
        lon_delta = math.degrees(bbox_dist / earth_radius / math.cos(math.radians(lat)))
        ll_x, ll_y = lon - lon_delta, lat - lat_delta
        ur_x, ur_y = lon + lon_delta, lat + lat_delta
        print("bbox = lowerleft:[%f, %f], upperright[%f, %f]" % (ll_x, ll_y, ur_x, ur_y))

        # total requests for bounding box and type
        count = collection.find(
            _bbox_query(ll_x, ll_y, ur_x, ur_y, aggregate_type)).count()
        print("processed %s; it had %d records near it" % (doc[mask_name_key], count))

        # per-type counts, keeping only the types that actually occur nearby
        bbox_args = {"agr_type": aggregate_type,
                     "ll_x": ll_x, "ll_y": ll_y,
                     "ur_x": ur_x, "ur_y": ur_y}
        type_counts = []
        for dist_type in distinct_types:
            type_count = sum_by_distinct_type(
                dist_type, distinct_key, bbox_args, collection)[dist_type]
            if type_count > 0:
                type_counts.append({dist_type: type_count})

        # add to master collection
        location_counts.append({"what": doc[mask_name_key],
                                "location": location,
                                "total_count": count,
                                "type_counts": type_counts})

    return {"location_counts": location_counts}
# --- connection: default Connection(), database "test" ---
connection = Connection()
db = connection["test"]

# --- query parameters ---
table = "windygrid"
aggregate_type = "311"
distinct_key = "what.type_code"
mask_name_key = "what"
search_type_query = {"type": "business_licenses"}
# Projection handed to find(): drop _id, keep the legal name and the location.
search_mask = {"_id": 0, "what.legal_name": 1, "where.location": 1}

# --- run the aggregation ---
result = sum_types_by_locations_in_query(
    search_type_query=search_type_query,
    search_mask=search_mask,
    mask_name_key=mask_name_key,
    distinct_key=distinct_key,
    aggregate_type=aggregate_type,
    db=db,
    table=table,
)

# --- display (or save in db) ---
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment