Last active
August 29, 2015 14:00
-
-
Save svershin/4fbdebcf3f04f6461935 to your computer and use it in GitHub Desktop.
DataSift CSDL geo_polygon to geo_radius converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DataSift CSDL polygon to radius filter converter by Sergey Vershinin (https://gist.github.com/svershin) | |
# Takes a CSDL file that has some geo_polygon filters and converts them | |
# to centroids with an appropriate radius, commenting out the old geo_polygon statements | |
# *** NOTE: The script expects to have the 'twitter.geo geo_polygon "..."' statement on its own line | |
# *** BUG ALERT: Have not tested with scripts that have 'GEO_POLYGON' in upper case | |
import math | |
import string | |
import re | |
# poly2radius() takes in a list of lat/long tuples and returns the lat/long of a centroid | |
# with a radius that covers the entire area of the original polygon; | |
# uses barycenter of polygon boundary and great circle dist optimized for 39 deg. latitude | |
def poly2radius(points): | |
xc, yc = centroid(points) | |
dist = 0 | |
for p in points: | |
d = distance(xc, yc, p[0], p[1]) | |
if d > dist: | |
dist = d | |
return "{},{}:{}".format(xc,yc,dist) | |
# centroid() calculates the barycentre of the polygon boundary, as per code posted here: | |
# http://stackoverflow.com/questions/18305712/how-to-compute-the-center-of-a-polygon-in-2d-and-3d-space | |
def centroid(points): | |
sx = sy = sL = 0 | |
for i in range(len(points)): # counts from 0 to len(points)-1 | |
x0, y0 = points[i - 1] # in Python points[-1] is last element of points | |
x1, y1 = points[i] | |
L = ((x1 - x0)**2 + (y1 - y0)**2) ** 0.5 | |
sx += (x0 + x1)/2 * L | |
sy += (y0 + y1)/2 * L | |
sL += L | |
xc = sx / sL | |
yc = sy / sL | |
return xc, yc | |
# distance() is based on a JavaScript implementation of great circle distance by Andrew Hedges [andrew(at)hedges(dot)name] | |
# See Wikipedia entry for more information: http://en.wikipedia.org/wiki/Great-circle_distance | |
def distance(lat1, lon1, lat2, lon2): | |
# mean radius of the earth (km) | |
# See Wikipedia entry for more information: http://en.wikipedia.org/wiki/Earth_radius | |
Rk = 6371 | |
# convert coordinates to radians | |
lat1 = math.radians(lat1) | |
lon1 = math.radians(lon1) | |
lat2 = math.radians(lat2) | |
lon2 = math.radians(lon2) | |
#find the differences between the coordinates | |
dlat = lat2 - lat1 | |
dlon = lon2 - lon1 | |
a = math.pow(math.sin(dlat/2),2) + math.cos(lat1) * math.cos(lat2) * math.pow(math.sin(dlon/2),2) | |
c = 2 * math.atan2(math.sqrt(a),math.sqrt(1-a)) # great circle distance in radians | |
return round(c * Rk, 3) # great circle distance in km to the nearest 1/1000 | |
def main(): | |
numbers = re.compile("([-]*\d+[.]+\d+)") | |
# specify the name of the output file here | |
df = open('test2_radius2.csdl','w') | |
# specify the name of the input file here | |
with open('test2.csdl') as f: | |
for line in f: | |
if string.find(line, "geo_polygon")>0: | |
pts = numbers.findall(line) | |
points = [] | |
[points.append((float(pts[i]),float(pts[i+1]))) for i in range(0,len(pts),2)] | |
df.write("twitter.geo geo_radius \"{}\"\n".format(poly2radius(points))) | |
df.write("// "+line) | |
else: | |
df.write(line) | |
df.flush() | |
df.close() | |
return | |
if __name__ == '__main__': | |
main() | |
# EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment