Skip to content

Instantly share code, notes, and snippets.

@svershin
Last active August 29, 2015 14:00
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save svershin/4fbdebcf3f04f6461935 to your computer and use it in GitHub Desktop.
Save svershin/4fbdebcf3f04f6461935 to your computer and use it in GitHub Desktop.
DataSift CSDL geo_polygon to geo_radius converter
# DataSift CSDL polygon to radius filter converter by Sergey Vershinin (https://gist.github.com/svershin)
# Takes a CSDL file that has some geo_polygon filters and converts them
# to centroids with an appropriate radius, commenting out the old geo_polygon statements
# *** NOTE: The script expects to have the 'twitter.geo geo_polygon "..."' statement on its own line
# *** BUG ALERT: Have not tested with scripts that have 'GEO_POLYGON' in upper case
import math
import string
import re
# poly2radius() takes in a list of lat/long tuples and returns the lat/long of a centroid
# with a radius that covers the entire area of the original polygon;
# uses barycenter of polygon boundary and great circle dist optimized for 39 deg. latitude
def poly2radius(points):
xc, yc = centroid(points)
dist = 0
for p in points:
d = distance(xc, yc, p[0], p[1])
if d > dist:
dist = d
return "{},{}:{}".format(xc,yc,dist)
# centroid() calculates the barycentre of the polygon boundary, as per code posted here:
# http://stackoverflow.com/questions/18305712/how-to-compute-the-center-of-a-polygon-in-2d-and-3d-space
def centroid(points):
sx = sy = sL = 0
for i in range(len(points)): # counts from 0 to len(points)-1
x0, y0 = points[i - 1] # in Python points[-1] is last element of points
x1, y1 = points[i]
L = ((x1 - x0)**2 + (y1 - y0)**2) ** 0.5
sx += (x0 + x1)/2 * L
sy += (y0 + y1)/2 * L
sL += L
xc = sx / sL
yc = sy / sL
return xc, yc
# distance() is based on a JavaScript implementation of great circle distance by Andrew Hedges [andrew(at)hedges(dot)name]
# See Wikipedia entry for more information: http://en.wikipedia.org/wiki/Great-circle_distance
def distance(lat1, lon1, lat2, lon2):
# mean radius of the earth (km)
# See Wikipedia entry for more information: http://en.wikipedia.org/wiki/Earth_radius
Rk = 6371
# convert coordinates to radians
lat1 = math.radians(lat1)
lon1 = math.radians(lon1)
lat2 = math.radians(lat2)
lon2 = math.radians(lon2)
#find the differences between the coordinates
dlat = lat2 - lat1
dlon = lon2 - lon1
a = math.pow(math.sin(dlat/2),2) + math.cos(lat1) * math.cos(lat2) * math.pow(math.sin(dlon/2),2)
c = 2 * math.atan2(math.sqrt(a),math.sqrt(1-a)) # great circle distance in radians
return round(c * Rk, 3) # great circle distance in km to the nearest 1/1000
def main():
numbers = re.compile("([-]*\d+[.]+\d+)")
# specify the name of the output file here
df = open('test2_radius2.csdl','w')
# specify the name of the input file here
with open('test2.csdl') as f:
for line in f:
if string.find(line, "geo_polygon")>0:
pts = numbers.findall(line)
points = []
[points.append((float(pts[i]),float(pts[i+1]))) for i in range(0,len(pts),2)]
df.write("twitter.geo geo_radius \"{}\"\n".format(poly2radius(points)))
df.write("// "+line)
else:
df.write(line)
df.flush()
df.close()
return
if __name__ == '__main__':
main()
# EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment