Last active
January 17, 2016 14:27
-
-
Save shantanuo/05187dd23b96f5e92f6b to your computer and use it in GitHub Desktop.
Extract wards and reverse lookup for pincodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://groups.google.com/forum/#!topic/datameet/OhPxDmhkctc | |
import shapefile | |
r = shapefile.Reader("gis/Mumbai.shp") | |
import re | |
from geopy.geocoders import ArcGIS as mygis | |
geolocator = mygis() | |
mydict={} | |
for i in range(r.numRecords): | |
#print r.record(i) | |
geom = r.shape(i) | |
points = geom.points | |
mylist=[] | |
for id in points[0:len(points)]: | |
lat, lon = id | |
location = (lon, lat) | |
try: | |
geolocation = geolocator.reverse(location) | |
myadd=geolocation.address | |
mylist.append(re.findall(r'[0-9]{6}', myadd)[0]) | |
except: | |
pass | |
#print location | |
mydict[r.record(i)[2]] = sorted(set(mylist)) | |
import pandas as pd | |
df = pd.DataFrame(mydict.items()) | |
df.to_csv('/var/www/html/new_email360_panel/abc1.csv') | |
# function to check the ward of given location | |
def point_in_poly(y,x,poly): | |
# check if point is a vertex | |
if (y,y) in poly: return True | |
# check if point is on a boundary | |
for i in range(len(poly)): | |
p1 = None | |
p2 = None | |
if i==0: | |
p1 = poly[0] | |
p2 = poly[1] | |
else: | |
p1 = poly[i-1] | |
p2 = poly[i] | |
if p1[1] == p2[1] and p1[1] == x and y > min(p1[0], p2[0]) and y < max(p1[0], p2[0]): | |
return True | |
n = len(poly) | |
inside = False | |
p1x,p1y = poly[0] | |
for i in range(n+1): | |
p2x,p2y = poly[i % n] | |
if x > min(p1y,p2y): | |
if x <= max(p1y,p2y): | |
if y <= max(p1x,p2x): | |
if p1y != p2y: | |
xints = (x-p1y)*(p2x-p1x)/(p2y-p1y)+p1x | |
if p1x == p2x or y <= xints: | |
inside = not inside | |
p1x,p1y = p2x,p2y | |
if inside: return True | |
return False | |
# convert shape data to standard python dic | |
mydict={} | |
for id in range(len(r.shapes())): | |
geom = r.shape(id) | |
points = geom.points | |
mydict[r.records()[id][2]] = str(points) | |
# pickle the dictionary to a text file | |
import pickle | |
with open('mumbai_wards.pickle', 'wb') as handle: | |
pickle.dump(mydict, handle) | |
# lambda function code (to be hosted at amazon) | |
# name of this file should be lambda_function.py | |
# do not forget to include point_in_poly function | |
import ast | |
import pickle | |
with open('mumbai_wards.pickle', 'rb') as handle: | |
b = pickle.load(handle) | |
def lambda_handler(event, context): | |
some_lon=float(event['some_place_lon']) | |
some_lat=float(event['some_place_lat']) | |
for id in b: | |
if point_in_poly(some_place_lon, some_place_lat, ast.literal_eval(b[id])): | |
return id | |
# zip lambda_function.py and mumbai_wards.pickle | |
# upload the zip file to amazon lambda and create API gateway | |
""" | |
This code will calculate distance from given location to each point for that ward. | |
FOr e.g. the some_place will calculate distance from each location of P/s ward and find the closest point. and distance in meters for e.g. 399 | |
We can then reverse lookup the location to find the address. | |
""" | |
some_place_lat=19.1645972 | |
some_place_lon=72.8496286 | |
# nominatim is accurate but limits access use ArcGIS for loops | |
#great_circle is marginally less accurate, but always produces result unlike vincenty | |
from geopy.geocoders import Nominatim as mygis | |
from geopy.distance import great_circle as dist_cal | |
#from geopy.distance import vincenty as dist_cal | |
#from geopy.geocoders import ArcGIS as mygis | |
geolocator = mygis() | |
for id in range(len(r.shapes())): | |
geom = r.shape(id) | |
if point_in_poly(some_place_lon, some_place_lat, geom.points): | |
print r.records()[id][2] | |
x=r.shape(id) | |
mydist={} | |
for i in x.points: | |
mydist[i]=(dist_cal((some_place_lon, some_place_lat), i).meters) | |
import operator | |
print min(mydist.iteritems(), key=operator.itemgetter(1)) | |
lon, lat = min(mydist.iteritems(), key=operator.itemgetter(1))[0] | |
location = (lat, lon) | |
geolocation = geolocator.reverse(location) | |
print geolocation | |
location = (some_place_lat, some_place_lon) | |
geolocation = geolocator.reverse(location) | |
print geolocation | |
""" | |
P/S | |
([72.85049703721015, 19.17642085278126], 399.63701267653516) | |
Chinchwali Road, Goregaon East, Kurar, Greater Bombay, Maharashtra, 400097, India | |
WR Parking, Station Road, Motilal Nagar 1, Goregaon East, Kurar, Greater Bombay, Maharashtra, 400065, India | |
""" | |
# pure math solution returns 1.31789032903 that is more than 401.1862930166953 | |
# but this uses simple math module that is available universally | |
import math | |
lon_dist=math.radians(some_place_lat- lat) | |
lat_dist=math.radians(some_place_lon- lon) | |
lon_radn=math.radians(some_place_lat) | |
lon_radp=math.radians(lat) | |
a = math.sin(lon_dist/2)**2 + math.sin(lat_dist/2)**2 * math.cos(lon_radn) * math.cos(lon_radp) | |
c= 2*math.asin(math.sqrt(a)) | |
distance=c * 6371 | |
print distance |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment