Created
July 18, 2012 06:42
-
-
Save mapmeld/3134642 to your computer and use it in GitHub Desktop.
Geo-matching for Prediction CSVs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Go through a CSV file of addresses
# Find each address in a KML file
# Add the coordinates to the CSV file if a match is found
#
# Usage: run this file as a script. It reads CASES_CSV, looks each case id up
# in the per-year KML files and writes only the rows that could be geocoded
# to GEOCASES_CSV.

# Input rows; each row is expected to contain the placeholder EMPTY_GEO.
CASES_CSV = 'PROP1Cleanest.csv'
# Output rows, with the placeholder replaced by the coordinates that were found.
GEOCASES_CSV = 'allcasesplusgeo.csv'
# One KML file per year; %s is filled with the first four characters of the case id.
KML_PATH = '../Downloads/cases-%sgeo.kml'
# The two empty columns that follow the "F" marker and receive the coordinates.
EMPTY_GEO = '"F","",""'


def find_coordinates(kml_lines, caseid):
    """Return the coordinates listed for *caseid* in an iterable of KML lines.

    The scan starts paying attention once a line contains
    ``<b>CaseID:</b> <caseid><br>`` and then stops at whichever comes first:
    a ``</Placemark>`` line (the case has no geocode) or a ``<coordinates>``
    line.

    Returns ``[lat, lng]`` (two strings) on success and ``[]`` when the case
    id is absent, has no coordinates, or the coordinates line holds fewer
    than two comma-separated values.

    NOTE(review): the names ``lat``/``lng`` are kept from the original script;
    they are simply the first and second comma-separated values. KML
    conventionally stores longitude first - confirm the column order expected
    by whatever consumes the output CSV.
    """
    marker = '<b>CaseID:</b> ' + caseid + '<br>'
    in_case = False
    for geoline in kml_lines:
        if marker in geoline:
            # found this address
            in_case = True
        if not in_case:
            continue
        if '</Placemark>' in geoline:
            # no geocode exists for this address
            return []
        if '<coordinates>' in geoline:
            parts = geoline.split(',')
            if len(parts) < 2:
                return []
            lat = parts[0]
            # skip past "coordinates>" (11 characters plus the closing ">")
            lat = lat[lat.find('coordinates') + 12:].strip()
            # drop a trailing "</coordinates>" when there is no third value
            lng = parts[1].split('<')[0].strip()
            return [lat, lng]
    return []


def lookup_geo(caseid):
    """Search the KML file for the case's year, then the next year's file.

    The year is the first four characters of *caseid*. The following year is
    only tried as a back-up when the first file yields nothing, and only when
    the year is numeric. Each file is scanned with fresh state and closed
    afterwards. A file that cannot be opened or read is treated as "no match".

    Returns ``[lat, lng]`` or ``[]``.
    """
    year = caseid[0:4]
    candidates = [year]
    try:
        candidates.append(str(int(year) + 1))
    except ValueError:
        # caseid was not a valid year; there is no back-up file to try
        pass
    for candidate in candidates:
        try:
            with open(KML_PATH % candidate, 'r') as geosearch:
                geo = find_coordinates(geosearch, caseid)
        except IOError:
            continue
        if geo:
            return geo
    return []


def geocode_case(case, geobyaddress, lookup=None):
    """Return *case* with its empty geo columns filled in, or ``None``.

    *case* is one raw CSV line. The address is the text between the first
    pair of double quotes; the case id is the fifth comma-separated field
    after the ``"F"`` marker. *geobyaddress* caches ``address -> [lat, lng]``
    and is updated in place when a new address is resolved. *lookup* maps a
    case id to ``[lat, lng]`` or ``[]`` and defaults to :func:`lookup_geo`.

    ``None`` is returned (and nothing should be written) when the row is
    malformed, the case id is invalid, or no coordinates were found.
    """
    if lookup is None:
        lookup = lookup_geo
    try:
        myinfo = case.split('"F"')[1].split(',')
        myaddress = case.split('"')[1]
    except IndexError:
        # blank line, header, or a row without the "F" marker
        print("error: malformed row " + case.strip())
        return None

    if myaddress in geobyaddress:
        lastgeo = geobyaddress[myaddress]
        print(lastgeo)
    else:
        # new address, look up location
        if len(myinfo) < 5:
            print("error: malformed row " + case.strip())
            return None
        caseid = myinfo[4].strip()
        if len(caseid) < 4 or '"' in caseid:
            # has an invalid caseid
            print("error: " + caseid)
            return None
        lastgeo = lookup(caseid)
        if not lastgeo:
            return None
        geobyaddress[myaddress] = lastgeo

    return case.replace(EMPTY_GEO, '"F",' + lastgeo[0] + ',' + lastgeo[1])


def main():
    """Geocode every row of CASES_CSV and write the matches to GEOCASES_CSV."""
    geobyaddress = {}
    with open(CASES_CSV, 'r') as cases:
        with open(GEOCASES_CSV, 'w') as geocases:
            for case in cases:
                geocoded = geocode_case(case, geobyaddress)
                if geocoded is not None:
                    # print the geocoded case
                    geocases.write(geocoded)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment