Skip to content

Instantly share code, notes, and snippets.

@mapmeld
Created July 18, 2012 06:42
Show Gist options
  • Save mapmeld/3134642 to your computer and use it in GitHub Desktop.
Save mapmeld/3134642 to your computer and use it in GitHub Desktop.
Geo-matching for Prediction CSVs
# Go through a CSV file of addresses
# Find each address in a KML file
# Add the coordinates to the CSV file if a match is found
# Add Geostuff to addresses
#
# Only syntax that is valid on both Python 2.7 and Python 3 is used.
import os

# Marker for a row whose two coordinate columns (right after "F") are empty.
_EMPTY_GEO = '"F","",""'


def _find_coordinates(kml_path, caseid):
    """Scan one KML file for caseid and return its first two coordinates.

    The file is read line by line.  Once a line containing
    '<b>CaseID:</b> <caseid><br>' has been seen, the next '<coordinates>'
    line supplies the result; reaching '</Placemark>' first means the
    placemark carries no geocode.

    Returns a two-item list of strings [lat, lng] (named as in the
    original script), or None when the case is absent, has no
    coordinates, or its coordinates line has no comma.
    Raises IOError/OSError when kml_path cannot be opened.
    """
    marker = '<b>CaseID:</b> ' + caseid + '<br>'
    in_placemark = False
    with open(kml_path, 'r') as geosearch:
        for geoline in geosearch:
            if marker in geoline:
                in_placemark = True
            if not in_placemark:
                continue
            if '</Placemark>' in geoline:
                # no geocode exists for this address
                return None
            if '<coordinates>' in geoline:
                parts = geoline.split(',')
                if len(parts) < 2:
                    return None
                # NOTE(review): the KML spec orders <coordinates> as
                # lon,lat[,alt]; the first value is called "lat" here only
                # because the original script did -- confirm against the
                # actual KML files.  Output column order is unchanged.
                lat = parts[0]
                # 12 == len('coordinates>'): drop everything up to the tag.
                lat = lat[lat.find('coordinates') + 12:]
                lng = parts[1]
                return [lat, lng]
    return None


def _lookup_case(kml_dir, caseid):
    """Return [lat, lng] for caseid, or None if it cannot be geocoded.

    The first four characters of caseid select the file
    'cases-<year>geo.kml' inside kml_dir.  Only when that file has no
    coordinates for the case is the following year's file searched, each
    search starting from a clean state.  An unreadable KML file or a
    non-numeric year prefix makes the case count as not found.
    """
    year = caseid[0:4]
    try:
        found = _find_coordinates(
            os.path.join(kml_dir, 'cases-' + year + 'geo.kml'), caseid)
        if found is None:
            # didn't find caseid in expected file
            # check next year as a back-up
            next_year = str(int(year) + 1)
            found = _find_coordinates(
                os.path.join(kml_dir, 'cases-' + next_year + 'geo.kml'),
                caseid)
    except (IOError, OSError, ValueError):
        # caseid was not valid year
        return None
    return found


def main(cases_path='PROP1Cleanest.csv',
         output_path='allcasesplusgeo.csv',
         kml_dir='../Downloads'):
    """Copy geocodable rows of cases_path to output_path with coordinates.

    Each input row must contain a "F" field followed by two empty quoted
    fields; the case id is the fourth comma-separated field after "F" and
    the address is the first double-quoted value on the row.  Rows whose
    coordinates are found have '"F","",""' replaced by '"F",<lat>,<lng>'
    and are written out; every other row is dropped.  Coordinates are
    cached per address so repeated addresses skip the KML search.

    cases_path  -- input CSV of cases.
    output_path -- CSV to create (overwritten if it exists).
    kml_dir     -- directory holding the 'cases-<year>geo.kml' files.
    """
    geobyaddress = {}
    with open(cases_path, 'r') as cases, open(output_path, 'w') as geocases:
        for case in cases:
            if '"F"' not in case:
                # blank or malformed row: nothing to geocode
                continue
            myinfo = case.split('"F"')[1].split(",")
            myaddress = case.split('"')[1]

            lastgeo = geobyaddress.get(myaddress)
            if lastgeo is not None:
                print(lastgeo)
            else:
                # new address, look up location
                if len(myinfo) < 5:
                    continue
                caseid = myinfo[4]
                if len(caseid) < 4 or '"' in caseid:
                    # has an invalid caseid
                    print("error: " + caseid)
                    continue
                lastgeo = _lookup_case(kml_dir, caseid)
                if lastgeo is None:
                    continue
                geobyaddress[myaddress] = lastgeo

            # write the geocoded case
            geocases.write(
                case.replace(_EMPTY_GEO,
                             '"F",' + lastgeo[0] + "," + lastgeo[1]))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment