Skip to content

Instantly share code, notes, and snippets.

@mapmeld
Created July 11, 2012 00:50
Show Gist options
  • Save mapmeld/3087203 to your computer and use it in GitHub Desktop.
Save mapmeld/3087203 to your computer and use it in GitHub Desktop.
PredictPrep
# Predict Cases
# Prepare housing cases CSV for Google Prediction API
# Each case is labelled with where its address stands within the next three
# years, not with the ultimate result.  For example:
# "Some", 19960909, Cut and Cleared
# "None", 19980101, Cut and Cleared
# "None", 19980202, No Violations (not predicted by previous because there were no violations)
# "Demolished", 20020808, Repaired (prediction should be demolished because demo comes in < 3 years)
# "Demolished", 20030101, Demolished
#
# Rows are split naively on ',' (the quoted address itself contains commas),
# so the column indexes below count those embedded commas as well.

# Cases opened after this date (YYYYMMDD, compared as strings) cannot be
# labelled because their own three-year window is not over yet.
CUTOFF_DATE = "20090620"

# Adding 30000 to a YYYYMMDD integer moves the date three years forward.
THREE_YEARS = 30000

# Prefix put on rows that are kept for other cases' windows but never written.
SKIP_PREFIX = 'donotprint:'

# Column positions after the naive comma split.
CLOSE_DATE_FIELD = 9
CASE_ID_FIELD = 10
OPEN_DATE_FIELD = 11


def trim_city(address):
    """Cut *address* at the city name and drop the trailing ', ' piece.

    'Macon' is looked for first, then 'MACON'.  Text without either spelling
    is returned unchanged.  As in the original script, if nothing but the
    city follows the last comma-less text, the final character is dropped
    (``rfind`` returns -1).
    """
    for city in ('Macon', 'MACON'):
        pos = address.find(city)
        if pos > -1:
            address = address[0:pos]
            return address[0:address.rfind(',')]
    return address


def extract_address(row, start):
    """Return the street address that begins at index *start* of *row*.

    The address runs from *start* up to the next double quote and is then
    shortened with :func:`trim_city`.
    """
    rest = row[start:]
    return trim_city(rest[0:rest.find('"')])


def group_by_address(rows):
    """Map each address (text inside the first pair of quotes) to its rows.

    Rows keep their file order inside every group.
    """
    groups = {}
    for row in rows:
        key = extract_address(row, row.find('"') + 1)
        groups.setdefault(key, []).append(row)
    return groups


def collect_history(address, rows):
    """Build the case history of one address from its *rows*.

    Only the first row of every case id is kept.  Rows opened after
    CUTOFF_DATE are prefixed with SKIP_PREFIX.  Collection stops after the
    first row mentioning a demolition, which is reported on stdout.

    Returns a ``(lines, opendates)`` pair of parallel lists.
    """
    lines = []
    opendates = []
    seen_case_ids = set()
    for row in rows:
        fields = row.split(',')
        case_id = fields[CASE_ID_FIELD]
        open_date = fields[OPEN_DATE_FIELD]
        if case_id in seen_case_ids:
            continue
        seen_case_ids.add(case_id)
        # NOTE(review): the original script tried to blank close dates later
        # than the cutoff, but assigned the result to the outer loop's `line`,
        # which was never written out.  Close dates are therefore still
        # emitted unchanged -- confirm whether they should be blanked.
        if open_date > CUTOFF_DATE:
            # keep around for others' 3 year predictions, but it cannot
            # predict its own 3 year outcome
            row = SKIP_PREFIX + row
        lines.append(row)
        opendates.append(open_date)
        if "emolish" in row:
            # mark demolished even if the demolition came after the cutoff
            print(address + " demolished after " + str(len(lines)) + " reports in " + case_id)
            break
    return lines, opendates


def label_cases(lines, opendates):
    """Label every printable case with its address's three-year outcome.

    *lines* and *opendates* are the parallel lists from
    :func:`collect_history`.  Returns the output rows, each being the
    original line prefixed with '"Demolished",', '"Some",' or '"None",'.

    Raises ValueError if an open date that is examined is not an integer.
    """
    labelled = []
    # enumerate keeps the index aligned with the line even when a row is
    # skipped; the original hand-maintained counter fell behind on `continue`.
    for caseindex, case in enumerate(lines):
        # only print cases with opendates before the cutoff
        if SKIP_PREFIX in case:
            continue
        window_end = int(opendates[caseindex]) + THREE_YEARS
        nofuturecases = True
        demolished = False
        for futurecase in range(caseindex, len(lines)):
            if int(opendates[futurecase]) > window_end:
                # more than three years in the future - outside the window
                break
            candidate = lines[futurecase]
            if futurecase != caseindex and 'no violations' not in candidate.lower():
                # found a future case which is not a No Violations dismissal
                nofuturecases = False
            if 'emolish' in candidate:
                # a demolition inside the window (the case itself counts,
                # see the last example in the header)
                demolished = True
                break
        if demolished:
            label = '"Demolished",'
        elif not nofuturecases:
            label = '"Some",'
        else:
            label = '"None",'
        labelled.append(label + case)
    return labelled


def main(input_path='PROP1.csv', output_path='cases-predict-plus.csv',
         skipto="ELL ST, 0583"):
    """Read the cases CSV and write the labelled training CSV.

    input_path  -- source CSV of housing cases.
    output_path -- destination CSV; it is truncated on open.
    skipto      -- address at which processing starts; addresses first seen
                   before it are ignored.  Pass None to process everything.

    The input is read once and held in memory so each address's history can
    be looked up directly instead of re-reading the file per address.
    """
    with open(input_path, 'r') as prop:
        # blank lines carry no fields and would otherwise raise IndexError
        rows = [row for row in prop if row.strip()]
    history_by_address = group_by_address(rows)
    seen_addresses = set()
    with open(output_path, 'w') as thisyr:
        for line in rows:
            if line.split(',')[OPEN_DATE_FIELD] > CUTOFF_DATE:
                # do not start an address from a case opened after the cutoff
                continue
            address = extract_address(line, 1)
            if address in seen_addresses:
                continue
            # registered before the skipto test, so skipped addresses are
            # never picked up again further down the file
            seen_addresses.add(address)
            if skipto is not None:
                if address != skipto:
                    continue
                skipto = None
            lines, opendates = collect_history(
                address, history_by_address.get(address, []))
            thisyr.writelines(label_cases(lines, opendates))


if __name__ == "__main__":
    main()
@mapmeld
Copy link
Author

mapmeld commented Jul 11, 2012

TODO:

  1. Remove a few recent demolitions, and see whether any of their reports leads to a demolition prediction in the Prediction API
  2. Rewrite this script to label a house's status in 3 years (None, Some, or Demolished) and then run recent reports through the Prediction API
  3. Add lat / lng to data and predictions

@mapmeld
Copy link
Author

mapmeld commented Jul 11, 2012

Oh! Also add quotes to sometimes-blank fields, to fit https://developers.google.com/prediction/docs/developer-guide

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment