Skip to content

Instantly share code, notes, and snippets.

@cds-amal
Created March 12, 2015 19:19
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cds-amal/dba5fa9bac6f57ac3f26 to your computer and use it in GitHub Desktop.
Save cds-amal/dba5fa9bac6f57ac3f26 to your computer and use it in GitHub Desktop.
import csv
import urllib
import re
from nyc_geoclient import Geoclient
from sys import argv
from sys import exit
# Command-line arguments: <script> <input.csv> <output.csv>.
# Unpack defensively: a bare ``script, infile, outfile = argv`` raises
# ValueError at import time whenever the argument count is not exactly
# three, which pre-empts the usage message in the ``__main__`` guard at
# the bottom of the file (and makes the module impossible to import).
script = argv[0] if argv else None
infile, outfile = argv[1:3] if len(argv) == 3 else (None, None)

# Shared Geoclient handle used by ping_geoclient().
# NOTE(review): these two literals look like hard-coded API credentials
# (presumably app id and app key -- confirm against nyc_geoclient docs).
# Consider loading them from the environment or an untracked config file.
g = Geoclient('9cd0a15f', '54dc84bcaca9ff4877da771750033275')
def parse_response(data):
    """Extract the ``bbl`` value from a response returned by nyc_geoclient.

    Args:
        data: The object returned by the Geoclient call. Expected to be a
            dictionary; any ``dict`` subclass is accepted as well.

    Returns:
        The value stored under the ``'bbl'`` key, or ``None`` when ``data``
        is not a dictionary or does not contain that key.
    """
    # isinstance() also accepts dict subclasses, which an exact
    # ``type(data) == type({})`` comparison would silently reject.
    if not isinstance(data, dict):
        return None
    return data.get('bbl')
# A run of digits, optionally followed by a single stand-alone upper-case
# letter suffix (e.g. "123A" or "123 A").  The original pattern used
# ``(A-Z)``, which matches the literal text "A-Z" rather than a letter.
_BLDG_NO_RE = re.compile(r'(?P<streetnumber>\d+)(?:\s*(?P<letter>[A-Z])\b)?')


def clean_bldg_no(num):
    """Reduce a building-number field to its number plus optional letter.

    Args:
        num: Building number as a string, possibly with extra text around it.

    Returns:
        The first run of digits found in ``num``, together with a directly
        following single upper-case letter suffix when one is present
        (whitespace between the two is kept as-is).  When ``num`` contains
        no digits it is returned unchanged.
    """
    match = _BLDG_NO_RE.search(num)
    # The word boundary in the pattern stops the first letter of a longer
    # word (e.g. "123 REAR") from being mistaken for a suffix.
    return match.group(0) if match else num
def ping_geoclient(number, street, code):
    """Query the NYC Geoclient address endpoint and parse its answer.

    The three arguments are forwarded unchanged to ``g.address`` and the
    raw response is handed to ``parse_response``, whose result is returned.
    """
    return parse_response(g.address(number, street, code))
def try_addresses(addresses, boro):
    """Return the first successful Geoclient lookup among candidate addresses.

    Args:
        addresses: Sequence of ``[building_number, street]`` pairs. They are
            tried in order and the lookup stops at the first success.
        boro: Borough code forwarded to ``ping_geoclient`` for every attempt.

    Returns:
        The first lookup result that is non-empty and does not contain the
        substring ``'error'``, or the string ``'null'`` when every candidate
        fails (or no candidates were given).
    """
    for address in addresses:
        result = ping_geoclient(clean_bldg_no(address[0]), address[1], boro)
        # Any falsy result (None, empty string) or one mentioning 'error'
        # counts as a failed attempt.  The previous if/elif ladder left the
        # next attempt variable unbound for falsy non-None results and
        # crashed with UnboundLocalError.
        if result and 'error' not in result:
            return result
    return 'null'
# Column indexes of (building number, street name, street suffix) for the
# three candidate addresses carried by each input row.
_ADDRESS_COLUMNS = ((1, 3, 4), (6, 8, 9), (11, 13, 14))


def read_csv(infile, outfile):
    """Look up a BBL for every row of ``infile`` and write rows to ``outfile``.

    The input CSV contains fields for building number, street name, street
    suffix, boro code, and zipcode.  Its first row is treated as a header and
    copied through unchanged.  For each data row up to three candidate
    addresses are tried via ``try_addresses``; the result overwrites the
    row's last column and the row is written to ``outfile``.

    Rows that raise an exception are reported on stdout and are NOT written
    to ``outfile``.

    Args:
        infile: Path of the CSV file to read.
        outfile: Path of the CSV file to create / overwrite.
    """
    # Binary mode is what the Python 2 csv module expects for its files.
    with open(infile, 'rb') as f:
        with open(outfile, 'wb') as w:
            reader = csv.reader(f)
            writer = csv.writer(w)

            header = next(reader, None)  # skip CSV header
            if header is not None:
                # An empty input file has no header; writerow(None) would raise.
                writer.writerow(header)

            for row in reader:
                try:
                    # Pair each building number with its URL-encoded
                    # "street suffix" string.
                    addresses = [
                        [row[number], urllib.quote_plus(row[street] + ' ' + row[suffix])]
                        for number, street, suffix in _ADDRESS_COLUMNS
                    ]
                    boro_code = row[-3]

                    # Query once and reuse the answer: calling try_addresses
                    # separately for the log line and for the stored value
                    # doubled the API traffic and could yield two different
                    # results for the same row.
                    bbl = try_addresses(addresses, boro_code)
                    # Single-argument, parenthesised prints parse on both
                    # Python 2 and 3 and emit the same text as before.
                    print('bbl:  %s' % (bbl,))
                    row[-1] = bbl
                    writer.writerow(row)
                except csv.Error as e:
                    # todo - append to a csv file; goal is to have a csv of entries that failed.
                    print('CSV Parse Error:')
                    print('file %s, line %d: %s' % (infile, reader.line_num, e))
                    print('===\n\n\n')
                except Exception as e:
                    # Deliberate best-effort: report the bad row and move on.
                    # todo - append to a csv file; goal is to have a csv of entries that failed.
                    print('Exception: ')
                    print(e)
                    print(row)
                    print('====\n\n')
if __name__ == '__main__':
    # Only ``argv`` and ``exit`` are imported from sys at the top of the
    # file, so the bare name ``sys`` used below was undefined (NameError on
    # the usage path).  Import the module here, where it is needed.
    import sys

    # Expect exactly: <script> <input.csv> <output.csv>
    if len(sys.argv) == 3:
        read_csv(sys.argv[1], sys.argv[2])
    else:
        sys.stderr.write(u'''
Should be called with a file name for CSV input and filename for CSV output data
''')
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment