Skip to content

Instantly share code, notes, and snippets.

@jmelesky
Created August 27, 2011 18:47
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmelesky/1175729 to your computer and use it in GitHub Desktop.
Save jmelesky/1175729 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# on a unix-type system, it runs like this:
# ./parse_csv.py INPUTFILE > OUTPUTFILE
# where INPUTFILE is a .csv file like the dump we were given
# it creates OUTPUTFILE, which is a .csv file like the input file,
# but with the additional parsed-out fields (OWNER_ADDR,
# OWNER_APT_SUITE, SITE_ADDR, and SITE_APT_SUITE).
import sys
import csv
import re
# Column order of the input dump.
fields = [
    "APN", "RNO", "OWNER1", "OWNER2", "OWNERADDR", "OWNERCITY",
    "OWNERSTATE", "OWNERZIP", "SITEADDR", "SITECITY",
    "SITESTATE", "SITEZIP",
]

# Column order of the output: the input columns plus the parsed-out
# street/unit pair placed right after each original address column.
new_fields = [
    "APN", "RNO", "OWNER1", "OWNER2", "OWNERADDR",
    "OWNER_ADDR", "OWNER_APT_SUITE", "OWNERCITY",
    "OWNERSTATE", "OWNERZIP", "SITEADDR", "SITE_ADDR",
    "SITE_APT_SUITE", "SITECITY", "SITESTATE", "SITEZIP",
]

# group(1): everything before the unit keyword (lazy, so the first keyword
#           that can match wins)
# group(2): the unit keyword itself
# group(3): the rest of the line; it must start with whitespace or a digit,
#           which keeps words such as "STEELE" from matching "STE".
# Raw string so that \s and \d reach the regex engine untouched.
suite_re = re.compile(
    r"^(.*?)\s+(#|STE #|STE|UNIT|ROOM|MS#|RM|APT|BOX|FSC)([\s\d].*?)$"
)


def pull_suite(str):
    """Split an address into its street part and its apartment/suite part.

    Returns a 2-tuple ``(address, suite)``.  When ``suite_re`` matches,
    ``address`` is the text before the unit keyword and ``suite`` is the
    keyword joined with whatever follows it.  Otherwise the input is
    returned unchanged with an empty ``suite``.

    NOTE: the parameter name shadows the builtin ``str``; it is kept as-is
    so existing keyword callers keep working.
    """
    m = suite_re.match(str)
    if m:
        return (m.group(1), m.group(2) + m.group(3))
    return (str, "")


def _expand_row(row):
    """Return the output values (in ``new_fields`` order) for one input row."""
    row_fields = dict(zip(fields, row))
    # Missing trailing columns are treated as empty instead of raising
    # KeyError on a short row.
    own_addr, own_ste = pull_suite(row_fields.get("OWNERADDR", ""))
    row_fields["OWNER_ADDR"] = own_addr
    row_fields["OWNER_APT_SUITE"] = own_ste
    site_addr, site_ste = pull_suite(row_fields.get("SITEADDR", ""))
    row_fields["SITE_ADDR"] = site_addr
    row_fields["SITE_APT_SUITE"] = site_ste
    return [row_fields.get(f, "") for f in new_fields]


def convert(in_stream, out_stream):
    """Read the CSV dump from ``in_stream`` and write the expanded CSV.

    The first input row is assumed to be a header and is discarded; the
    output always starts with a header built from ``new_fields``.  Every
    output value is double-quoted.
    """
    reader = csv.reader(in_stream)
    # csv.writer doubles embedded quote characters, which hand-built
    # '"%s"' formatting does not.  "\n" matches what print would emit.
    writer = csv.writer(out_stream, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerow(new_fields)
    # next() with a default works on Python 2.6+ and 3, and does not raise
    # on an empty input file.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; nothing to convert.
            continue
        writer.writerow(_expand_row(row))


def main(argv):
    """Command-line entry point: ``parse_csv.py INPUTFILE > OUTPUTFILE``."""
    if len(argv) < 2:
        sys.exit("usage: parse_csv.py INPUTFILE > OUTPUTFILE")
    # The with-block closes the input file even if conversion fails.
    with open(argv[1]) as in_file:
        convert(in_file, sys.stdout)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment