Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sedona Public Library's Brand New CSV County Getter
#!/usr/bin/python2
''' Specify a CSV file as a command line argument; its rows are read as
(patron barcode, patron address) pairs.
Creates a new CSV, ./counties.csv, with each address replaced by its county.
Skips patrons whose lookup throws an error and writes their details to ./errors.
'''
import sys
import re
import urllib, urllib2
import cookielib
from os.path import isfile
from csv import DictReader, DictWriter
# --- input / output files ---------------------------------------------------
# Fail with a readable message instead of a bare IndexError when the input
# file argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s PATRONS.csv' % sys.argv[0])

# Every input row is read as (patron barcode, patron address); because the
# field names are given explicitly, the first row is treated as data too.
# The Python 2 csv module wants its files opened in binary mode ('rb'/'wb');
# in text mode Windows output gets an extra blank line after every row.
addresses = DictReader(open(sys.argv[1], 'rb'), ['patron barcode', 'patron address'])
outfile = DictWriter(open('./counties.csv', 'wb'), ['patron barcode', 'patron county'])
# Plain-text log of the rows that could not be looked up.
errorfile = open('./errors', 'w')

# --- cookie handling --------------------------------------------------------
# we need to handle cookies
cookieFile = './cookies.lwp'
cookieJar = cookielib.LWPCookieJar()
if isfile(cookieFile):
    # reuse cookies saved by a previous run
    cookieJar.load(cookieFile)
# Install a global opener so every urllib2.urlopen() call sends and stores
# cookies through cookieJar.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
urllib2.install_opener(opener)
def melissa(address):
    '''Look up the county for an address on melissadata.com.

    The address is sent as the "InData" query parameter of the AddressCheck
    page and the county name is scraped out of the returned HTML.

    address -- the address string to look up.

    Returns the county text with str.capitalize() applied (first character
    upper-cased, the rest lower-cased).

    Raises Exception when the page asks for cookies, when it does not report
    "Address Verified", or when the county markup cannot be found in a
    verified page.
    '''
    queryurl = "http://www.melissadata.com/lookups/AddressCheck.asp?" + urllib.urlencode({"InData": address})
    response = urllib2.urlopen(queryurl)
    try:
        html = response.read()
    finally:
        # always release the connection, even if the read fails
        response.close()

    if "Address Verified" in html:
        # not the prettiest, but the html we're getting isn't marked up very
        # well: take the first bold text within 50 characters of the first
        # occurrence of the word "County".
        cindex = html.find('County')
        if cindex == -1:
            raise Exception("Melissa verified the address but returned no county.")
        # non-greedy so a second <b>...</b> pair inside the window cannot be
        # swallowed into the county name
        match = re.search(r'<b>(.*?)</b>', html[cindex:cindex + 50])
        if match is None:
            raise Exception("Melissa verified the address but the county could not be parsed.")
        return match.group(1).capitalize()

    if "Cookies must be enabled" in html:
        raise Exception("Melissa wants to eat your cookies.")
    raise Exception("Melissa returned some kind of error.")
# Look up every patron row, writing successes to the counties CSV and logging
# failures. The cleanup lives in `finally` so that cookies are still saved and
# the error log is still closed if the run is interrupted part-way through.
try:
    for address in addresses:
        try:
            county = {'patron barcode': address['patron barcode'],
                      'patron county': melissa(address['patron address'])}
            outfile.writerow(county)
        except Exception as e:
            # Deliberately broad: one bad row must not stop the whole batch.
            # Record the error message and the offending row, then a blank
            # line as a separator, and move on to the next patron.
            errorfile.write(str(e) + '\n')
            errorfile.write(str(address) + '\n')
            errorfile.write('\n')
finally:
    try:
        # persist cookies so the next run can reuse them
        cookieJar.save(cookieFile)
    finally:
        errorfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment