Skip to content

Instantly share code, notes, and snippets.

@anarchivist
Created December 22, 2009 02:46
Show Gist options
  • Select an option

  • Save anarchivist/261457 to your computer and use it in GitHub Desktop.

Select an option

Save anarchivist/261457 to your computer and use it in GitHub Desktop.
Grab records from a CSV file to do lookups against the Virtual International Authority File
import csv
import re
import time
import string
import sys
import urllib
import urllib2
import pymarc
# Matches any single ASCII punctuation character. The punctuation set is
# escaped before it is placed in the character class: left unescaped, the
# "\]" sequence inside string.punctuation is parsed as an escaped "]", so a
# literal backslash would never be matched.
PUNC_RE = re.compile('[%s]' % re.escape(string.punctuation))

# Base address used for both searches and authority-record fetches.
# NOTE(review): this looks like a pre-production (labs) host -- confirm it is
# still the endpoint you want before running.
VIAF_URL = 'http://orlabs.oclc.org/viaf/'

# Search URL prefix; the quoted, URL-escaped name is appended directly after.
QUERY_BASE = VIAF_URL + 'search/VIAF?query=local.personalName+all+'

# Fixed tail of the search URL, appended after the quoted name.
QUERY_PARAMS = (
    '+&version=1.1'
    '&maximumRecords=100'
    '&operation=searchRetrieve'
    '&stylesheet=/viaf/xsl/Results.xsl'
    '&sortKeys=holdingscount'
    '&recordSchema=BriefMarcXML'
)
def normalize(instr):
    """Return *instr* prepared for inclusion in a query URL.

    Every character matched by PUNC_RE is replaced with a single space,
    leading and trailing whitespace is stripped, and the result is escaped
    with urllib.quote_plus.
    """
    cleaned = PUNC_RE.sub(' ', instr)
    return urllib.quote_plus(cleaned.strip())
def grab_response(name):
    """Open the search URL for *name* and return the response object.

    The URL is QUERY_BASE, then the normalized name wrapped in URL-encoded
    double quotes (%22), then QUERY_PARAMS. The object returned by
    urllib.urlopen is handed back unread; closing it is left to the caller.
    """
    quoted_name = '%22' + normalize(name) + '%22'
    return urllib.urlopen(QUERY_BASE + quoted_name + QUERY_PARAMS)
def do_it(r):
    """Print the string form of *r*, encoded as UTF-8.

    The result of r.__str__() is converted with unicode() (no explicit
    encoding is given, so the default codec applies) and then encoded to
    UTF-8 with the 'ignore' error handler before printing.
    """
    text = unicode(r.__str__())
    print(text.encode("utf-8", 'ignore'))
def is_not_none(n):
    """Return True unless *n* is the None object (usable as a filter predicate)."""
    if n is None:
        return False
    return True
def grab_auth(identifier):
    """Open the authority-record URL for *identifier* and return the response.

    The URL is VIAF_URL with *identifier* appended as-is (no escaping is
    applied). The object returned by urllib.urlopen is handed back unread.
    """
    record_url = ''.join((VIAF_URL, identifier))
    return urllib.urlopen(record_url)
# Script body.
#
# Usage: <script> INPUT_CSV OUTPUT_CSV
#
# Reads INPUT_CSV (columns 'Uniform_Author_Name' and 'LOC' are used), runs a
# search for each name, and writes one row per outcome to OUTPUT_CSV with the
# columns InName, LOC, ParsedName, ParsedId. For each name, the first returned
# record whose 001 field starts with 'LC' is written and the remaining
# records are skipped. A record that raises while being inspected or written
# produces a 'Fail' row, and scanning continues with the next record.
#
# Both files are managed by `with` so they are flushed and closed even when a
# lookup raises; csv.writer objects themselves have no close() method.
with open(sys.argv[1], 'rU') as infile:
    # Python 2's csv module expects the output file in binary mode so that it
    # controls line endings itself (text mode doubles '\r' on Windows).
    with open(sys.argv[2], 'wb') as outfile:
        reader = csv.DictReader(infile)
        writer = csv.writer(outfile)
        writer.writerow(('InName', 'LOC', 'ParsedName', 'ParsedId'))
        for row in reader:
            xh = pymarc.XmlHandler()
            inname = row['Uniform_Author_Name']
            lc = row['LOC']

            # Close the HTTP response as soon as it has been parsed.
            response = grab_response(inname)
            try:
                pymarc.parse_xml(response, xh)
            finally:
                response.close()

            # Drop any None entries the handler collected.
            xh.records = filter(is_not_none, xh.records)
            for rec in xh.records:
                try:
                    identifier = rec['001'].data
                    if identifier.startswith('LC'):
                        pn = rec['100'].format_field()
                        writer.writerow((inname, lc, pn, identifier))
                        print("%s, %s, %s, %s" % (inname, lc, pn, identifier))
                        break
                except Exception:
                    # Best-effort: record the failure and keep going.
                    # Catching Exception (not a bare except) lets Ctrl-C and
                    # SystemExit still stop the run.
                    identifier = 'Fail'
                    pn = 'Fail'
                    writer.writerow((inname, lc, pn, identifier))
                    print("%s, %s, %s, %s" % (inname, lc, pn, identifier))

            # Pause between names to avoid hammering the remote service.
            time.sleep(1)
@anarchivist
Copy link
Author

This is an old script that predates the production service - but I will try to find time to factor in your changes. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment