Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generate Archives Hub URIs for records matching a given subject
"""Generate Archives Hub URIs for records matching a given subject.
Usage: %prog [options] subject
"""
import sys
import os
from contextlib import contextmanager
from optparse import OptionParser
# Root of the Cheshire3 installation: taken from the C3HOME environment
# variable, falling back to ~/cheshire3 when it is not set.
cheshirePath = os.environ.get('C3HOME', os.path.expanduser('~/cheshire3'))
# Hack sys.path to look for over-ridden cheshire3 modules
# (inserted at index 1, so the existing sys.path[0] entry keeps priority;
# this must happen before the cheshire3 imports below).
sys.path.insert(1, os.path.join(cheshirePath, 'cheshire3', 'code'))
# Cheshire3 imports
from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
# Build environment...
# A single module-level Session is shared by everything in this script.
session = Session()
# Server object built from <cheshirePath>/cheshire3/configs/serverConfig.xml
serv = SimpleServer(session,
                    os.path.join(cheshirePath,
                                 'cheshire3',
                                 'configs',
                                 'serverConfig.xml')
                    )
class MyOptionParser(OptionParser):
    """Custom option parser for outputting list of record URIs.

    Registers two options on top of the standard ``OptionParser`` ones:

    ``-o`` / ``--output`` OUTFILE
        Stored as ``options.outfile`` (default ``None``).
    ``-c`` / ``--components``
        Boolean flag stored as ``options.components`` (default ``False``).

    Unless the caller supplies its own ``usage`` keyword, the usage line
    defaults to ``%prog [options] subject`` so that ``--help`` mentions the
    subject argument.
    """

    def __init__(self, **kwargs):
        """Initialise the parser; ``kwargs`` are passed to ``OptionParser``."""
        # Only fills in the usage string when the caller did not pass one.
        kwargs.setdefault('usage', '%prog [options] subject')
        OptionParser.__init__(self, **kwargs)
        # Options
        self.add_option(
            "-o",
            "--output",
            dest="outfile",
            default=None,
            help=" ".join(["Write output to OUTFILE.",
                           "If omitted output will be printed to STDOUT."]),
            metavar="OUTFILE"
        )
        self.add_option(
            "-c",
            "--components",
            dest="components",
            action="store_true",
            default=False,
            help="Include component URIs"
        )
@contextmanager
def get_outputStream(options):
    """Context manager for appropriate output stream based on given options.

    Yields ``sys.stdout`` when ``options.outfile`` is ``None``; otherwise
    opens ``options.outfile`` (made absolute) for writing, yields the file
    object, and closes it when the ``with`` block exits, whether normally
    or through an exception. ``sys.stdout`` is never closed here.
    """
    if options.outfile is None:
        yield sys.stdout
        return
    # The with-statement guarantees the file is closed on any exit path.
    with open(os.path.abspath(options.outfile), 'w') as stream:
        yield stream
def _subject_query_string(subject, components):
    """Return the CQL query string for an exact dc.subject match."""
    qString = 'dc.subject exact "{0}"'.format(subject)
    if not components:
        # Without the components option the ead.istoplevel=1 clause is
        # added (presumably limiting matches to top-level records; confirm
        # against the index configuration).
        qString += ' and ead.istoplevel=1'
    return qString


def printSubjectURIs(options, args):
    """Write the URI of every record matching the given subject.

    ``options`` supplies ``components`` (whether to omit the
    ``ead.istoplevel=1`` restriction) and ``outfile`` (used by
    ``get_outputStream``). ``args`` is the positional argument list;
    ``args[0]`` is skipped and the remaining items are joined with spaces
    to form the subject.

    Returns 0 on success, or 1 when no subject was given or the query
    could not be parsed.
    """
    subject = ' '.join(args[1:])
    if not subject.strip():
        # An empty subject would otherwise be sent as: dc.subject exact ""
        sys.stderr.write("No subject given; nothing to search for.\n")
        return 1
    # Put the subject into a query string
    qString = _subject_query_string(subject, options.components)
    # Parse the query CQL into a Query object
    try:
        q = qf.get_query(session, qString)
    except Exception:
        # Not a bare except: Ctrl-C / SystemExit must still propagate.
        session.logger.log_error(session, "Invalid CQL: {0}".format(qString))
        return 1
    # Search the database with the Query
    resultSet = db.search(session, q)
    unitidWf = db.get_object(session, 'unitidIdentifierWorkflow')
    # Get the output stream (usually file or stdout)
    with get_outputStream(options) as outStream:
        # Iterate through all results in resultSet
        for result in resultSet:
            # Fetch the Record in EAD schema
            rec = result.fetch_record(session, schema='ead')
            # Use a Workflow to return the <unitid> based identifier
            new_id = unitidWf.process(session, rec)
            # Assemble the URI
            uri = "{0}/{1}".format(baseDataUri, new_id)
            # Write the URI to the output stream
            outStream.write(uri + '\n')
    return 0
# Look up the 'db_hub' object on the server and make it the session's
# current database.
db = serv.get_object(session, 'db_hub')
session.database = db.id
# Logger used by printSubjectURIs to report invalid CQL.
session.logger = db.get_path(session, 'defaultLogger')
# Query factory used to turn CQL strings into Query objects.
qf = db.get_object(session, 'defaultQueryFactory')
# Pieces of the public URI prefix; record identifiers are appended to
# baseDataUri as "<baseDataUri>/<id>".
hubBaseUrl = 'http://archiveshub.ac.uk'
apacheDataUrl = '/data'
baseDataUri = '{0}{1}'.format(hubBaseUrl, apacheDataUrl)
# Module-level parser instance used by the __main__ entry point.
option_parser = MyOptionParser()
if __name__ == '__main__':
    # The full sys.argv is parsed on purpose: args[0] is then the program
    # name, which printSubjectURIs skips via args[1:].
    (options, args) = option_parser.parse_args(sys.argv)
    # Use the function's return value (0 or 1) as the process exit status.
    sys.exit(printSubjectURIs(options, args))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment