Skip to content

Instantly share code, notes, and snippets.

@jeffgerhard
Last active July 31, 2019 11:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffgerhard/f01244e2b3aefd80c2497ddacf3207f2 to your computer and use it in GitHub Desktop.
Save jeffgerhard/f01244e2b3aefd80c2497ddacf3207f2 to your computer and use it in GitHub Desktop.
Internet Archive / Open Library TSV file to XML for Alma import profile as portfolios
# -*- coding: utf-8 -*-
"""
Build XML file of all our books' data for lending
for the purposes of short-term management of our electronic lending
collection in Alma
"""
import os
def xml_record(mmsid, search_id):
    """Return a single MARC21-slim ``<record>`` element as a string.

    The record holds ``mmsid`` in controlfield 001 and a 999 datafield with
    three subfields: ``u`` (the archive.org details URL built from
    ``search_id``), ``a`` and ``p`` (fixed public notes).

    Args:
        mmsid: Value written into controlfield 001; converted with ``str()``.
        search_id: Identifier appended to ``https://archive.org/details/``.

    Returns:
        The record markup as a string ending with a newline.
    """
    # Function-scope import so the block does not depend on edits elsewhere.
    from xml.sax.saxutils import escape

    # Escape both values: a raw '&', '<' or '>' would make the XML malformed.
    mms_text = escape(str(mmsid))
    url_text = escape('https://archive.org/details/' + str(search_id))

    # NOTE: the original literal had a stray apostrophe after the code="p"
    # subfield, which leaked into every datafield as text; it is removed here.
    return (
        ' <record xmlns="http://www.loc.gov/MARC21/slim">\n'
        '<controlfield tag="001">' + mms_text + '</controlfield>\n'
        '<datafield tag="999" ind1=" " ind2=" ">\n'
        '<subfield code="u">' + url_text + '</subfield>\n'
        '<subfield code="a">Create a free account with the Internet Archive '
        'to read online. PDF and EPUB versions are also available for use '
        'with Adobe Digital Editions.</subfield>\n'
        '<subfield code="p">Digitized copy may be available for checkout.'
        '</subfield>\n'
        '</datafield>\n'
        '</record>\n'
    )
if __name__ == "__main__":
    # Read the tab-separated match file, emit one <record> per data row via
    # xml_record(), wrap the records in a <collection> element, and write the
    # result to ia_import.xml in the same working directory.
    workingdir = r"G:\My Drive\Alma - IA dump"
    inputfile = os.path.join(workingdir, "FINAL-gl-inlibrary-matches.tsv")
    outputfile = os.path.join(workingdir, 'ia_import.xml')

    # Collect the pieces and join once instead of growing a string with +=.
    parts = ['<?xml version="1.0" encoding="UTF-8" ?>\n<collection>\n']

    # NOTE(review): the input encoding is not shown anywhere in this file;
    # UTF-8 is assumed, and undecodable bytes are replaced rather than
    # aborting the run. Only columns 1 and 2 are used -- confirm they are
    # plain ASCII identifiers.
    with open(inputfile, 'r', encoding='utf-8', errors='replace') as fh:
        for lineno, raw in enumerate(fh, start=1):
            line = raw.rstrip('\r\n')
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            # Split on tabs only: a bare split() also breaks on spaces inside
            # a field and drops empty fields, shifting the column indices.
            fields = [field.strip() for field in line.split('\t')]
            if len(fields) < 3 or not fields[1] or not fields[2]:
                raise ValueError(
                    '%s line %d: expected non-empty values in columns 2 and 3'
                    % (inputfile, lineno)
                )
            search_id, mmsid = fields[1], fields[2]
            parts.append(xml_record(mmsid, search_id))
    parts.append('</collection>')

    # The XML declaration promises UTF-8, so write UTF-8 explicitly rather
    # than relying on the platform default encoding.
    with open(outputfile, 'w', encoding='utf-8') as fh:
        fh.write(''.join(parts))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment