Skip to content

Instantly share code, notes, and snippets.

@anarchivist
Created December 31, 2009 04:36
Show Gist options
  • Save anarchivist/266614 to your computer and use it in GitHub Desktop.
Save anarchivist/266614 to your computer and use it in GitHub Desktop.
Create individual MARCXML files for the Archivists' Toolkit from a MARC21 file
"""creates individual MARCXML files for archivists toolkit from a MARC21 file"""
import pymarc
import os
import sys
# XML prolog plus the opening <collection> element written at the start of
# every output file; the closing </collection> is appended after each record.
header = u"""<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="MARC21slim2HTML.xsl" ?>
<collection xmlns="http://www.loc.gov/MARC21/slim"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/MARC21/slim
http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">"""

# Command line: MARC_FILE OUTPUT_DIR [TAG]
#   MARC_FILE   path of the MARC21 file to split
#   OUTPUT_DIR  existing directory that receives one .xml file per record
#   TAG         field whose formatted value is used in the file name
#               (defaults to '090')
if len(sys.argv) < 3:
    sys.exit('usage: %s MARC_FILE OUTPUT_DIR [TAG]' % sys.argv[0])

if len(sys.argv) >= 4:
    tag = sys.argv[3]
else:
    tag = '090'

count = 0
# MARC21 is a binary format, so read bytes; the context manager guarantees
# the input is closed even if a record fails part-way through.
with open(sys.argv[1], 'rb') as marc_file:
    reader = pymarc.MARCReader(marc_file)
    for record in reader:
        count += 1
        # get_fields() returns a (possibly empty) list, so this does not
        # depend on whether record[tag] returns None or raises for a
        # missing tag.
        fields = record.get_fields(tag)
        if fields:
            recnum = fields[0].format_field()
            # Neutralise both kinds of path separator so the value cannot
            # point outside (or into a non-existent child of) OUTPUT_DIR.
            for separator in ('\\', '/'):
                recnum = recnum.replace(separator, ' ')
            recnum = recnum.strip()
        else:
            recnum = ''
        # Format only the file name, then join: a '%' in OUTPUT_DIR must not
        # be interpreted as a format specifier.
        filename = os.path.join(sys.argv[2], '%08d-%s.xml' % (count, recnum))
        xml = pymarc.record_to_xml(record)
        # record_to_xml() may hand back bytes or text depending on the
        # Python/pymarc combination; normalise to UTF-8 bytes for writing.
        if not isinstance(xml, bytes):
            xml = xml.encode('utf-8')
        with open(filename, 'wb') as fh:
            fh.write(header.encode('utf-8') + xml + b'</collection>')
print('%s total records' % count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment