Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Convert Harvard Library Bibliographic Dataset (MARC21) to MODS XML
require 'File/MARC.php';
// Convert every MARC21 record found in /marc/*.mrc into its own MODS XML file,
// stored as /mods/<first 3 chars of id>/<next 3 chars of id>/<id>.xml.
$xsl = new DOMDocument;
// NOTE(review): nothing visible here loads a stylesheet into $xsl or hands it to
// $xsltproc->importStylesheet(); XSLTProcessor needs an imported stylesheet
// before it can transform. Confirm where the MARCXML-to-MODS stylesheet is loaded.
$xsltproc = new XSLTProcessor;

$marcxml = new DOMDocument;
$marcxml->preserveWhiteSpace = false;

// Counts records that had an identifier; used only to throttle progress output.
$i = 0;

// glob() returns false on error; fall back to an empty list so foreach is safe.
foreach (glob('/marc/*.mrc') ?: [] as $file) {
    $items = new File_MARC($file);

    while ($record = $items->next()) {
        $marcxml->loadXML($record->toXML(), LIBXML_NOCDATA);

        $mods = $xsltproc->transformToDoc($marcxml);
        if ($mods === false) {
            // Fail with a clear message instead of a TypeError from DOMXPath below.
            throw new RuntimeException("XSLT transformation failed for a record in $file");
        }

        $xpath = new DOMXPath($mods);
        // The prefix must be bound to the MODS v3 namespace URI; with an empty
        // URI the prefixed query below cannot match namespaced MODS elements.
        $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
        $nodes = $xpath->query('mods:mods/mods:recordInfo/mods:recordIdentifier');
        if (!$nodes->length) continue; // no identifier

        $id = $nodes->item(0)->textContent;

        // Use a dedicated variable for the output path so the input file name
        // held by the outer loop's $file is not overwritten.
        $target = sprintf('/mods/%s/%s/%s.xml', substr($id, 0, 3), substr($id, 3, 3), $id);

        // Print the path of every 1000th record as a progress indicator.
        if (($i++ % 1000) === 0) print "$target\n";

        $dir = dirname($target);
        if (!file_exists($dir)) mkdir($dir, 0777, true);

        $mods->formatOutput = true;
        // NOTE(review): only the first child of the document element is
        // serialized — presumably the single record inside a wrapper element;
        // confirm against the actual stylesheet output.
        file_put_contents($target, $mods->saveXML($mods->documentElement->firstChild));
    }
}
// Transform every MODS file in /mods/000/000 with an XSLT stylesheet and
// concatenate the results inside a single <batch> element written to
// /mods/cloud-000-000.xml.
$xsl = new DOMDocument;
// NOTE(review): nothing visible here loads a stylesheet into $xsl or hands it to
// $xsltproc->importStylesheet(); XSLTProcessor needs an imported stylesheet
// before it can transform. Confirm where the MODS-to-<add> stylesheet is loaded.
$xsltproc = new XSLTProcessor;

$mods = new DOMDocument;
$mods->preserveWhiteSpace = false;

// glob() returns false on error; fall back to an empty list so foreach is safe.
$files = glob('/mods/000/000/*.xml') ?: [];

$output = fopen('/mods/cloud-000-000.xml', 'w');
if ($output === false) {
    throw new RuntimeException('Unable to open /mods/cloud-000-000.xml for writing');
}

fwrite($output, "<batch>\n");

foreach ($files as $file) {
    // Skip files that cannot be read or parsed rather than transforming
    // whatever the document held before.
    if (!$mods->load($file, LIBXML_NOCDATA)) {
        continue;
    }

    // transformToXML() returns a non-string value on failure; only write real output.
    $xml = $xsltproc->transformToXML($mods);
    if (is_string($xml)) {
        fwrite($output, $xml);
    }
}

// Close the wrapper once, after all records, and release the file handle.
fwrite($output, "</batch>\n");
fclose($output);
<?xml version="1.0" encoding="UTF-8"?>
<!-- Turns one MODS record into an <add> element carrying a "title" field per
     title and a "subject" field per LCSH topic. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output method="xml" encoding="utf-8" omit-xml-declaration="yes" standalone="no" indent="yes"/>

  <!-- NOTE(review): unprefixed names in XSLT 1.0 patterns match only elements
       in no namespace; confirm the input MODS files carry no namespace,
       otherwise this template will not match. -->
  <xsl:template match="/mods">
    <add id="{recordInfo/recordIdentifier}" version="1" lang="en">
      <!-- One field per title. -->
      <xsl:for-each select="titleInfo/title">
        <field name="title"><xsl:value-of select="."/></field>
      </xsl:for-each>
      <!-- One field per topic of a subject whose authority is "lcsh". -->
      <xsl:for-each select="subject[@authority='lcsh']/topic">
        <field name="subject"><xsl:value-of select="."/></field>
      </xsl:for-each>
    </add>
  </xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.