Skip to content

Instantly share code, notes, and snippets.

@bulbil
Last active February 10, 2017 00:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bulbil/10f46b5f146f6d3480b7 to your computer and use it in GitHub Desktop.
Save bulbil/10f46b5f146f6d3480b7 to your computer and use it in GitHub Desktop.
stylesheet for converting Early Novels Database related MARCXML to TSV
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
xmlns:marc="http://www.loc.gov/MARC21/slim"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:exslt="http://exslt.org/common"
xmlns:str="http://exslt.org/strings"
exclude-result-prefixes="marc">
<!-- <xsl:import href="http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl"/>
-->
<xsl:output method="text" omit-xml-declaration="yes" indent="no" encoding="utf-8"/>
<!--
2016-12-14
A stylesheet for converting MARC XML to TSV
Nabil Kashyap
github:@bulbil
digitalscholarship@swarthmore.edu
example usage on macOS, where xsltproc is built in (any other XSLT 1.0 processor should work too):
xsltproc -o outputpath.tsv stylesheet.xsl xmlinput.xml
-->
<!--
  Column definitions for the TSV, one field element per output column, in output order.
  The text of each field element is the column name written to the header row.
    @tag  = MARC datafield tag; 'leader' and 'id' are pseudo tags handled by dedicated
            branches of the marcXMLToTSV template, and '90' is matched there against datafield 090
    @code = MARC subfield code
    @flag = 'm' (multiple): every matching subfield of every matching datafield is joined into one cell,
            'b' (boolean): the cell is 1 when the datafield (or subfield, if @code is given) exists, else 0
-->
<xsl:variable name="fields">
  <!-- record level values -->
  <field tag="leader">leader</field>
  <field tag="id">id</field>
  <field tag="90">callNumber</field>
  <!-- author, title, publication, format -->
  <field tag="100" code="a">authorName</field>
  <field tag="100" code="d">authorDates</field>
  <field tag="246" code="a">ENDFullTitle</field>
  <field tag="260" code="a">pubLocationStatement</field>
  <field tag="260" code="c">pubDateTranscribed</field>
  <field tag="300" code="x">format</field>
  <field tag="300" code="z">formatStatement</field>
  <!-- notes and paratext -->
  <field tag="500" code="a" flag="m">generalNotes</field>
  <field tag="520" code="a" flag="m">paratextTitleControlled</field>
  <field tag="520" code="x" flag="m">paratext</field>
  <!-- 59x local fields -->
  <field tag="591" code="a" flag="b">epigraphBoolean</field>
  <field tag="592" code="a" flag="m">narrativeFormPrimary</field>
  <field tag="592" code="b" flag="m">narrativeFormAdditional</field>
  <field tag="592" code="c" flag="m">nonProseForms</field>
  <field tag="594" flag="b">inscriptionBoolean</field>
  <field tag="595" flag="b">marginaliaBoolean</field>
  <field tag="596" code="a">translationClaimControlled</field>
  <!-- 989 title word lists -->
  <field tag="989" code="1" flag="m">titleOtherWorks</field>
  <field tag="989" code="2" flag="m">titleNouns</field>
  <field tag="989" code="3" flag="m">titleAdjectives</field>
  <field tag="989" code="4" flag="m">titlePlaces</field>
  <field tag="989" code="5" flag="m">titleNames</field>
  <field tag="989" code="6" flag="m">titleVerbs</field>
  <field tag="989" code="7" flag="m">titleObjects</field>
  <field tag="989" code="8" flag="m">titleAdverbs</field>
  <!-- cataloger -->
  <field tag="999">catalogedBy</field>
</xsl:variable>
<!-- Cell separator of the TSV file: a single tab character -->
<xsl:variable name="tab" select="'&#x09;'"/>
<!-- Row terminator of the TSV file: a single line feed -->
<xsl:variable name="newline" select="'&#xa;'"/>
<!-- Separator placed between multiple values sharing one cell: comma followed by a space -->
<xsl:variable name="delimiter" select="'&#44;&#32;'"/>
<!-- every marc:record element of the input document, in document order -->
<xsl:variable name="recordSet" select="/descendant::marc:record"/>
<!--
  Pseudo array of column definitions: document('') re-reads this stylesheet so the
  field elements declared in the 'fields' variable can be iterated as a node-set.
-->
<xsl:param name="fieldset" select="document('')/xsl:stylesheet/xsl:variable[@name='fields']/field"/>
<!--
  marcXMLToTSV: writes one TSV cell for one column definition of one record,
  followed by the separator that belongs after it (tab, or newline after the last column).
  params:
    tag      - tag attribute of the column definition: 'leader', 'id' or a datafield tag
    code     - code attribute of the column definition (subfield code); may be absent
    position - index of the column definition within $fieldset
    record   - index of the record within $recordSet
    flag     - flag attribute of the column definition: 'm' joins all matching subfields
               with $delimiter, 'b' writes 1 or 0 depending on presence; may be absent
  Every value is passed through normalize-space so that a tab or line break inside
  a MARC value cannot split a cell or a row of the output.
-->
<xsl:template name="marcXMLToTSV">
  <xsl:param name="tag"/>
  <xsl:param name="code"/>
  <xsl:param name="position"/>
  <xsl:param name="record"/>
  <xsl:param name="flag"/>
  <!-- the record being written, its datafields for $tag, and their subfields for $code -->
  <xsl:variable name="currentRecord" select="$recordSet[$record]"/>
  <xsl:variable name="currentField" select="$currentRecord/marc:datafield[@tag=$tag]"/>
  <xsl:variable name="currentSubfield" select="$currentField/marc:subfield[@code=$code]"/>
  <!-- cell value; an empty node-set yields an empty cell in every branch except 'b' -->
  <xsl:choose>
    <!-- marc leader / marc:leader -->
    <xsl:when test="$tag='leader'">
      <!-- NOTE(review): normalize-space also collapses runs of blanks inside the leader;
           confirm that consumers of the TSV do not rely on leader character positions -->
      <xsl:value-of select="normalize-space($currentRecord/marc:leader)"/>
    </xsl:when>
    <!-- id / marc:controlfield tag="001" -->
    <xsl:when test="$tag='id'">
      <xsl:value-of select="normalize-space($currentRecord/marc:controlfield[@tag='001'])"/>
    </xsl:when>
    <!-- call number / marc:datafield tag="090"; the column definition says '90', which is
         not string-equal to '090', so the datafield is looked up explicitly here -->
    <xsl:when test="$tag='90'">
      <xsl:variable name="callNumberField" select="$currentRecord/marc:datafield[@tag='090']"/>
      <!-- NOTE(review): subfields a and b are written back to back with no separator; confirm this is intended -->
      <xsl:value-of select="normalize-space($callNumberField/marc:subfield[@code='a'])"/>
      <xsl:value-of select="normalize-space($callNumberField/marc:subfield[@code='b'])"/>
    </xsl:when>
    <!-- reducing multiple subfields to a delimited list in a single cell -->
    <xsl:when test="$flag='m'">
      <xsl:for-each select="$currentSubfield">
        <xsl:value-of select="normalize-space(.)"/>
        <xsl:if test="position() != last()">
          <xsl:value-of select="$delimiter"/>
        </xsl:if>
      </xsl:for-each>
    </xsl:when>
    <!-- boolean fields: 1 when the subfield (if a code is given) or the datafield exists, else 0 -->
    <xsl:when test="$flag='b'">
      <xsl:choose>
        <xsl:when test="$code">
          <xsl:value-of select="number(boolean($currentSubfield))"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="number(boolean($currentField))"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- cataloged by / marc:datafield tag="999": text of the whole first 999 datafield -->
    <xsl:when test="$tag='999'">
      <xsl:value-of select="normalize-space($currentField)"/>
    </xsl:when>
    <!-- all other fields: the first matching subfield only -->
    <xsl:otherwise>
      <xsl:value-of select="normalize-space($currentSubfield)"/>
    </xsl:otherwise>
  </xsl:choose>
  <!-- separator: newline after the last column, tab after every other column -->
  <xsl:choose>
    <xsl:when test="$position=count($fieldset)">
      <xsl:value-of select="$newline"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$tab"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  Entry point: writes the header row (one column name per entry of $fieldset),
  then one TSV row per record of $recordSet.
-->
<xsl:template match="/">
  <!-- header row: a tab goes between column names only, never after the last one,
       so the header has the same number of columns as the data rows -->
  <xsl:for-each select="$fieldset">
    <xsl:value-of select="normalize-space(.)"/>
    <xsl:if test="position() != last()">
      <xsl:value-of select="$tab"/>
    </xsl:if>
  </xsl:for-each>
  <xsl:value-of select="$newline"/>
  <!-- iterate through records -->
  <xsl:for-each select="$recordSet">
    <!-- remember the record index: inside the inner loop position() refers to the column definition -->
    <xsl:variable name="record" select="position()"/>
    <!--
      for each record write one cell per column definition
      params: tag      - tag attribute of the column definition
              code     - subfield code attribute, may be absent
              flag     - 'm' or 'b', may be absent
              position - int, index within the set of column definitions
              record   - int, index within the set of records in the input
    -->
    <xsl:for-each select="$fieldset">
      <xsl:call-template name="marcXMLToTSV">
        <xsl:with-param name="tag" select="@tag"/>
        <xsl:with-param name="code" select="@code"/>
        <xsl:with-param name="flag" select="@flag"/>
        <xsl:with-param name="position" select="position()"/>
        <xsl:with-param name="record" select="$record"/>
      </xsl:call-template>
    </xsl:for-each>
  </xsl:for-each>
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment