Last active
February 10, 2017 00:09
-
-
Save bulbil/10f46b5f146f6d3480b7 to your computer and use it in GitHub Desktop.
stylesheet for converting Early Novels Database related MARCXML to TSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet | |
xmlns:marc="http://www.loc.gov/MARC21/slim" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" | |
xmlns:exslt="http://exslt.org/common" | |
xmlns:str="http://exslt.org/strings" | |
exclude-result-prefixes="marc"> | |
<!-- <xsl:import href="http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl"/> | |
--> | |
<xsl:output method="text" omit-xml-declaration="yes" indent="no" encoding="utf-8"/> | |
<!-- | |
2016-12-14 | |
A stylesheet for converting MARC XML to TSV | |
Nabil Kashyap | |
github:@bulbil | |
digitalscholarship@swarthmore.edu | |
example usage on macOS, where xsltproc is built in (other XSLT 1.0 processors should work as well)
xsltproc -o outputpath.tsv stylesheet.xsl xmlinput.xml | |
--> | |
<!-- List of MARC values we'd like to move over to the TSV: | |
@tag = marc datafield tag | |
@code = marc subfield code | |
@flag = 'm' indicates multiple (as in subfields, i.e., every 521a subfield for multiple 521 entries), 'b' indicates boolean | |
--> | |
<!--
  Column definitions for the TSV, in output order. The element text is the
  column heading; @tag, @code and @flag are handed to the marcXMLToTSV
  template. The entries are read back from this stylesheet via document(''),
  so only the <field> elements matter here.
-->
<xsl:variable name="fields">
  <!-- keyword tags: handled by dedicated branches in marcXMLToTSV -->
  <field tag="leader">leader</field>
  <field tag="id">id</field>
  <field tag="90">callNumber</field>

  <!-- single-valued subfields -->
  <field tag="100" code="a">authorName</field>
  <field tag="100" code="d">authorDates</field>
  <field tag="246" code="a">ENDFullTitle</field>
  <field tag="260" code="a">pubLocationStatement</field>
  <field tag="260" code="c">pubDateTranscribed</field>
  <field tag="300" code="x">format</field>
  <field tag="300" code="z">formatStatement</field>

  <!-- 5xx: notes, paratext, narrative form and copy-level booleans -->
  <field tag="500" code="a" flag="m">generalNotes</field>
  <field tag="520" code="a" flag="m">paratextTitleControlled</field>
  <field tag="520" code="x" flag="m">paratext</field>
  <field tag="591" code="a" flag="b">epigraphBoolean</field>
  <field tag="592" code="a" flag="m">narrativeFormPrimary</field>
  <field tag="592" code="b" flag="m">narrativeFormAdditional</field>
  <field tag="592" code="c" flag="m">nonProseForms</field>
  <field tag="594" flag="b">inscriptionBoolean</field>
  <field tag="595" flag="b">marginaliaBoolean</field>
  <field tag="596" code="a">translationClaimControlled</field>

  <!-- 989: title vocabulary, one subfield code per category -->
  <field tag="989" code="1" flag="m">titleOtherWorks</field>
  <field tag="989" code="2" flag="m">titleNouns</field>
  <field tag="989" code="3" flag="m">titleAdjectives</field>
  <field tag="989" code="4" flag="m">titlePlaces</field>
  <field tag="989" code="5" flag="m">titleNames</field>
  <field tag="989" code="6" flag="m">titleVerbs</field>
  <field tag="989" code="7" flag="m">titleObjects</field>
  <field tag="989" code="8" flag="m">titleAdverbs</field>

  <!-- 999: whole datafield, no subfield code -->
  <field tag="999">catalogedBy</field>
</xsl:variable>
<!-- Tab variable for creating TSV file --> | |
<!-- U+0009 written as a character reference so the column separator stays visible and survives reformatting -->
<xsl:variable name="tab"><xsl:text>&#9;</xsl:text></xsl:variable>
<!-- New Line variable for creating new rows in TSV file --> | |
<!-- U+000A written as a character reference; terminates each TSV row -->
<xsl:variable name="newline"><xsl:text>&#10;</xsl:text></xsl:variable>
<!-- a delimiter for multiple values, e.g. "comma-space" --> | |
<!-- separator placed between the values of a multi-valued (flag="m") cell: comma followed by one space -->
<xsl:variable name="delimiter" select="', '"/>
<!-- selecting all of the records | |
--> | |
<!-- every marc:record element anywhere in the input document, in document order -->
<xsl:variable name="recordSet" select="/descendant::marc:record"/>
<!-- corny pseudo array so we can iterate over the fields --> | |
<!-- document('') re-reads this stylesheet, which lets the <field> entries of the "fields" variable be used as a node-set in XSLT 1.0 -->
<xsl:param name="fieldset" select="document('')/xsl:stylesheet/xsl:variable[@name = 'fields']/field"/>
<!-- the actual template --> | |
<xsl:template name="marcXMLToTSV">
  <!--
    Writes one TSV cell for a single (record, field) pair, then a tab, or a
    newline when the field is the last entry of $fieldset.

    Parameters:
      tag      : @tag of the <field> entry; a MARC datafield tag or one of the
                 keywords 'leader', 'id', '90'
      code     : @code of the <field> entry (MARC subfield code); may be absent
      flag     : 'm' = join every matching subfield with $delimiter,
                 'b' = write 1 if the field/subfield exists, otherwise 0;
                 may be absent
      position : 1-based index of the <field> entry within $fieldset
      record   : 1-based index of the record within $recordSet

    Every text value goes through normalize-space() so that a tab or line
    break inside the MARC data cannot split a cell or a row. An empty node-set
    normalizes to the empty string, so a missing field yields an empty cell
    without any explicit test.
  -->
  <xsl:param name="tag"/>
  <xsl:param name="code"/>
  <xsl:param name="position"/>
  <xsl:param name="record"/>
  <xsl:param name="flag"/>

  <!-- the record being written, its datafields carrying the requested tag, and their subfields carrying the requested code -->
  <xsl:variable name="currentRecord" select="$recordSet[position() = $record]"/>
  <xsl:variable name="currentField" select="$currentRecord/marc:datafield[@tag = $tag]"/>
  <xsl:variable name="currentSubfield" select="$currentField/marc:subfield[@code = $code]"/>

  <xsl:choose>
    <!-- marc:leader -->
    <!-- NOTE(review): normalize-space() also collapses runs of blanks inside the leader; confirm that consumers do not rely on leader character positions -->
    <xsl:when test="$tag = 'leader'">
      <xsl:value-of select="normalize-space($currentRecord/marc:leader)"/>
    </xsl:when>

    <!-- record id: marc:controlfield 001 (tag compared numerically, so leading zeros are ignored) -->
    <xsl:when test="$tag = 'id'">
      <xsl:value-of select="normalize-space($currentRecord/marc:controlfield[number(@tag) = 1])"/>
    </xsl:when>

    <!-- call number: datafield 090, subfield a immediately followed by subfield b with no separator (tag compared numerically) -->
    <xsl:when test="$tag = '90'">
      <xsl:variable name="callNumberField" select="$currentRecord/marc:datafield[number(@tag) = 90]"/>
      <xsl:value-of select="normalize-space($callNumberField/marc:subfield[@code = 'a'])"/>
      <xsl:value-of select="normalize-space($callNumberField/marc:subfield[@code = 'b'])"/>
    </xsl:when>

    <!-- multiple subfields reduced to one delimited list in a single cell -->
    <xsl:when test="$flag = 'm'">
      <xsl:for-each select="$currentSubfield">
        <xsl:value-of select="normalize-space(.)"/>
        <!-- the delimiter goes between values only, never after the last one -->
        <xsl:if test="position() != last()">
          <xsl:value-of select="$delimiter"/>
        </xsl:if>
      </xsl:for-each>
    </xsl:when>

    <!-- boolean fields: 1 when present, 0 when absent -->
    <xsl:when test="$flag = 'b'">
      <xsl:choose>
        <!-- a subfield code was given: test for that subfield -->
        <xsl:when test="$code">
          <xsl:value-of select="number(boolean($currentSubfield))"/>
        </xsl:when>
        <!-- no code: test for the datafield itself -->
        <xsl:otherwise>
          <xsl:value-of select="number(boolean($currentField))"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>

    <!-- cataloged by: text of the first 999 datafield as a whole -->
    <xsl:when test="$tag = 999">
      <xsl:value-of select="normalize-space($currentField)"/>
    </xsl:when>

    <!-- every other field: the first matching subfield -->
    <xsl:otherwise>
      <xsl:value-of select="normalize-space($currentSubfield)"/>
    </xsl:otherwise>
  </xsl:choose>

  <!-- close the row after the last column, otherwise separate from the next cell -->
  <xsl:choose>
    <xsl:when test="$position = count($fieldset)">
      <xsl:value-of select="$newline"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$tab"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="/">
  <!--
    Entry point: writes the header row, then one row per MARC record.
  -->

  <!-- header row: one heading per <field> entry; the tab is written between
       headings only, so the header has the same number of columns as the
       data rows -->
  <xsl:for-each select="$fieldset">
    <xsl:value-of select="normalize-space(.)"/>
    <xsl:if test="position() != last()">
      <xsl:value-of select="$tab"/>
    </xsl:if>
  </xsl:for-each>
  <xsl:value-of select="$newline"/>

  <!-- data rows -->
  <xsl:for-each select="$recordSet">
    <!-- captured here because position() is re-bound inside the inner loop -->
    <xsl:variable name="record" select="position()"/>
    <!--
      One marcXMLToTSV call per desired column:
        tag      : @tag of the <field> entry
        code     : @code of the <field> entry (subfield code, may be absent)
        flag     : @flag of the <field> entry ('m', 'b' or absent)
        position : index of the <field> entry within $fieldset
        record   : index of the current record within $recordSet
      The called template writes the cell and the tab or newline after it.
    -->
    <xsl:for-each select="$fieldset">
      <xsl:call-template name="marcXMLToTSV">
        <xsl:with-param name="tag" select="@tag"/>
        <xsl:with-param name="code" select="@code"/>
        <xsl:with-param name="flag" select="@flag"/>
        <xsl:with-param name="position" select="position()"/>
        <xsl:with-param name="record" select="$record"/>
      </xsl:call-template>
    </xsl:for-each>
  </xsl:for-each>
</xsl:template>
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment