Skip to content

Instantly share code, notes, and snippets.

@meau
Last active April 20, 2016 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meau/ab3b0997f642940959867320e8686782 to your computer and use it in GitHub Desktop.
Save meau/ab3b0997f642940959867320e8686782 to your computer and use it in GitHub Desktop.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ead="urn:isbn:1-931666-22-9" version="2.0">
<xsl:strip-space elements="*"/>
<!-- Identity transform: any attribute or node that no more specific template
     claims is copied to the output, and its attributes and children are then
     processed in turn. -->
<xsl:template match="node() | @*">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- Drop every dao whose parent is bioghist: the template body is empty, so
     nothing is written to the output in its place. -->
<xsl:template match="dao[parent::bioghist]"/>
<!-- Rebuild titlestmt so that subtitle and titleproper are merged into a single
     titleproper whose text is the subtitle value(s) followed by the titleproper
     value(s), joined with " for the ".
     * The new titleproper is created with xsl:element rather than a literal
       result element, so the xs and ead namespace declarations of this
       stylesheet are not copied onto it in the output.
     * Every other child of titlestmt (anything that is not titleproper or
       subtitle) is passed through the normal templates after the merged
       title instead of being discarded. -->
<xsl:template match="titlestmt">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:element name="titleproper">
      <xsl:value-of select="(subtitle, titleproper)" separator=" for the "/>
    </xsl:element>
    <xsl:apply-templates select="node() except (titleproper | subtitle)"/>
  </xsl:copy>
</xsl:template>
<!-- COPYRIGHT STATEMENT!!! This is actually hard because there's no canonical EAD tag for this and it looks like we have stuff everywhere. Let's look at this together. -->
<!-- Unwrap descgrp: the element itself (and its attributes) is not copied;
     only its child nodes are processed, so they take its place in the output. -->
<xsl:template match="descgrp">
  <xsl:apply-templates select="child::node()"/>
</xsl:template>
<!-- Suppress head elements wherever they occur: this template writes nothing. -->
<xsl:template match="head">
  <!-- deliberately empty -->
</xsl:template>
<!-- Extents that begin with non-numeric characters -->
<!-- Extent text that starts with "(": remove every "(" and ")" character.
     If what remains starts with a bare decimal point, e.g. "(.5 linear feet)",
     the leading zero is added here as well: only one template is applied to a
     given text node, so the separate leading-decimal template never sees the
     value produced by this one. -->
<xsl:template match="extent/text()[matches(., '^\(')]">
  <xsl:variable name="withoutParens" select="translate(., '()', '')"/>
  <xsl:value-of select="if (starts-with($withoutParens, '.')) then concat('0', $withoutParens) else $withoutParens"/>
</xsl:template>
<!-- Extent text that starts with a bare decimal point: write the same text
     with a "0" placed in front of it. -->
<xsl:template match="extent/text()[matches(., '^\.')]">
  <xsl:value-of select="('0', .)" separator=""/>
</xsl:template>
<!-- Nested unitdates.
1.
unittitle holds exactly one unitdate, has text other than space/punctuation
(or an emph or title child), and nothing but space/punctuation follows the
unitdate.
Output: a copy of unittitle containing its attributes and its own text, emph
and title children, followed by the unitdate copied as a sibling.
The attributes are copied explicitly because xsl:copy on an element does not
carry them over; without this, values such as @id or @label would be lost. -->
<xsl:template
  match="unittitle[count(unitdate) = 1 and (text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or emph or title) and unitdate[not(following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or following-sibling::emph or following-sibling::title)]]">
  <xsl:copy>
    <xsl:copy-of select="@*"/>
    <xsl:copy-of select="text() | emph | title"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- 2.
unittitle holds exactly one unitdate, has text other than space/punctuation
(or an emph or title child), and the unitdate is followed by such text or by
an emph or title.
Output: a copy of unittitle in which the unitdate markup is replaced by the
text of the unitdate (text, emph, title and the date text are processed in
document order), followed by a copy of the unitdate as a sibling.
The attributes of unittitle are processed explicitly because xsl:copy on an
element does not carry them over. -->
<xsl:template
  match="unittitle[count(unitdate) = 1 and (text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or emph or title) and unitdate[following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or following-sibling::emph or following-sibling::title]]">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="text() | emph | title | unitdate/text()"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- 3.
unittitle holds more than one unitdate, has text other than space/punctuation
(or an emph or title child), and the last unitdate is followed by such text
or by an emph or title.
Output: a copy of unittitle in which each unitdate is replaced by its text,
followed by copies of all the unitdates as siblings.
The attributes of unittitle are processed explicitly because xsl:copy on an
element does not carry them over. -->
<xsl:template
  match="unittitle[count(unitdate) > 1 and (text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or emph or title) and unitdate[position() = last()][following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or following-sibling::emph or following-sibling::title]]">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="text() | emph | title | unitdate/text()"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- 4.
unittitle has text other than space/punctuation (or an emph or title child),
holds more than one unitdate, the last unitdate ends the unittitle, and every
earlier unitdate is followed only by space/punctuation text (i.e. all the
unitdates sit together at the end of the unittitle).
Output: a copy of unittitle containing its attributes and its own text, emph
and title children, followed by copies of all the unitdates as siblings.
The attributes are copied explicitly because xsl:copy on an element does not
carry them over. -->
<xsl:template
  match="unittitle[count(unitdate) > 1 and (text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or title or emph) and unitdate[position() = last()][not(following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or following-sibling::emph or following-sibling::title)] and unitdate[position() != last()][following-sibling::text()[matches(., '^[\p{Z}\p{P}]*$')]] and not(unitdate[position() != last()][following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))]])]">
  <xsl:copy>
    <xsl:copy-of select="@*"/>
    <xsl:copy-of select="text() | emph | title"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- 5.
unittitle has text other than space/punctuation (or an emph or title child),
holds more than one unitdate, the last unitdate ends the unittitle, and at
least one earlier unitdate is followed by text other than space/punctuation.
Output: a copy of unittitle in which each unitdate is replaced by its text,
followed by copies of all the unitdates as siblings.
The attributes of unittitle are processed explicitly because xsl:copy on an
element does not carry them over. -->
<xsl:template
  match="unittitle[count(unitdate) > 1 and (text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or title or emph) and unitdate[position() = last()][not(following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or following-sibling::emph or following-sibling::title)] and unitdate[position() != last()][following-sibling::text()[not(matches(., '^[\p{Z}\p{P}]*$'))]]]">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="text() | emph | title | unitdate/text()"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- 6.
unittitle holds one or more unitdates and has no emph, no title and no text
other than space or punctuation.
Output: a copy of unittitle whose content is its own text plus the text of
each unitdate (so the date text serves as the title), followed by copies of
all the unitdates as siblings.
The attributes of unittitle are processed explicitly because xsl:copy on an
element does not carry them over. -->
<xsl:template
  match="unittitle[count(unitdate) >= 1 and not((text()[not(matches(., '^[\p{Z}\p{P}]*$'))] or emph or title))]">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="text() | emph | title | unitdate/text()"/>
  </xsl:copy>
  <xsl:copy-of select="unitdate"/>
</xsl:template>
<!-- Get containers ready for container management functionality in 1.5 -->
<!-- Among sibling container elements that have neither @id nor @parent, the
     first one is copied with an added @id whose value comes from generate-id();
     its existing attributes and children are then processed as usual. -->
<xsl:template match="container[not(@id | @parent)][1]">
  <xsl:copy>
    <xsl:attribute name="id" select="generate-id()"/>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- Among sibling container elements that have neither @id nor @parent, every
     one after the first is copied with an added @parent. Its value is
     generate-id() of the first such sibling, i.e. the same value that sibling
     receives as its @id. -->
<xsl:template match="container[not(@id | @parent)][position() > 1]">
  <xsl:copy>
    <xsl:attribute name="parent" select="generate-id(../container[not(@id | @parent)][1])"/>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- We're missing a lot of @level attributes. Let's fix that. -->
<!-- Any element below dsc that has a did child but no @level is copied with
     level="file" added, prior to importing into ASpace. -->
<xsl:template match="dsc//*[did][not(@level)]">
  <xsl:copy>
    <xsl:attribute name="level">file</xsl:attribute>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- An archdesc with no @level is copied with level="collection" added,
     prior to importing into ASpace. -->
<xsl:template match="archdesc[not(@level)]">
  <xsl:copy>
    <xsl:attribute name="level" select="'collection'"/>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- Matches box "container ranges" and splits them into individual container
     elements. A few illustrative examples:
       text = 1-5   gives 5 container elements (1 to 5)
       text = 1-10b gives 1 container element (the pattern does not match, so the input is kept as is)
       text = 5-1   gives 1 container element (the input is copied unchanged; see the note below for
                    how to produce 5 elements instead, if desired)
     NOTE(review): this is the only template in this stylesheet whose match uses
     the ead: prefix (urn:isbn:1-931666-22-9); the other templates match
     unprefixed names. Confirm which namespace the input documents use. -->
<xsl:template match="ead:container[lower-case(@type)='box'][matches(replace(., '\s', ''), '^[1-9](\d*)[-](\d+)$')]">
  <xsl:variable name="mostAttributes" select="@* except @id"/>
  <xsl:variable name="IDAttribute" select="@id"/>
  <!-- Parse the bounds from the same whitespace-free string that the match
       pattern tested. Parsing the raw text instead would make the integer
       cast fail on values with whitespace inside a number, such as "1 0-12",
       even though the pattern accepts them. -->
  <xsl:variable name="rangeText" select="replace(., '\s', '')" as="xs:string"/>
  <xsl:variable name="containerStart" select="xs:integer(substring-before($rangeText, '-'))" as="xs:integer"/>
  <xsl:variable name="containerEnd" select="xs:integer(substring-after($rangeText, '-'))" as="xs:integer"/>
  <!-- A descending range such as 24-20 (let's hope it's a typo) is copied to
       the output unchanged: the for-each below does not count backwards, so
       "24 to 20" is an empty sequence and would otherwise produce nothing.
       To split such ranges anyway, iterate over
       reverse($containerEnd to $containerStart) instead. -->
  <xsl:if test="$containerStart gt $containerEnd">
    <xsl:copy-of select="."/>
  </xsl:if>
  <xsl:for-each select="$containerStart to $containerEnd">
    <xsl:variable name="currentContainer" select="." as="xs:integer"/>
    <xsl:element name="container" namespace="urn:isbn:1-931666-22-9">
      <!-- The first container keeps the original @id; each later one gets the
           original @id with two hyphens and its own number appended. -->
      <xsl:apply-templates select="$IDAttribute" mode="id-attribute-copy-for-multiple">
        <xsl:with-param name="currentContainer" select="if ($currentContainer eq $containerStart) then '' else concat('--', $currentContainer)"/>
      </xsl:apply-templates>
      <xsl:apply-templates select="$mostAttributes"/>
      <xsl:value-of select="$currentContainer"/>
    </xsl:element>
  </xsl:for-each>
</xsl:template>
<!-- Writes an @id whose value is the original @id followed by the string passed
     in $currentContainer. Used for split container ranges to keep the EAD
     export valid, since an @id value can only appear once per file. -->
<xsl:template match="@id" mode="id-attribute-copy-for-multiple">
  <xsl:param name="currentContainer"/>
  <xsl:attribute name="id" select="concat(., $currentContainer)"/>
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment