Created
May 20, 2016 16:02
-
-
Save amclark42/68fd79dfbd9dea1fe8e85de1b29992f6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:xs="http://www.w3.org/2001/XMLSchema" | |
xmlns:tei="http://www.tei-c.org/ns/1.0" | |
xpath-default-namespace="http://www.tei-c.org/ns/1.0" | |
exclude-result-prefixes="xs xsl tei" | |
version="2.0"> | |
<!-- Serialize the result with indentation. Namespace-prefix exclusion is
     declared on xsl:stylesheet; exclude-result-prefixes is not a permitted
     attribute of xsl:output, so it does not appear here. -->
<xsl:output indent="yes"/>
<!-- Pattern that identifies an entry heading: one of the listed month names,
     then any run of characters, then a space and four digits. It is used with
     matches(), which is unanchored, so the date may sit anywhere in the text.
     NOTE(review): January, February, May and December are not in the
     alternation. This may be deliberate for the source material; confirm
     before reusing the stylesheet on other documents. -->
<xsl:variable name="regex"
              as="xs:string"
              select="'(March|April|June|July|August|September|October|November).* \d\d\d\d'"/>
<!-- Entry point. Builds the output TEI skeleton: the children of the input
     teiHeader are rebuilt through the "header" mode, and every paragraph
     outside the header is processed in the default mode, where entry headings
     become <div type="entry"> and all other paragraphs produce nothing on
     their own. -->
<xsl:template match="/">
  <TEI xmlns="http://www.tei-c.org/ns/1.0">
    <teiHeader>
      <xsl:apply-templates select="//teiHeader/*" mode="header"/>
    </teiHeader>
    <text>
      <body>
        <!-- Paragraphs inside teiHeader are excluded: the header has already
             been copied above, and a header paragraph that happens to contain
             a date would otherwise be emitted as a spurious entry. -->
        <xsl:apply-templates select="//p[not(ancestor::teiHeader)]"/>
      </body>
    </text>
  </TEI>
</xsl:template>
<!-- Header mode: rebuild each element with its attributes and recurse into its
     children in the same mode. No text() rule is declared for this mode in
     this stylesheet, so text nodes fall through to the built-in rule and are
     copied as text. -->
<xsl:template mode="header" match="*">
  <xsl:copy>
    <xsl:sequence select="@*"/>
    <xsl:apply-templates select="node()" mode="header"/>
  </xsl:copy>
</xsl:template>
<!-- Default-mode suppressions, written as one union pattern (each alternative
     keeps its own default priority):
       p                        a paragraph that is not an entry heading emits
                                nothing by itself; it is copied only as part of
                                the entry that precedes it.
       text()[not(ancestor::p)] text outside any paragraph is dropped. -->
<xsl:template match="p | text()[not(ancestor::p)]"/>
<!-- Any element nested inside a paragraph is rebuilt with its attributes, and
     its children are processed in the default mode. This is what carries
     inline markup of an entry heading into the generated <bibl>. -->
<xsl:template match="p//*">
  <xsl:copy>
    <xsl:sequence select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- Entry heading: a <p> whose whitespace-normalized text matches $regex.
     Emits one <div type="entry"> containing the heading as <bibl>, followed by
     copies of the sibling paragraphs after it, up to but not including the
     next entry heading. -->
<xsl:template match="p[matches(normalize-space(.),$regex)]">
  <!-- XPath sequences are 1-based. subsequence() keeps the items whose
       position p satisfies start <= p < start + length, so a start of 0 would
       silently shorten the window by one item. Change this only if the window
       should begin further along the following siblings. -->
  <xsl:variable name="seqStartIndex" select="1"/>
  <!-- "allFollowing" is a look-ahead window of at most 8 following sibling
       <p>s, not every following <p>. This assumes the next entry heading can
       be found within that window; when it cannot, only the paragraphs in the
       window are attached to this entry. -->
  <xsl:variable name="allFollowing"
                select="subsequence(following-sibling::p,$seqStartIndex,8)"/>
  <!-- The next entry is the first <p> in the window that satisfies the same
       test as this template's match pattern (the whole paragraph's normalized
       text, not an individual text node). Empty if there is none. -->
  <xsl:variable name="nextEntry"
                select="$allFollowing[matches(normalize-space(.),$regex)][1]"/>
  <!-- Everything in the window that precedes the next entry heading in
       document order belongs to this entry. The comparison is by node order,
       so paragraphs with identical text cannot be confused with one another.
       With no next heading the whole window is used; an empty window, or a
       heading immediately after this one, yields an empty sequence. -->
  <xsl:variable name="content"
                select="if ( $nextEntry )
                        then $allFollowing[. &lt;&lt; $nextEntry]
                        else $allFollowing"/>
  <div xmlns="http://www.tei-c.org/ns/1.0" type="entry">
    <bibl><xsl:apply-templates/></bibl>
    <xsl:copy-of select="$content"/>
  </div>
  <!-- Add extra spacing to aid proofing. -->
  <xsl:text> </xsl:text>
</xsl:template>
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment