Skip to content

Instantly share code, notes, and snippets.

@BradKnowles
Created February 4, 2021 06:11
Show Gist options
  • Save BradKnowles/ffb92e44ef8d8d0cfb071bdc639a2620 to your computer and use it in GitHub Desktop.
Save BradKnowles/ffb92e44ef8d8d0cfb071bdc639a2620 to your computer and use it in GitHub Desktop.
XSLT for Gutenberg RDF to JSON
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="3.0" xml:base="http://www.gutenberg.org/"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:cc="http://web.resource.org/cc/"
xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:dcam="http://purl.org/dc/dcam/"
xmlns:marcrel="http://id.loc.gov/vocabulary/relators/">
<!-- The result is serialized as plain text; the templates below build the JSON from literal text. -->
<xsl:output method="text" omit-xml-declaration="yes" />
<!-- Carriage return + line feed, used by the templates as the line terminator of the generated output. -->
<xsl:variable name="crlf" select="'&#13;&#10;'" />
<!--
  Entry point: writes the complete JSON object for the RDF document.

  Output order:
    1. "bookId"        last path segment of pgterms:ebook/@rdf:about
    2. scalar fields   produced by xsl:apply-templates through the match templates
                       of this stylesheet
    3. "contributors"  one object per distinct pgterms:agent
    4. "subjects"      rdf:value of every dcterms:subject that is not an LCC code
    5. "bookshelves"   rdf:value of every distinct pgterms:bookshelf
    6. "formats"       one object per distinct pgterms:file URL

  Every string value written here goes through the json_escape helper so that
  quotes, backslashes and line breaks in the source data cannot break the JSON.
-->
<xsl:template match="/">
  <!-- Beginning of JSON -->
  <xsl:text>{</xsl:text>
  <xsl:value-of select="$crlf" />
  <xsl:call-template name="number_property">
    <xsl:with-param name="name" select="'bookId'" />
    <xsl:with-param name="path" select="tokenize(/rdf:RDF/pgterms:ebook/@rdf:about,'/')[last()]" />
  </xsl:call-template>
  <!-- Scalar properties are emitted by the match templates as the tree is walked -->
  <xsl:apply-templates />

  <!-- contributors begin -->
  <xsl:text> "contributors" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- Grouping on the string value collapses agents that are repeated in the document -->
  <xsl:for-each-group select="//pgterms:agent" group-by=".">
    <xsl:text> {</xsl:text>
    <xsl:value-of select="$crlf" />
    <!-- number_property writes null when the id cannot be extracted, keeping the JSON valid -->
    <xsl:call-template name="number_property">
      <xsl:with-param name="space" select="' '" />
      <xsl:with-param name="name" select="'contributorId'" />
      <xsl:with-param name="path" select="tokenize(@rdf:about,'/')[last()]" />
    </xsl:call-template>
    <!-- The contributor type is the local name of the element wrapping the agent -->
    <xsl:text> "contributorType" : "</xsl:text>
    <xsl:value-of select="./parent::*/local-name()" />
    <xsl:text>",</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "name" : "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="pgterms:name" />
    </xsl:call-template>
    <xsl:text>",</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "webpage" : "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="pgterms:webpage/@rdf:resource" />
    </xsl:call-template>
    <xsl:text>",</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:call-template name="number_property">
      <xsl:with-param name="space" select="' '" />
      <xsl:with-param name="name" select="'birthYear'" />
      <xsl:with-param name="path" select="pgterms:birthdate" />
    </xsl:call-template>
    <xsl:call-template name="number_property">
      <xsl:with-param name="space" select="' '" />
      <xsl:with-param name="name" select="'deathYear'" />
      <xsl:with-param name="path" select="pgterms:deathdate" />
    </xsl:call-template>
    <xsl:text> "aliases" : [</xsl:text>
    <xsl:value-of select="$crlf" />
    <!-- aliases begin -->
    <xsl:for-each select="pgterms:alias">
      <xsl:text> "</xsl:text>
      <xsl:call-template name="json_escape">
        <xsl:with-param name="value" select="." />
      </xsl:call-template>
      <xsl:text>"</xsl:text>
      <xsl:if test="not(position() = last())">
        <xsl:text>,</xsl:text>
        <xsl:value-of select="$crlf" />
      </xsl:if>
    </xsl:for-each>
    <xsl:value-of select="$crlf" />
    <xsl:text> ]</xsl:text>
    <xsl:value-of select="$crlf" />
    <!-- aliases end -->
    <xsl:text> }</xsl:text>
    <!-- Inside xsl:for-each-group, position() and last() count groups, not agents -->
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- contributors end -->

  <!-- subjects begin -->
  <xsl:text> "subjects" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <!--
    Exclude the Library of Congress Classification (LCC) code; it is extracted separately.
    All remaining subjects are iterated as one sequence so the separating commas stay
    correct even when more than one non-LCC scheme is present.
  -->
  <xsl:for-each select="//dcterms:subject[rdf:Description/dcam:memberOf/@rdf:resource[. != 'http://purl.org/dc/terms/LCC']]">
    <xsl:text> "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:text>"</xsl:text>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- subjects end -->

  <!-- bookshelves begin -->
  <xsl:text> "bookshelves" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <xsl:for-each-group select="//pgterms:bookshelf" group-by="rdf:Description">
    <xsl:text> "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:text>"</xsl:text>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- bookshelves end -->

  <!-- formats begin -->
  <xsl:text> "formats" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- Group on the file URL: two different files may share identical metadata text -->
  <xsl:for-each-group select="//dcterms:hasFormat" group-by="pgterms:file/@rdf:about">
    <xsl:text> {</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "URL" : "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="pgterms:file/@rdf:about" />
    </xsl:call-template>
    <xsl:text>",</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "mimeType" : "</xsl:text>
    <!-- Several dcterms:format values on one file are joined with a single space -->
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="pgterms:file/dcterms:format/rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:text>",</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "lastModified" : "</xsl:text>
    <xsl:call-template name="json_escape">
      <xsl:with-param name="value" select="pgterms:file/dcterms:modified" />
    </xsl:call-template>
    <xsl:text>"</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> }</xsl:text>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ]</xsl:text>
  <!-- formats end -->

  <!-- End of JSON -->
  <xsl:value-of select="$crlf" />
  <xsl:text>}</xsl:text>
  <xsl:value-of select="$crlf" />
</xsl:template>
<!--
  Private helper of the root template: writes $value as the content of a JSON
  string (without the surrounding quotes).

  value: zero or more nodes or strings; multiple items are joined with a single
         space, which is what xsl:value-of does by default in XSLT 2.0 and later.

  Backslash is escaped first so the backslashes introduced by the later
  replacements are not doubled. Line feed, carriage return and tab are the only
  control characters XML 1.0 permits, so they are the only ones handled here.
-->
<xsl:template name="json_escape">
  <xsl:param name="value" />
  <xsl:variable name="raw" select="string-join($value, ' ')" />
  <xsl:variable name="quoted" select="replace(replace($raw, '\\', '\\\\'), '&quot;', '\\&quot;')" />
  <xsl:value-of select="replace(replace(replace($quoted, '&#xA;', '\\n'), '&#xD;', '\\r'), '&#x9;', '\\t')" />
</xsl:template>
<!-- Writes the ebook-level dcterms:description as the "notes" string property. -->
<xsl:template match="pgterms:ebook/dcterms:description">
  <xsl:call-template name="string_property">
    <!-- Passed explicitly: the matched node is also what string_property defaults to -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'notes'" />
  </xsl:call-template>
</xsl:template>
<!-- Writes each dcterms:title element as the "title" string property. -->
<xsl:template match="dcterms:title">
  <xsl:call-template name="string_property">
    <!-- Passed explicitly: the matched node is also what string_property defaults to -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'title'" />
  </xsl:call-template>
</xsl:template>
<!-- Writes each dcterms:issued element as the "releaseDate" string property. -->
<xsl:template match="dcterms:issued">
  <xsl:call-template name="string_property">
    <!-- Passed explicitly: the matched node is also what string_property defaults to -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'releaseDate'" />
  </xsl:call-template>
</xsl:template>
<!-- Writes the rdf:value nested in each dcterms:type element as the "category" string property. -->
<xsl:template match="dcterms:type">
  <xsl:call-template name="string_property">
    <!-- The text lives in the nested rdf:Description, not on dcterms:type itself -->
    <xsl:with-param name="path" select="rdf:Description/rdf:value" />
    <xsl:with-param name="name" select="'category'" />
  </xsl:call-template>
</xsl:template>
<!-- Writes the rdf:value nested in each dcterms:language element as the "language" string property. -->
<xsl:template match="dcterms:language">
  <xsl:call-template name="string_property">
    <!-- The text lives in the nested rdf:Description, not on dcterms:language itself -->
    <xsl:with-param name="path" select="rdf:Description/rdf:value" />
    <xsl:with-param name="name" select="'language'" />
  </xsl:call-template>
</xsl:template>
<!--
  Writes the "lccClass" string property for a dcam:memberOf element that points
  at the Library of Congress Classification (LCC) scheme.

  The class code is the rdf:value that shares a parent with the matched
  dcam:memberOf. It is read through the parent rather than through
  preceding-sibling, so the code is found whether rdf:value is written before
  or after dcam:memberOf. Only the first rdf:value is passed on, because
  string_property expects a single value.
-->
<xsl:template match="dcam:memberOf[@rdf:resource='http://purl.org/dc/terms/LCC']">
  <xsl:call-template name="string_property">
    <xsl:with-param name="name" select="'lccClass'" />
    <xsl:with-param name="path" select="../rdf:value[1]" />
  </xsl:call-template>
</xsl:template>
<!--
  Writes one JSON string property followed by a comma and $crlf:
     "name" : "value",

  name: property name, written as given.
  path: node(s) or string supplying the value; defaults to the context item.
        Multiple items are joined with a single space instead of raising a
        type error in replace().

  The value has each carriage return replaced by ' :' and its whitespace
  collapsed by normalize-space(). Backslashes and double quotes are then
  escaped so the result is a valid JSON string. Backslash is escaped first so
  the backslash added in front of a quote is not doubled. No other escaping is
  needed: normalize-space() has already removed tabs and line breaks.
-->
<xsl:template name="string_property">
  <xsl:param name="name" />
  <xsl:param name="path" select="." />
  <xsl:variable name="text" select="normalize-space(replace(string-join($path, ' '), '&#13;', ' :'))" />
  <xsl:text> "</xsl:text>
  <xsl:value-of select="$name" />
  <xsl:text>" : "</xsl:text>
  <xsl:value-of select="replace(replace($text, '\\', '\\\\'), '&quot;', '\\&quot;')" />
  <xsl:text>",</xsl:text>
  <xsl:value-of select="$crlf" />
</xsl:template>
<!--
  Writes one unquoted JSON property followed by a comma and $crlf.

  space: text written in front of the property name (defaults to one space).
  name:  property name, written as given.
  path:  value to write; defaults to the context item. When its effective
         boolean value is false (for example an empty node selection), the
         literal null is written instead.
-->
<xsl:template name="number_property">
  <xsl:param name="space" select="' '" />
  <xsl:param name="name" />
  <xsl:param name="path" select="." />
  <xsl:value-of select="$space" />
  <xsl:text>"</xsl:text>
  <xsl:value-of select="$name" />
  <xsl:text>" : </xsl:text>
  <!-- XPath conditional: the value itself when present, otherwise the JSON null literal -->
  <xsl:value-of select="if ($path) then $path else 'null'" />
  <xsl:value-of select="concat(',', $crlf)" />
</xsl:template>
<!-- Empty rule for text nodes: text not handled by another template adds nothing to the output. -->
<xsl:template match="text()"></xsl:template>
<!-- <xsl:template name="ErrorOnUnmatched" match="*">
<xsl:if test="name() != 'rdf:RDF'">
<xsl:message terminate="no">Unexpected element: <xsl:value-of select="name()" /></xsl:message>
</xsl:if>
</xsl:template> -->
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment