Skip to content

Instantly share code, notes, and snippets.

@wincentbalin
Last active July 20, 2021 22:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wincentbalin/bd62c3a8e5a82b42d6c29124acaf352b to your computer and use it in GitHub Desktop.
Save wincentbalin/bd62c3a8e5a82b42d6c29124acaf352b to your computer and use it in GitHub Desktop.
Transform XML files from gesetze-im-internet.de to text
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- The result is serialized as plain UTF-8 text. -->
<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>
<!-- Whitespace-only text nodes are stripped from every element of the input. -->
<xsl:strip-space elements="*"/>
<!--
  Shared output fragments.
  newline: one line feed, written as a character reference so that attribute-value
           normalization does not turn it into a space.
  space:   one blank.
  tab:     four blanks (not a TAB character); used as indentation and column separator.
-->
<xsl:variable name="newline" select="'&#10;'"/>
<xsl:variable name="space" select="' '"/>
<xsl:variable name="tab" select="concat($space, $space, $space, $space)"/>
<!--
  Entry point: matches the dokumente document element.
  Output order:
    1. every norm/metadaten/langue,
    2. every norm that has an enbez text node different from 'Inhaltsübersicht',
       or that has both metadaten/langue and textdaten,
    3. 25 trailing line feeds.
-->
<xsl:template match="/dokumente">
  <xsl:variable name="fiveNewlines"
                select="concat($newline, $newline, $newline, $newline, $newline)"/>
  <xsl:apply-templates select="norm/metadaten/langue"/>
  <!-- 'and' binds tighter than 'or' in XPath; the parentheses only make that explicit. -->
  <xsl:apply-templates
      select="norm[metadaten/enbez/text() != 'Inhaltsübersicht' or (metadaten/langue and textdaten)]"/>
  <!-- 5 x 5 = 25 line feeds after the last norm. -->
  <xsl:value-of select="concat($fiveNewlines, $fiveNewlines, $fiveNewlines,
                               $fiveNewlines, $fiveNewlines)"/>
</xsl:template>
<!-- Writes the whitespace-normalized langue value followed by three line feeds. -->
<xsl:template match="norm/metadaten/langue">
  <xsl:value-of select="normalize-space(.)"/>
  <xsl:text>&#10;&#10;&#10;</xsl:text>
</xsl:template>
<!--
  Norm that carries an enbez element.
  Heading line: the enbez value; if a titel element exists, two spaces and the
  whitespace-normalized titel follow. The heading ends with two line feeds, after
  which the norm's textdaten are rendered.
-->
<xsl:template match="norm[metadaten/enbez]">
  <xsl:value-of select="metadaten/enbez"/>
  <xsl:if test="metadaten/titel">
    <xsl:value-of select="concat($space, $space, normalize-space(metadaten/titel))"/>
  </xsl:if>
  <xsl:value-of select="concat($newline, $newline)"/>
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!--
  Norm that has both metadaten/langue and textdaten: no heading line is written
  here, only the textdaten are rendered.

  This pattern and norm[metadaten/enbez] both receive the default priority 0.5, so
  a norm satisfying both predicates is an ambiguous match. XSLT 1.0 allows a
  processor to either signal an error or recover by using the rule that occurs last
  in the stylesheet, which is this one. The explicit priority pins that recovery
  behaviour so every processor picks the same rule without errors or warnings.
-->
<xsl:template match="norm[metadaten/langue and textdaten]" priority="1">
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!--
  Renders text/Content; if a fussnoten child exists, a line feed and then
  fussnoten/Content follow. The block always ends with two line feeds.
-->
<xsl:template match="textdaten">
  <xsl:apply-templates select="text/Content"/>
  <xsl:if test="fussnoten">
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates select="fussnoten/Content"/>
  </xsl:if>
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>
<!-- P element: its child nodes are processed, then one line feed is written. -->
<xsl:template match="P">
  <xsl:apply-templates select="node()"/>
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!--
  DL element: a line feed, then the processed child nodes.
  A closing line feed is added unless one of these holds:
    - the great-grandparent element is named DL (presumably a list nested through
      DD and one more wrapper element; confirm against the input schema),
    - the parent element is named P,
    - this DL is the last node of the node list currently being processed.
-->
<xsl:template match="DL">
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="node()"/>
  <xsl:if test="not(name(../../..) = 'DL' or name(..) = 'P' or position() = last())">
    <xsl:text>&#10;</xsl:text>
  </xsl:if>
</xsl:template>
<!--
  DT element: writes indentation, the string value of the DT, and one space.
  The indentation is one $tab, or two when the element four levels up is named DL
  (presumably a DT inside a nested list; confirm against the input schema).
-->
<xsl:template match="DT">
  <xsl:variable name="indent">
    <xsl:value-of select="$tab"/>
    <xsl:if test="name(../../../..) = 'DL'">
      <xsl:value-of select="$tab"/>
    </xsl:if>
  </xsl:variable>
  <xsl:value-of select="concat($indent, ., $space)"/>
</xsl:template>
<!--
  DD element: its child nodes are processed; a line feed follows unless this DD
  is the last node of the node list currently being processed.
-->
<xsl:template match="DD">
  <xsl:apply-templates select="node()"/>
  <!-- position() never exceeds last(), so "less than" is the same as "not equal". -->
  <xsl:if test="position() &lt; last()">
    <xsl:text>&#10;</xsl:text>
  </xsl:if>
</xsl:template>
<!--
  BR element: becomes a line feed, except directly inside an element named entry,
  where it becomes a single space.
-->
<xsl:template match="BR">
  <xsl:choose>
    <xsl:when test="name(..) != 'entry'">
      <xsl:text>&#10;</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$space"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- TOC element: deliberately empty template, so the element and its content produce no output. -->
<xsl:template match="TOC"/>
<!-- Title element: its child nodes are processed, then one line feed is written. -->
<xsl:template match="Title">
  <xsl:apply-templates select="node()"/>
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- table element: adds no output of its own and only processes its child nodes. -->
<xsl:template match="table">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- row element: its entry children are processed in order, then one line feed ends the row. -->
<xsl:template match="row">
  <xsl:apply-templates select="entry"/>
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!--
  entry element: its child nodes are processed; $tab is written as a column
  separator unless this entry is the last node of the node list currently being
  processed.
-->
<xsl:template match="entry">
  <xsl:apply-templates select="node()"/>
  <!-- position() never exceeds last(), so "less than" is the same as "not equal". -->
  <xsl:if test="position() &lt; last()">
    <xsl:value-of select="$tab"/>
  </xsl:if>
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment