Skip to content

Instantly share code, notes, and snippets.

@andybluntish
Created August 30, 2015 15:08
Show Gist options
  • Save andybluntish/1482506d8636608fb1ef to your computer and use it in GitHub Desktop.
Save andybluntish/1482506d8636608fb1ef to your computer and use it in GitHub Desktop.
Serialize XHTML as JSON
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml">
<xsl:output method="text" encoding="utf-8" media-type="application/json" />
<!--
Entry point
Matches the document element when it has at least one child node and hands
that element to the templates in "element" mode. A document element with no
child nodes is not matched by this template.
-->
<xsl:template match="/*[child::node()]">
  <xsl:apply-templates select="self::node()" mode="element"/>
</xsl:template>
<!--
Attributes
Write each HTML attribute as a JSON member of the form "name": "value".
The 'class' attribute is renamed to 'className'; every other attribute keeps
its own name. The value is passed to the escape-string template, which wraps
it in double quotes and escapes backslash, double quote, tab, line feed and
carriage return.
-->
<xsl:template match="@*" mode="attribute">
  <!-- Key: "className" for the class attribute, otherwise the attribute's own name -->
  <xsl:text>"</xsl:text>
  <xsl:choose>
    <xsl:when test="name() = 'class'">
      <xsl:text>className</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="name()"/>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:text>": </xsl:text>
  <!--
  Value: an attribute node has no child nodes, so applying templates to its
  children would write nothing and leave the member without a value. Escape
  the attribute's string value directly instead.
  -->
  <xsl:call-template name="escape-string">
    <xsl:with-param name="s" select="."/>
  </xsl:call-template>
  <!-- Comma after every attribute except the last one of the selected set -->
  <xsl:if test="position() &lt; last()">
    <xsl:text>,</xsl:text>
  </xsl:if>
</xsl:template>
<!--
Element or Text Node
Serializes one node as a JSON object:
  element: {"tagName": local name, "attributes": {...}, "children": [...]}
           ("attributes" only when the element has attributes,
            "children" only when it has child nodes)
  text:    {"tagName": "#text", "content": escaped string}
Whitespace-only text nodes, comments and processing instructions produce no
output. A trailing comma is written when a later sibling will also be
serialized, so the caller can place the results directly inside [ ].
-->
<xsl:template match="node()" mode="element">
  <!--
  Test the node type explicitly. local-name() is also non-empty for processing
  instructions, and a non-blank comment has a non-empty string value, so a
  name/string based test would serialize nodes that the comma logic below
  never counts as siblings.
  -->
  <xsl:if test="self::* or self::text()[normalize-space(.) != '']">
    <xsl:text>{</xsl:text>
    <xsl:choose>
      <!-- Element -->
      <xsl:when test="self::*">
        <!-- Tag name -->
        <xsl:text>"tagName": "</xsl:text>
        <xsl:value-of select="local-name()"/>
        <xsl:text>"</xsl:text>
        <!-- HTML attributes, rendered by the "attribute" mode template -->
        <xsl:if test="@*">
          <xsl:text>,</xsl:text>
          <xsl:text>"attributes": {</xsl:text>
          <xsl:apply-templates select="@*" mode="attribute"/>
          <xsl:text>}</xsl:text>
        </xsl:if>
        <!-- Children: recurse over all child nodes; skipped node types add nothing -->
        <xsl:if test="child::node()">
          <xsl:text>,</xsl:text>
          <xsl:text>"children": [</xsl:text>
          <xsl:apply-templates mode="element"/>
          <xsl:text>]</xsl:text>
        </xsl:if>
      </xsl:when>
      <!-- #text (the outer test guarantees it is not blank) -->
      <xsl:otherwise>
        <xsl:text>"tagName": "#text"</xsl:text>
        <xsl:text>,</xsl:text>
        <xsl:text>"content": </xsl:text>
        <!-- The default mode text() template writes the quoted, escaped string -->
        <xsl:apply-templates select="."/>
      </xsl:otherwise>
    </xsl:choose>
    <xsl:text>}</xsl:text>
    <!--
    Append a comma if any following sibling will be serialized: an element or
    a non-blank text node. The predicate checks every following text node;
    passing the node-set to normalize-space() would look at the first one only.
    -->
    <xsl:if test="following-sibling::* or following-sibling::text()[normalize-space(.) != '']">
      <xsl:text>,</xsl:text>
    </xsl:if>
  </xsl:if>
</xsl:template>
<!--
Text nodes (default mode)
Writes the string value of the text node as a quoted, escaped string by
calling escape-string.
Text node escaping:
https://github.com/doekman/xml2json-xslt/blob/master/xml2json.xslt#L36
-->
<xsl:template match="text()">
  <xsl:call-template name="escape-string">
    <xsl:with-param name="s" select="string(.)"/>
  </xsl:call-template>
</xsl:template>
<!--
escape-string
Main template for escaping strings.
Parameter s: the string to write.
Output: a double quote, the result of escape-bs-string for s, and a closing
double quote. All escaping is delegated to escape-bs-string.
-->
<xsl:template name="escape-string">
  <xsl:param name="s"/>
  <xsl:variable name="quote" select="'&quot;'"/>
  <xsl:value-of select="$quote"/>
  <xsl:call-template name="escape-bs-string">
    <xsl:with-param name="s" select="$s"/>
  </xsl:call-template>
  <xsl:value-of select="$quote"/>
</xsl:template>
<!--
escape-bs-string
Escapes the backslash (\) before everything else.
Parameter s: the string to process.
Without a backslash, s goes straight to escape-quot-string. Otherwise the text
up to the first backslash, followed by a doubled backslash, goes to
escape-quot-string and the text after that backslash is processed by this
template again.
-->
<xsl:template name="escape-bs-string">
  <xsl:param name="s"/>
  <xsl:variable name="bs" select="'\'"/>
  <xsl:choose>
    <xsl:when test="not(contains($s, $bs))">
      <xsl:call-template name="escape-quot-string">
        <xsl:with-param name="s" select="$s"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="escape-quot-string">
        <xsl:with-param name="s" select="concat(substring-before($s, $bs), $bs, $bs)"/>
      </xsl:call-template>
      <xsl:call-template name="escape-bs-string">
        <xsl:with-param name="s" select="substring-after($s, $bs)"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
escape-quot-string
Escapes the double quote character.
Parameter s: the string to process.
Without a double quote, s goes straight to encode-string. Otherwise the text
up to the first double quote, followed by a backslash and a double quote, goes
to encode-string and the text after that double quote is processed by this
template again.
-->
<xsl:template name="escape-quot-string">
  <xsl:param name="s"/>
  <xsl:variable name="quot" select="'&quot;'"/>
  <xsl:choose>
    <xsl:when test="not(contains($s, $quot))">
      <xsl:call-template name="encode-string">
        <xsl:with-param name="s" select="$s"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="encode-string">
        <xsl:with-param name="s" select="concat(substring-before($s, $quot), '\', $quot)"/>
      </xsl:call-template>
      <xsl:call-template name="escape-quot-string">
        <xsl:with-param name="s" select="substring-after($s, $quot)"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
encode-string
Replaces tab, line feed and carriage return by their escape codes (\t, \n, \r).
Parameter s: the string to process.
Each pass replaces the first occurrence of one of these characters and calls
the template again on the whole result; once none is left, s is written out.
Backslash and double quote cannot be handled here: they are not replaced by a
code (as &#x9; becomes \t) but prefixed (\ becomes \\). The backslash has to be
handled separately anyway because it must be processed first.
-->
<xsl:template name="encode-string">
  <xsl:param name="s"/>
  <xsl:choose>
    <!-- tab -->
    <xsl:when test="contains($s, '&#x9;')">
      <xsl:variable name="head" select="substring-before($s, '&#x9;')"/>
      <xsl:variable name="tail" select="substring-after($s, '&#x9;')"/>
      <xsl:call-template name="encode-string">
        <xsl:with-param name="s" select="concat($head, '\t', $tail)"/>
      </xsl:call-template>
    </xsl:when>
    <!-- line feed -->
    <xsl:when test="contains($s, '&#xA;')">
      <xsl:variable name="head" select="substring-before($s, '&#xA;')"/>
      <xsl:variable name="tail" select="substring-after($s, '&#xA;')"/>
      <xsl:call-template name="encode-string">
        <xsl:with-param name="s" select="concat($head, '\n', $tail)"/>
      </xsl:call-template>
    </xsl:when>
    <!-- carriage return -->
    <xsl:when test="contains($s, '&#xD;')">
      <xsl:variable name="head" select="substring-before($s, '&#xD;')"/>
      <xsl:variable name="tail" select="substring-after($s, '&#xD;')"/>
      <xsl:call-template name="encode-string">
        <xsl:with-param name="s" select="concat($head, '\r', $tail)"/>
      </xsl:call-template>
    </xsl:when>
    <!-- nothing left to replace -->
    <xsl:otherwise>
      <xsl:value-of select="$s"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment