Skip to content

Instantly share code, notes, and snippets.

@vincentml
Created August 4, 2023 17:12
Show Gist options
  • Save vincentml/87eb0f254d2b4af1fcd94d6277f3dbe3 to your computer and use it in GitHub Desktop.
Save vincentml/87eb0f254d2b4af1fcd94d6277f3dbe3 to your computer and use it in GitHub Desktop.
XML Formatter
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl"
xmlns:this="formatter"
exclude-result-prefixes="xs xd this"
expand-text="true"
version="3.0">
<xd:doc scope="stylesheet">
<xd:desc>
<xd:p><xd:b>Created on:</xd:b> Aug 3, 2023</xd:p>
<xd:p><xd:b>Author:</xd:b> Vincent Lizzi</xd:p>
<xd:p><xd:b>Input:</xd:b> XML</xd:p>
<xd:p><xd:b>Output:</xd:b> Indented XML as text or html for display</xd:p>
</xd:desc>
</xd:doc>
<xd:doc>
  <xd:desc>Static parameter selecting the kind of output: 'text' or 'html'. It is tested by the
    use-when attributes of the xsl:output declarations.</xd:desc>
</xd:doc>
<xsl:param name="output" static="true" as="xs:string" select="'html'"/>
<xd:doc>
  <xd:desc>String emitted for every inserted space; the default is one space character. Substituting a
    visible character such as _ is useful while testing, to tell inserted spaces apart from spaces
    that come from the source document.</xd:desc>
</xd:doc>
<xsl:param name="space" select="' '" as="xs:string"/>
<xd:doc>
  <xd:desc>String used as the line ending; the default is a line feed.</xd:desc>
</xd:doc>
<xsl:param name="break" select="'&#xa;'" as="xs:string"/>
<xd:doc>
  <xd:desc>Number of attributes that can be output on a single line rather than on multiple lines.</xd:desc>
</xd:doc>
<xsl:param name="attinlinenum" select="2" as="xs:integer"/>
<xd:doc>
  <xd:desc>Length that the value of any attribute may have to be output on a single line rather than on
    multiple lines.</xd:desc>
</xd:doc>
<xsl:param name="attinlinelen" select="6" as="xs:integer"/>
<xsl:mode on-no-match="shallow-copy" use-accumulators="#all"/>
<xsl:output use-when="$output eq 'text'" method="text"/>
<xsl:output use-when="$output eq 'html'" method="html"/>
<xd:doc>
  <xd:desc>Document-node template, compiled only when the static output parameter is 'html'. It wraps
    the formatted result in an HTML page that loads highlight.js from a CDN and shows the file name
    taken from the base URI of the source document. Text value templates are switched off for this
    template because the embedded CSS contains curly braces.</xd:desc>
</xd:doc>
<xsl:template use-when="$output eq 'html'" match="/" expand-text="no">
  <html>
    <head>
      <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/styles/default.min.css"/>
      <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/highlight.min.js"></script>
      <script>hljs.highlightAll();</script>
      <!-- the text of the style element is left flush with the margin so that the emitted CSS text is unchanged -->
      <style type="text/css">
/* Browser specific (not valid) styles to make preformatted text wrap */
pre, code.language-xml {
white-space: pre-wrap; /* css-3 */
white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
white-space: -pre-wrap; /* Opera 4-6 */
white-space: -o-pre-wrap; /* Opera 7 */
word-wrap: break-word; /* Internet Explorer 5.5+ */
}
</style>
    </head>
    <body>
      <p>Viewing: <xsl:value-of select="replace(base-uri(/), '.*?/?([^/]+)$', '$1')"/></p>
      <pre><code class="language-xml"><xsl:apply-templates/></code></pre>
    </body>
  </html>
</xsl:template>
<xd:doc>
  <xd:desc>Index of every element that has at least one text node child that is not whitespace-only,
    keyed by the name of the element.</xd:desc>
</xd:doc>
<xsl:key name="text-elements" use="name()" match="*[text()[normalize-space()]]"/>
<xd:doc>
  <xd:desc>mixed-content starts as false and is recomputed at every text node: it becomes true when the
    text node is not whitespace-only, or when the text-elements key holds an element with the same name
    as the parent of the text node. No rule matches other kinds of node, so the value set at a text
    node is retained until the next text node.</xd:desc>
</xd:doc>
<xsl:accumulator name="mixed-content" as="xs:boolean" initial-value="false()">
  <xsl:accumulator-rule match="text()">
    <xsl:sequence select="normalize-space() ne '' or exists(key('text-elements', name(..)))"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
<xd:doc>
  <xd:desc>mixed-parent starts as false and is recomputed at every element: it is true when the parent of
    the element has at least one text node child that is not whitespace-only.</xd:desc>
</xd:doc>
<xsl:accumulator name="mixed-parent" as="xs:boolean" initial-value="false()">
  <xsl:accumulator-rule match="*">
    <xsl:sequence select="exists(parent::node()/text()[normalize-space() ne ''])"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
<xd:doc>
  <xd:desc>depth drives the amount of indentation. At every element it is set to the number of ancestor
    elements for which the mixed-content accumulator is false, so ancestors in mixed content do not
    add to the indentation.</xd:desc>
</xd:doc>
<xsl:accumulator name="depth" as="xs:integer" initial-value="0">
  <xsl:accumulator-rule match="*">
    <xsl:sequence select="count(ancestor::*[not(accumulator-before('mixed-content'))])"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
<xd:doc>
  <xd:desc>Element nodes are output as text using context aware formatting: the start tag with its
    attributes (sorted by name) and the namespace declarations that are new on this element, then the
    children, then the end tag. No line break or indentation is inserted when the mixed-parent or the
    mixed-content accumulator is true for the element. An element without children is output as an
    empty-element tag.</xd:desc>
</xd:doc>
<xsl:template match="*">
  <xsl:variable name="depth" as="xs:integer" select="accumulator-before('depth')"/>
  <xsl:variable name="mixed-content" as="xs:boolean" select="accumulator-before('mixed-content')"/>
  <xsl:variable name="mixed-parent" as="xs:boolean" select="accumulator-before('mixed-parent')"/>
  <!-- line break followed by two copies of $space per level of depth, or nothing inside mixed content -->
  <xsl:variable name="indent" as="xs:string" select="
      if ($mixed-parent or $mixed-content) then ''
      else $break || string-join(for $i in 1 to $depth * 2 return $space, '')"/>
  <xsl:variable name="attcount" as="xs:integer" select="count(@*)"/>
  <xsl:variable name="open" as="xs:string" select="$indent || '&lt;' || name()"/>
  <xsl:variable name="tail" as="xs:string" select="
      if (exists(node()))
      then if ($attcount gt 1 and $mixed-content)
        then $indent || ' >'
        else '>'
      else '/>'"/>
  <!-- the end tag is indented only when the element has no non-whitespace text child -->
  <xsl:variable name="close" as="xs:string" select="
      if ($tail eq '/>') then '' else
      (if (exists(text()[normalize-space()])) then () else $indent)
      || '&lt;/' || name() || '>'"/>
  <xsl:variable name="attlist" as="xs:string*">
    <xsl:variable name="attlen" as="xs:boolean"
      select="every $a in @* satisfies string-length($a) le $attinlinelen"/>
    <!-- the same lead string applies to every attribute of the element, so it is computed once -->
    <xsl:variable name="attlead" as="xs:string" select="
        if ($mixed-parent or $attcount le $attinlinenum or $attlen)
        then $space
        else $indent || $space"/>
    <xsl:for-each select="@*">
      <xsl:sort select="name()"/>
      <!-- double quotes are escaped as well, because the value is delimited by double quotes -->
      <xsl:sequence select="
          $attlead || name() || '=&quot;'
          || replace(this:escape(string()), '&quot;', '&amp;quot;')
          || '&quot;'"/>
    </xsl:for-each>
    <xsl:variable name="here" as="element()" select="."/>
    <xsl:variable name="parent" as="element()?" select="parent::*"/>
    <xsl:for-each select="in-scope-prefixes(.)[. ne 'xml']">
      <xsl:variable name="prefix" as="xs:string" select="."/>
      <xsl:variable name="uri" as="xs:string" select="string(namespace-uri-for-prefix($prefix, $here))"/>
      <!-- a declaration is needed when the parent does not bind this prefix to the same URI;
           comparing the URI and not only the prefix keeps a prefix that is rebound on a descendant -->
      <xsl:if test="not($uri = $parent/string(namespace-uri-for-prefix($prefix, .)))">
        <xsl:sequence select="
            $indent || $space
            || (if ($prefix eq '') then 'xmlns' else 'xmlns:' || $prefix)
            || '=&quot;' || replace(this:escape($uri), '&quot;', '&amp;quot;') || '&quot;'"/>
      </xsl:if>
    </xsl:for-each>
  </xsl:variable>
  <!-- $attlist may hold any number of strings and the || operator accepts at most one item per
       operand, so the list is joined before it is concatenated -->
  <xsl:value-of select="$open || string-join($attlist, '') || $tail"/>
  <xsl:apply-templates select="node()"/>
  <xsl:value-of select="$close"/>
</xsl:template>
<xd:doc>
  <xd:desc>A text node is output, with ampersand and less-than escaped, when the mixed-content accumulator
    is true for it or when it is not whitespace-only. Otherwise it is whitespace between elements
    outside mixed content and is discarded.</xd:desc>
</xd:doc>
<xsl:template match="text()">
  <xsl:if test="accumulator-after('mixed-content') or normalize-space() ne ''">
    <xsl:text>{this:escape(.)}</xsl:text>
  </xsl:if>
</xsl:template>
<xd:doc>
  <xd:desc>Comment nodes are output as text between the XML comment delimiters, with ampersands in the
    comment text escaped. NOTE(review): XML does not expand entity references inside comments, so
    the escaping changes the comment text if the output is parsed again; confirm that this is
    intended.</xd:desc>
</xd:doc>
<xsl:template match="comment()">
  <xsl:text>&lt;!--{this:escape-amp(.)}--></xsl:text>
</xsl:template>
<xd:doc>
  <xd:desc>Processing instruction nodes are output as text: the target name, one space, and the content
    of the instruction between the processing instruction delimiters.</xd:desc>
</xd:doc>
<xsl:template match="processing-instruction()">
  <xsl:value-of select="'&lt;?' || name() || ' ' || string(.) || '?>'"/>
</xsl:template>
<xd:doc>
  <xd:desc>Replace every ampersand with its entity reference and then every less-than character with
    its entity reference. The ampersands are replaced first so that the ampersand of an inserted
    less-than reference is not escaped again.</xd:desc>
  <xd:param name="text">any string</xd:param>
  <xd:return>the escaped string</xd:return>
</xd:doc>
<xsl:function name="this:escape" as="xs:string">
  <xsl:param name="text" as="xs:string"/>
  <xsl:sequence select="replace(replace($text, '&amp;', '&amp;amp;'), '&lt;', '&amp;lt;')"/>
</xsl:function>
<xd:doc>
  <xd:desc>Replace every ampersand with its entity reference; all other characters are left as they
    are.</xd:desc>
  <xd:param name="text">any string</xd:param>
  <xd:return>the escaped string</xd:return>
</xd:doc>
<xsl:function name="this:escape-amp" as="xs:string">
  <xsl:param name="text" as="xs:string"/>
  <xsl:sequence select="replace($text, '&amp;', '&amp;amp;')"/>
</xsl:function>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment