Skip to content

Instantly share code, notes, and snippets.

Last active August 26, 2020 08:35
Show Gist options
  • Save ap/281631 to your computer and use it in GitHub Desktop.
HTML to Markdown in (E)XSLT
<!--
  HTML to Markdown in (E)XSLT.

  Walks the children of the XHTML body element and emits Markdown as plain
  text. Needs a processor that implements the EXSLT "functions" module
  (fn:function / fn:result) and the EXSLT "strings" module (str:tokenize,
  str:padding).

  NOTE(review): this listing reached review with its namespace declarations,
  end tags and literal delimiter text missing. The xsl, h, fn and str
  bindings below are the standard URIs for those vocabularies; the URI bound
  to "my" is a placeholder, and the literal Markdown delimiters are
  reconstructed from Markdown syntax. Confirm both against the upstream file.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:h="http://www.w3.org/1999/xhtml"
                xmlns:fn="http://exslt.org/functions"
                xmlns:str="http://exslt.org/strings"
                xmlns:my="urn:x-html2markdown:functions"
                extension-element-prefixes="fn str my">

  <xsl:output method="text" encoding="utf-8" />

  <!-- Entry point: convert every node inside any XHTML body element. -->
  <xsl:template match="/">
    <xsl:apply-templates select="//h:body/node()"/>
  </xsl:template>

  <!--================== INLINE ELEMENTS =====================-->

  <!-- FIXME backslash special characters:
       * periods following a newline + number
       * asterisks and underscores
       * backticks
  -->

  <!--
    Text nodes.
    NOTE(review): the body of this template was not visible in the reviewed
    listing. Copying the text through unchanged matches the built-in rule and
    is a placeholder; the escaping described in the FIXME above is still
    missing.
  -->
  <xsl:template match="text()">
    <xsl:value-of select="."/>
  </xsl:template>

  <!-- Hard line break: Markdown needs two trailing spaces before the newline. -->
  <xsl:template match="h:br">
    <xsl:text>  &#10;</xsl:text>
  </xsl:template>

  <!-- Inline link: [content](href "title"); the title part is optional. -->
  <xsl:template match="h:a[@href]">
    <xsl:text>[</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>](</xsl:text>
    <xsl:value-of select="@href"/>
    <xsl:if test="@title">
      <xsl:text> "</xsl:text>
      <xsl:value-of select="@title"/>
      <xsl:text>"</xsl:text>
    </xsl:if>
    <xsl:text>)</xsl:text>
  </xsl:template>

  <!--
    Inline image: ![alt](src "title"). Images that carry a width or height
    attribute do not match here, because Markdown image syntax cannot express
    dimensions; they fall through to the generic h:* template below.
  -->
  <xsl:template match="h:img[not(@width | @height)]">
    <xsl:text>![</xsl:text>
    <xsl:value-of select="@alt"/>
    <xsl:text>](</xsl:text>
    <xsl:value-of select="@src"/>
    <xsl:if test="@title">
      <xsl:text> "</xsl:text>
      <xsl:value-of select="@title"/>
      <xsl:text>"</xsl:text>
    </xsl:if>
    <xsl:text>)</xsl:text>
  </xsl:template>

  <!-- Emphasis. -->
  <xsl:template match="h:em">
    <xsl:text>*</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>*</xsl:text>
  </xsl:template>

  <!-- Strong emphasis. -->
  <xsl:template match="h:strong">
    <xsl:text>**</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>**</xsl:text>
  </xsl:template>

  <!--
    Strong containing em, and em containing strong.
    NOTE(review): the delimiters were lost from the reviewed listing. The
    underscore forms are used here so that directly nested emphasis does not
    produce an ambiguous run of three asterisks; confirm against upstream.
  -->
  <xsl:template match="h:strong[h:em]">
    <xsl:text>__</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>__</xsl:text>
  </xsl:template>

  <xsl:template match="h:em[h:strong]">
    <xsl:text>_</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>_</xsl:text>
  </xsl:template>

  <!-- Inline code span. -->
  <xsl:template match="h:code">
    <xsl:text>`</xsl:text>
    <xsl:apply-templates/>
    <xsl:text>`</xsl:text>
  </xsl:template>

  <!--================== BLOCK ELEMENTS =====================-->

  <!--
    my:prefix-all-lines( pfx, text? )

    Splits text on newlines and returns every resulting line with pfx
    prepended and a newline appended.

    pfx   string put in front of every line
    text  content to split; defaults to the result of applying templates to
          the children of the caller's context node

    An empty text yields no tokens and therefore an empty result.
  -->
  <fn:function name="my:prefix-all-lines">
    <xsl:param name="pfx" />
    <xsl:param name="text"><xsl:apply-templates /></xsl:param>
    <fn:result>
      <xsl:for-each select="str:tokenize( $text, '&#10;' )">
        <xsl:value-of select="concat( $pfx, ., '&#10;' )"/>
      </xsl:for-each>
    </fn:result>
  </fn:function>

  <!--
    my:prefix-first-line( pfx, text? )

    Like my:prefix-all-lines, but only the first line receives pfx; every
    later line is indented with as many spaces as pfx is long, so that
    continuation lines align under the first one.

    pfx   string put in front of the first line
    text  content to split; defaults to the result of applying templates to
          the children of the caller's context node
  -->
  <fn:function name="my:prefix-first-line">
    <xsl:param name="pfx" />
    <xsl:param name="text"><xsl:apply-templates /></xsl:param>
    <fn:result>
      <xsl:for-each select="str:tokenize( $text, '&#10;' )">
        <xsl:choose>
          <xsl:when test="position() = 1">
            <xsl:value-of select="concat( $pfx, ., '&#10;' )"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="concat( str:padding( string-length( $pfx ), ' ' ), ., '&#10;' )"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:for-each>
    </fn:result>
  </fn:function>

  <!--
    NOTE(review): none of the block templates below emits a blank line after
    itself. Separation between blocks presumably comes from whitespace text
    nodes between block elements in the input; confirm.
  -->

  <!-- Paragraph: its lines, each terminated by a newline, no prefix. -->
  <xsl:template match="h:p">
    <xsl:value-of select="my:prefix-all-lines( '' )"/>
  </xsl:template>

  <!-- ATX headings, levels 1 to 6. -->
  <xsl:template match="h:h1">
    <xsl:value-of select="my:prefix-first-line( '# ' )"/>
  </xsl:template>

  <xsl:template match="h:h2">
    <xsl:value-of select="my:prefix-first-line( '## ' )"/>
  </xsl:template>

  <xsl:template match="h:h3">
    <xsl:value-of select="my:prefix-first-line( '### ' )"/>
  </xsl:template>

  <xsl:template match="h:h4">
    <xsl:value-of select="my:prefix-first-line( '#### ' )"/>
  </xsl:template>

  <xsl:template match="h:h5">
    <xsl:value-of select="my:prefix-first-line( '##### ' )"/>
  </xsl:template>

  <xsl:template match="h:h6">
    <xsl:value-of select="my:prefix-first-line( '###### ' )"/>
  </xsl:template>

  <!-- Block quote: every line of the converted content gets a quote marker. -->
  <xsl:template match="h:blockquote">
    <xsl:value-of select="my:prefix-all-lines( '> ' )"/>
  </xsl:template>

  <!--
    Code block: the raw string value of the code child, each line indented by
    one tab. string() is the XPath 1.0 function for this; the reviewed
    listing called string-value(), which XPath 1.0 does not define.
  -->
  <xsl:template match="h:pre[h:code]">
    <xsl:value-of select="my:prefix-all-lines( '&#9;', string( h:code ) )"/>
  </xsl:template>

  <!-- Lists: only li children are processed, so position() counts items. -->
  <xsl:template match="h:ul | h:ol">
    <xsl:apply-templates select="h:li"/>
  </xsl:template>

  <!-- Unordered list item. -->
  <xsl:template match="h:ul/h:li">
    <xsl:value-of select="my:prefix-first-line( '* ' )"/>
  </xsl:template>

  <!-- Ordered list item, numbered by its position among the li siblings. -->
  <xsl:template match="h:ol/h:li">
    <xsl:value-of select="my:prefix-first-line( concat( position(), '. ' ) )"/>
  </xsl:template>

  <!--
    Horizontal rule: a row of eight hyphens on its own line. It is written
    as literal text because my:prefix-all-lines returns nothing for an empty
    text argument, so calling it with '' (as the reviewed listing did) never
    produced the rule.
  -->
  <xsl:template match="h:hr">
    <xsl:text>--------&#10;</xsl:text>
  </xsl:template>

  <!--================== UNSUPPORTED BLOCK ELEMENTS =====================-->

  <!-- Block elements with no Markdown equivalent are re-emitted as HTML. -->
  <xsl:template match="h:address | h:center | h:dir | h:div | h:dl | h:fieldset | h:form | h:isindex | h:menu | h:noframes | h:noscript | h:pre | h:table | h:dd | h:dt | h:frameset | h:tbody | h:td | h:tfoot | h:th | h:thead | h:tr">
    <xsl:apply-templates select="." mode="block" />
  </xsl:template>

  <!--
    Serialises an element as an HTML tag pair with its attributes, and
    recurses in the same mode so that all descendants stay HTML too.
    Attribute values are written as is; nothing here escapes quotes or
    ampersands inside them.
  -->
  <xsl:template match="*" mode="block">
    <xsl:text>&lt;</xsl:text>
    <xsl:value-of select="local-name()"/>
    <xsl:for-each select="@*">
      <xsl:text> </xsl:text>
      <xsl:value-of select="local-name()"/>
      <xsl:text>="</xsl:text>
      <xsl:value-of select="."/>
      <xsl:text>"</xsl:text>
    </xsl:for-each>
    <xsl:text>&gt;</xsl:text>
    <xsl:apply-templates mode="block" />
    <xsl:text>&lt;/</xsl:text>
    <xsl:value-of select="local-name()"/>
    <xsl:text>&gt;</xsl:text>
  </xsl:template>

  <!--================== UNSUPPORTED INLINE ELEMENTS =====================-->

  <!--
    This is basically a copy of the template above. The difference is that
    children are processed in the default mode, so Markdown conversion
    resumes inside the emitted HTML tag pair.
  -->
  <xsl:template match="h:*">
    <xsl:text>&lt;</xsl:text>
    <xsl:value-of select="local-name()"/>
    <xsl:for-each select="@*">
      <xsl:text> </xsl:text>
      <xsl:value-of select="local-name()"/>
      <xsl:text>="</xsl:text>
      <xsl:value-of select="."/>
      <xsl:text>"</xsl:text>
    </xsl:for-each>
    <xsl:text>&gt;</xsl:text>
    <xsl:apply-templates />
    <xsl:text>&lt;/</xsl:text>
    <xsl:value-of select="local-name()"/>
    <xsl:text>&gt;</xsl:text>
  </xsl:template>

</xsl:stylesheet>
Copy link

ap commented Aug 17, 2020

Err… yes. I don’t know what I was thinking… a decade ago. Anyway, fixed.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment