Skip to content

Instantly share code, notes, and snippets.

@pablojimeno
Forked from PLTGit/docx_to_markdown.xslt
Created December 2, 2016 18:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pablojimeno/5e1715ef153d54383fc4c899103c0622 to your computer and use it in GitHub Desktop.
Save pablojimeno/5e1715ef153d54383fc4c899103c0622 to your computer and use it in GitHub Desktop.
Super rudimentary DOCX -> Markdown XSLT template - not full-featured, and very MS Word-specific
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Rudimentary DOCX (WordprocessingML) to Markdown transform.

  Input:  word/document.xml extracted from a .docx package.
  Output: plain text (Markdown).

  What is handled:
    * paragraph styles Heading1 .. Heading6 become ATX headings;
    * paragraph style Compact becomes an unordered list item;
    * character style VerbatimChar is wrapped in backticks;
    * italic and bold run properties are wrapped in * and **.

  What is NOT handled: ordered lists (numbering lives in other parts of
  the docx package), tables, images, hyperlink targets (they live in the
  relationships part), tabs, line breaks, and Markdown escaping of the
  extracted text.
-->
<xsl:stylesheet
    version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

  <xsl:output method="text"/>

  <!--
    One Markdown block per paragraph: optional prefix, then the inline
    content of every run, then a block separator.
  -->
  <xsl:template match="w:p">
    <!-- Paragraph style id, or the empty string when none is set. -->
    <xsl:variable name="style" select="string(w:pPr/w:pStyle/@w:val)"/>

    <!-- Paragraph level prefix. -->
    <xsl:choose>
      <!-- Exactly "Heading" followed by a single digit 1..6. Markdown -->
      <!-- has six ATX heading levels, so deeper Word headings are     -->
      <!-- left as plain paragraphs.                                   -->
      <xsl:when test="string-length($style) = 8
                      and starts-with($style, 'Heading')
                      and contains('123456', substring($style, 8))">
        <!-- Emit as many '#' characters as the heading level. -->
        <xsl:value-of select="substring('######', 1, number(substring($style, 8)))"/>
        <xsl:text> </xsl:text>
      </xsl:when>
      <!-- Unordered list item. -->
      <!-- WARNING: ordered and unordered lists cannot be told apart   -->
      <!-- without looking at additional XML content from the docx, so -->
      <!-- every list is rendered as unordered.                        -->
      <xsl:when test="$style = 'Compact'">
        <xsl:text>* </xsl:text>
      </xsl:when>
    </xsl:choose>

    <!-- Inline content. Runs nested in w:hyperlink are included so    -->
    <!-- that the visible link text is not dropped; the union is       -->
    <!-- processed in document order.                                  -->
    <xsl:apply-templates select="w:r | w:hyperlink/w:r" mode="inline"/>

    <!-- Block separator. -->
    <xsl:choose>
      <!-- A list item directly followed by another list item gets a   -->
      <!-- single line feed so both stay in the same Markdown list.    -->
      <xsl:when test="$style = 'Compact'
                      and following-sibling::*[1]/w:pPr/w:pStyle/@w:val = 'Compact'">
        <xsl:text>&#10;</xsl:text>
      </xsl:when>
      <!-- Everything else is separated by a blank line. -->
      <xsl:otherwise>
        <xsl:text>&#10;&#10;</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    Text of a single run, wrapped in its Markdown formatting markers.
    Lives in its own mode so it only fires when a paragraph asks for it.
  -->
  <xsl:template match="w:r" mode="inline">
    <!-- Concatenate every w:t child: in XSLT 1.0, xsl:value-of on a   -->
    <!-- node-set yields only the first node, which would lose text    -->
    <!-- when a run carries more than one text element.                -->
    <xsl:variable name="text">
      <xsl:for-each select="w:t">
        <xsl:value-of select="."/>
      </xsl:for-each>
    </xsl:variable>

    <!-- Markdown start and end markers are identical, so the marker   -->
    <!-- is computed once and emitted on both sides of the text.       -->
    <xsl:variable name="fmt">
      <xsl:choose>
        <!-- Preformatted text wins over bold/italic. -->
        <xsl:when test="w:rPr/w:rStyle[@w:val='VerbatimChar']">
          <xsl:text>```</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <!-- w:i and w:b are on/off properties: the element may be   -->
          <!-- present with w:val set to false, 0 or off, which means  -->
          <!-- the property is explicitly turned OFF for this run.     -->
          <xsl:if test="w:rPr/w:i[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')]">
            <xsl:text>*</xsl:text>
          </xsl:if>
          <xsl:if test="w:rPr/w:b[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')]">
            <xsl:text>**</xsl:text>
          </xsl:if>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:variable>

    <!-- Runs without text (tabs, breaks, drawings) emit nothing, so   -->
    <!-- no stray marker pairs such as "****" end up in the output.    -->
    <xsl:if test="string($text) != ''">
      <xsl:value-of select="$fmt"/>
      <xsl:value-of select="$text"/>
      <xsl:value-of select="$fmt"/>
    </xsl:if>
  </xsl:template>

</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment