Skip to content

Instantly share code, notes, and snippets.

@PLTGit
Created May 12, 2016 16:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save PLTGit/f3fc0683034c5e222e14e49c60104015 to your computer and use it in GitHub Desktop.
Save PLTGit/f3fc0683034c5e222e14e49c60104015 to your computer and use it in GitHub Desktop.
Super-rudimentary DOCX -> Markdown XSL template - not full-featured, and very MS Word-specific
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Rudimentary WordprocessingML (DOCX "word/document.xml") to Markdown
  converter.

  Every w:p paragraph is emitted as one Markdown block:
    * paragraph styles Heading1 .. Heading6 become ATX headings,
    * paragraph style Compact is treated as an unordered list item,
    * run style VerbatimChar is wrapped in backticks,
    * italic / bold runs are wrapped in "*" / "**".

  Limitations: ordered lists, tables, images and hyperlink targets are not
  handled; hyperlink text is emitted as plain text.
-->
<xsl:stylesheet
    version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

  <xsl:output method="text" encoding="UTF-8"/>

  <!-- One paragraph: optional prefix, formatted runs, block separator. -->
  <xsl:template match="//w:p">
    <!-- Name of the paragraph style, if any (empty node-set otherwise). -->
    <xsl:variable name="style" select="w:pPr/w:pStyle/@w:val"/>

    <!-- Paragraph level tweaks (prefixes) -->
    <xsl:choose>
      <xsl:when test="$style = 'Heading1'">
        <xsl:text># </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading2'">
        <xsl:text>## </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading3'">
        <xsl:text>### </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading4'">
        <xsl:text>#### </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading5'">
        <xsl:text>##### </xsl:text>
      </xsl:when>
      <!-- Markdown supports six heading levels, so map the sixth too. -->
      <xsl:when test="$style = 'Heading6'">
        <xsl:text>###### </xsl:text>
      </xsl:when>
      <!-- Unordered list -->
      <!-- WARNING: we can't tell the difference between ordered and -->
      <!-- unordered without looking at additional XML content from -->
      <!-- the docx. Don't do ordered lists. -->
      <xsl:when test="$style = 'Compact'">
        <xsl:text>* </xsl:text>
      </xsl:when>
    </xsl:choose>

    <!-- Content extraction and formatting for the current block. -->
    <!-- Runs nested in w:hyperlink are included so that link text is -->
    <!-- not lost; the union is processed in document order. The link -->
    <!-- target lives in a separate relationships part of the docx and -->
    <!-- is not available here, so only the text is emitted. -->
    <xsl:for-each select="w:r | w:hyperlink/w:r">
      <!-- A run may hold several w:t children. In XSLT 1.0 value-of on -->
      <!-- a node-set yields only the first node, so concatenate them -->
      <!-- explicitly. -->
      <xsl:variable name="text">
        <xsl:for-each select="w:t">
          <xsl:value-of select="."/>
        </xsl:for-each>
      </xsl:variable>

      <!-- Skip runs without text; otherwise a formatted but empty run -->
      <!-- would emit stray markers such as "****". -->
      <xsl:if test="string($text) != ''">
        <!-- If it's preformatted, use backticks. Otherwise, stack up -->
        <!-- the bold/italic indicators. Markdown is very nice in that -->
        <!-- the start and end formatters are exactly the same. -->
        <xsl:variable name="fmt">
          <xsl:choose>
            <xsl:when test="w:rPr/w:rStyle/@w:val = 'VerbatimChar'">
              <xsl:text>```</xsl:text>
            </xsl:when>
            <xsl:otherwise>
              <!-- w:i and w:b are on/off toggles: the element may be -->
              <!-- present with w:val set to 0, false or off, which -->
              <!-- explicitly disables the property. -->
              <xsl:if test="w:rPr/w:i[not(@w:val = '0' or @w:val = 'false' or @w:val = 'off')]">
                <xsl:text>*</xsl:text>
              </xsl:if>
              <xsl:if test="w:rPr/w:b[not(@w:val = '0' or @w:val = 'false' or @w:val = 'off')]">
                <xsl:text>**</xsl:text>
              </xsl:if>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:variable>

        <!-- Format on the way in, the text itself, format on the way out. -->
        <xsl:value-of select="$fmt"/>
        <xsl:value-of select="$text"/>
        <xsl:value-of select="$fmt"/>
      </xsl:if>
    </xsl:for-each>

    <!-- Paragraph separator -->
    <xsl:choose>
      <!-- If we are a list item and the next sibling is ALSO a list -->
      <!-- item, emit a single line feed so the items stay in one list. -->
      <xsl:when test="$style = 'Compact'
                      and following-sibling::*[1]/w:pPr/w:pStyle/@w:val = 'Compact'">
        <xsl:text>&#10;</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <!-- Blank line (two line feeds) to separate blocks. -->
        <xsl:text>&#10;&#10;</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>
@PLTGit
Copy link
Author

PLTGit commented May 12, 2016

Useful for something like:

unzip -c my_file.docx "word/document.xml" | tail -n +3 | xsltproc word_to_markdown.xslt -

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment