-
-
Save pablojimeno/5e1715ef153d54383fc4c899103c0622 to your computer and use it in GitHub Desktop.
Super Rudimentary DOCX -> Markdown XSL Template - not full featured, and very MSWord specific
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Rudimentary DOCX (WordprocessingML) to Markdown converter.

  Input:  a WordprocessingML document part (elements in the "w" namespace).
  Output: plain text, one Markdown block per w:p element.

  Recognised paragraph styles: Heading1 .. Heading6 and Compact (rendered as
  an unordered list item). Recognised run formatting: the VerbatimChar
  character style, w:i (italic) and w:b (bold). Anything else is emitted as
  plain, unformatted text. Not full featured, and very MS Word specific.
-->
<xsl:stylesheet
    version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

  <!-- Markdown is plain text; pin the encoding so the result does not depend
       on the processor's system default. -->
  <xsl:output method="text" encoding="UTF-8"/>

  <!-- Render one paragraph: optional Markdown prefix, the formatted runs,
       then a block separator. -->
  <xsl:template match="w:p">
    <!-- Paragraph style id, e.g. 'Heading1' or 'Compact'; empty node-set when
         the paragraph carries no explicit style. -->
    <xsl:variable name="style" select="w:pPr/w:pStyle/@w:val"/>

    <!-- Paragraph level tweaks (prefixes) -->
    <xsl:choose>
      <xsl:when test="$style = 'Heading1'">
        <xsl:text># </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading2'">
        <xsl:text>## </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading3'">
        <xsl:text>### </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading4'">
        <xsl:text>#### </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading5'">
        <xsl:text>##### </xsl:text>
      </xsl:when>
      <xsl:when test="$style = 'Heading6'">
        <xsl:text>###### </xsl:text>
      </xsl:when>
      <!-- Unordered list. -->
      <!-- WARNING: ordered and unordered lists cannot be told apart without
           looking at additional XML content from the docx, so ordered lists
           are not supported. -->
      <xsl:when test="$style = 'Compact'">
        <xsl:text>* </xsl:text>
      </xsl:when>
    </xsl:choose>

    <!-- Content extraction and formatting for the current block -->
    <xsl:for-each select="w:r">
      <!-- A run may hold several w:t children; value-of on the node-set would
           keep only the first one, so concatenate them explicitly. -->
      <xsl:variable name="text">
        <xsl:for-each select="w:t">
          <xsl:value-of select="."/>
        </xsl:for-each>
      </xsl:variable>

      <!-- Skip runs without text so that no dangling markers (e.g. "****")
           end up in the output. -->
      <xsl:if test="string-length($text) &gt; 0">
        <!-- If the run is preformatted, use the code marker. Otherwise stack
             up the bold/italic indicators. Markdown start and end markers
             are identical, so one variable serves both sides. -->
        <xsl:variable name="fmt">
          <xsl:choose>
            <xsl:when test="w:rPr/w:rStyle/@w:val = 'VerbatimChar'">
              <xsl:text>```</xsl:text>
            </xsl:when>
            <xsl:otherwise>
              <!-- w:i and w:b are toggle properties: a bare element means
                   "on", but w:val may switch the property off explicitly. -->
              <xsl:if test="w:rPr/w:i[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')]">
                <xsl:text>*</xsl:text>
              </xsl:if>
              <xsl:if test="w:rPr/w:b[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')]">
                <xsl:text>**</xsl:text>
              </xsl:if>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:variable>

        <!-- Format on the way in... -->
        <xsl:value-of select="$fmt"/>
        <!-- actual value -->
        <xsl:value-of select="$text"/>
        <!-- ...format on the way out. -->
        <xsl:value-of select="$fmt"/>
      </xsl:if>
    </xsl:for-each>

    <!-- Paragraph separator. Line feeds are written as character references
         so that reformatting this file cannot silently turn them into
         spaces. -->
    <xsl:choose>
      <!-- If we are a list item and the next sibling is ALSO a list item,
           emit only a single line break so the list stays tight. -->
      <xsl:when test="$style = 'Compact'
                      and following-sibling::*[1]/w:pPr/w:pStyle/@w:val = 'Compact'">
        <xsl:text>&#10;</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <!-- Blank line to separate blocks. -->
        <xsl:text>&#10;&#10;</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment