Skip to content

Instantly share code, notes, and snippets.

@psychemedia psychemedia/ouxml2md.xslt
Last active Aug 29, 2019

Embed
What would you like to do?
Testing - ou-xml to markdown xslt
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:exsl="http://exslt.org/common"
xmlns:str="http://exslt.org/strings"
extension-element-prefixes="exsl"
exclude-result-prefixes="exsl str">
<!-- xmlns:functx="http://www.functx.com" -->
<!-- exclude-result-prefixes: the str namespace is only used for XPath extension
functions. Without excluding it, every literal result element written by this
stylesheet (div, table, a, ...) would carry an xmlns:str declaration in the output. -->
<!-- Strip out any whitespace used to style layout of XML doc we're processing -->
<xsl:strip-space elements="*"/>
<!-- Defining a parameter means we can pass values in.
filestub is the prefix of every file name generated for a Session. -->
<xsl:param name="filestub">test</xsl:param>
<!-- Main result is plain text; the per-Session documents set their own method. -->
<xsl:output method="text" />
<!-- Entry point: hand every child of the document root to the matching templates. -->
<xsl:template match="/">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- HTML-style inline elements and their markdown equivalents. -->

<!-- a: markdown link, written as [content](href). -->
<xsl:template match="a">
  <xsl:text>[</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>](</xsl:text>
  <xsl:value-of select="string(@href)" />
  <xsl:text>)</xsl:text>
</xsl:template>

<!-- i: emphasis, content wrapped in single asterisks. -->
<xsl:template match="i">
  <xsl:text>*</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>*</xsl:text>
</xsl:template>

<!-- b: strong emphasis, content wrapped in double underscores. -->
<xsl:template match="b">
  <xsl:text>__</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>__</xsl:text>
</xsl:template>

<!-- br directly inside a Paragraph: two newlines. -->
<xsl:template match="Paragraph/br">
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Any other br: a single newline. -->
<xsl:template match="br">
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- some OU-XML alternatives to HTMLy things... -->
<!-- Paragraph: written on its own line, preceded and followed by a newline.
If the parent is a ListItem the text is indented so that it stays part of the
list item, which allows multi-paragraph lists. Four spaces are used because that
is enough for both "* " and "N. " list markers. The spaces are written as
character references so they cannot be lost when the file is reformatted or
copied. -->
<xsl:template match="Paragraph">
<xsl:text>&#xa;</xsl:text>
<xsl:if test="parent::ListItem">
<xsl:text>&#x20;&#x20;&#x20;&#x20;</xsl:text>
</xsl:if>
<xsl:apply-templates select="*|text()" />
<xsl:text>&#xa;</xsl:text>
</xsl:template>
<!-- Image: markdown image, ![alt](file name), on its own line.
Only the last path segment of @src is used as the link target.
XPath string literals have no backslash escapes, so a literal written as two
backslashes is a two-character pattern and would never split on the single
separators inside a path. str:tokenize treats every character of its second
argument as a delimiter, so both backslash and forward slash separated paths
are reduced to their file name. -->
<xsl:template match="Image">
<xsl:text>&#xa;&#xa;![</xsl:text>
<xsl:value-of select="@alt" />
<xsl:text>](</xsl:text>
<!-- <xsl:value-of select="@src" /> -->
<!-- preprocess the XML to swap in image paths we can resolve? -->
<!-- Alternatively we could leave the full image path here and map on that; more likely to be unique? -->
<xsl:value-of select="str:tokenize(@src, '\/')[last()]" />
<xsl:text>)&#xa;</xsl:text>
</xsl:template>
<!-- Quote: process the content, then close the block with two newlines.
TO DO: does this also have to cope with a Quote that has no inner Paragraph? -->
<xsl:template match="Quote">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Paragraph inside a Quote: a newline, then the markdown quote marker, then the content.
Output escaping is disabled on the marker so that it is written as a raw
greater-than sign.
TO DO: multiline quotes. One idea is to rewrite each newline in the text as a
newline followed by the quote marker, e.g. with str:replace. -->
<xsl:template match="Quote/Paragraph">
  <xsl:text>&#10;</xsl:text>
  <xsl:text disable-output-escaping="yes">&gt;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- ListItem: one markdown list entry.
Items of a NumberedList get an "N. " marker, all other items get "* ".
The item content is written once, on the marker line, and any nested lists
follow after the line break. Nested lists are selected separately so that
they are not also written as part of the content.
The number is counted from the preceding ListItem siblings rather than taken
from position(), so other sibling nodes (comments, for example) cannot shift it.
TO DO: nested lists are not indented yet. -->
<xsl:template match="ListItem">
<!-- <xsl:value-of select="functx:repeat-string(' ', count(ancestor::li))"/> -->
<xsl:choose>
<xsl:when test="parent::NumberedList">
<xsl:value-of select="count(preceding-sibling::ListItem) + 1" />
<xsl:text>. </xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:text>* </xsl:text>
</xsl:otherwise>
</xsl:choose>
<!-- Everything except nested lists belongs on the item line. -->
<xsl:apply-templates select="node()[not(self::NumberedList or self::BulletedList)]" />
<xsl:text>&#xa;</xsl:text>
<xsl:apply-templates select="NumberedList|BulletedList" />
</xsl:template>
<!-- List containers add no output of their own; the ListItem template does the work.
(The original version did not process text() nodes here, to avoid unnecessary whitespace.) -->
<xsl:template match="NumberedList|BulletedList">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- OU-XML things -->
<!-- Item: no output of its own, just descend into the children.
Open question: emit metadata here? A directory path, a Readme in the directory,
or a contents list? Candidate values are @Module and CourseTitle. -->
<xsl:template match="Item">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- Unit: no output of its own, just descend into the children.
No template-local filestub param is declared here: a local param is not visible
to the Session template unless it is passed on with xsl:with-param, so it would
only shadow the global filestub without having any effect on the file names. -->
<xsl:template match="Unit">
<!-- metadata? -->
<!-- How can we count which unit we are in and use that in setting filenames? -->
<!-- <xsl:value-of select="UnitTitle"/> -->
<xsl:apply-templates/>
</xsl:template>
<!-- LearningOutcomes: a level-two heading followed by the individual outcomes. -->
<xsl:template match="LearningOutcomes">
  <xsl:text>&#10;&#10;## Learning Outcomes&#10;&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- The md output actually starts here with document partitioning -->
<!-- Session: the content of each Session is written to its own file through
exsl:document. The file name is built as FILESTUB_A_B.md, where FILESTUB is the
global filestub parameter, A is the number of nodes that precede the parent of
the Session, and B is position() of the Session in the current node list.
NOTE(review): method="html" is presumably used so that the literal HTML elements
some templates emit are serialized as markup; confirm. -->
<xsl:template match="Session">
<!-- Create a new output document for each session -->
<!-- This requires the directory path to be set, so for new directories
create directory path stub at the start of the filename and postprocess? -->
<!-- or to generate a filename (needs tweaking) on _UNIT_SESSION_ -->
<!-- test_{count(../preceding-sibling::node())}_{position()}.md -->
<exsl:document method="html" href="{$filestub}_{count(../preceding-sibling::node())}_{position()}.md">
<xsl:apply-templates />
</exsl:document>
</xsl:template>
<!-- Title of a Session: level-one heading followed by a blank line. -->
<xsl:template match="Session/Title">
  <xsl:text># </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Title of a Section: level-two heading surrounded by blank lines. -->
<xsl:template match="Section/Title">
  <xsl:text>&#10;&#10;## </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>
<!-- SubSection: a horizontal rule followed by the content.
The children are processed exactly once; applying templates a second time
would write the whole subsection into the output twice. -->
<xsl:template match="SubSection">
<xsl:text>---&#xa;</xsl:text>
<xsl:apply-templates />
</xsl:template>
<!-- SubSubSection: a horizontal rule followed by the content.
The children are processed exactly once; applying templates a second time
would write the whole sub-subsection into the output twice. -->
<xsl:template match="SubSubSection">
<xsl:text>---&#xa;</xsl:text>
<xsl:apply-templates />
</xsl:template>
<!-- InternalSection: content fenced by a horizontal rule above and below. -->
<xsl:template match="InternalSection">
  <xsl:text>&#10;---&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;---&#10;</xsl:text>
</xsl:template>

<!-- Heading of an InternalSection: level-three heading. -->
<xsl:template match="InternalSection/Heading">
  <xsl:text>&#10;&#10;### </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- Exercise: content followed by a blank line.
Open question: should metadata be added here somehow? -->
<xsl:template match="Exercise">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Heading of an Exercise: level-three heading. -->
<xsl:template match="Exercise/Heading">
  <xsl:text>&#10;&#10;### </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- Timing: bold "Timing:" line carrying the text of the element. -->
<xsl:template match="Timing">
  <xsl:text>__Timing: </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>__&#10;</xsl:text>
</xsl:template>

<!-- Question: fixed level-four heading, then the content. -->
<xsl:template match="Question">
  <xsl:text>&#10;&#10;#### Question&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>

<!-- Discussion: fixed level-four heading, then the content. -->
<xsl:template match="Discussion">
  <xsl:text>&#10;&#10;#### Discussion&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- Caption belongs to Figure, alongside Image: content followed by a blank line. -->
<xsl:template match="Figure/Caption">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Number inside a figure caption: written in bold. -->
<xsl:template match="Figure/Caption/Number">
  <xsl:text>__</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>__</xsl:text>
</xsl:template>
<!-- GlossaryTerm: the term text in bold.
It would be nice to do more with glossary items. -->
<xsl:template match="GlossaryTerm">
  <xsl:text>__</xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>__</xsl:text>
</xsl:template>
<!-- Box and Activity are rendered the same way: content followed by a blank line. -->
<xsl:template match="Box|Activity">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;&#10;</xsl:text>
</xsl:template>

<!-- Heading of a Box or an Activity: level-three heading. -->
<xsl:template match="Box/Heading|Activity/Heading">
  <xsl:text>&#10;&#10;### </xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- ComputerUI: inline code, content wrapped in backticks. -->
<xsl:template match="ComputerUI">
  <xsl:text>`</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>`</xsl:text>
</xsl:template>

<!-- ProgramListing, ComputerCode and ComputerDisplay all become a fenced
code block that is tagged as python. -->
<xsl:template match="ProgramListing|ComputerCode|ComputerDisplay">
  <xsl:text>&#10;&#10;```python&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#10;```&#10;&#10;</xsl:text>
</xsl:template>

<!-- Paragraph inside ComputerDisplay: starts a new line inside the fence,
without the trailing newline of an ordinary Paragraph. -->
<xsl:template match="ComputerDisplay/Paragraph">
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>

<!-- Text directly inside such a paragraph is written with output escaping disabled. -->
<xsl:template match="ComputerDisplay/Paragraph/text()">
  <xsl:value-of select="string(.)" disable-output-escaping="yes" />
</xsl:template>
<!-- TO DO -->
<!-- is there a transcript element? -->
<!-- The templates below emit literal HTML elements instead of markdown. -->
<!-- LearningOutcome: content wrapped in a div with class learningOutcome. -->
<xsl:template match="LearningOutcome">
<div class='learningOutcome'>
<xsl:apply-templates />
</div>
</xsl:template>
<!-- Section: no output of its own; its Title child has a dedicated template. -->
<xsl:template match="Section">
<xsl:apply-templates />
</xsl:template>
<!-- SideNote: content wrapped in a div with a light blue background. -->
<xsl:template match="SideNote">
<div style='background:lightblue'>
<xsl:apply-templates/>
</div>
</xsl:template>
<!-- SideNoteParagraph: content wrapped in an HTML p element. -->
<xsl:template match="SideNoteParagraph">
<p>
<xsl:apply-templates />
</p>
</xsl:template>
<!-- Tables: content wrapped between an opening TABLES marker line and a closing
ENDTABLES marker line. The closing marker uses the END prefix like every other
marker template in this stylesheet, so the start and the end of the region can
be told apart when the output is postprocessed. -->
<xsl:template match="Tables">
##-- TABLES
<xsl:apply-templates />
##-- ENDTABLES
</xsl:template>
<!-- Tables are passed through as literal HTML rather than converted to markdown. -->
<!-- Table: content wrapped in an HTML table element. -->
<xsl:template match="Table">
<table>
<xsl:apply-templates />
</table>
</xsl:template>
<!-- Number inside a TableHead: its text wrapped in em. -->
<xsl:template match="TableHead/Number">
<em>
<xsl:value-of select="." />
</em>
</xsl:template>
<!-- TableHead: becomes the caption of the table. -->
<xsl:template match="TableHead">
<caption>
<xsl:apply-templates />
</caption>
</xsl:template>
<!-- tbody, tr and th are re-emitted as the same HTML element, without attributes. -->
<xsl:template match="tbody">
<tbody>
<xsl:apply-templates />
</tbody>
</xsl:template>
<xsl:template match="tr">
<tr>
<xsl:apply-templates />
</tr>
</xsl:template>
<xsl:template match="th">
<th>
<xsl:apply-templates />
</th>
</xsl:template>
<!-- td: HTML table cell.
The class is always "highlight_" followed by the value of @highlight.
rowspan and colspan are only written when the source cell carries a non-empty
value, because an empty rowspan or colspan attribute is not valid HTML.
The xsl:attribute instructions must stay ahead of the cell content. -->
<xsl:template match="td">
<td class="highlight_{@highlight}">
<xsl:if test="normalize-space(@rowspan)">
<xsl:attribute name="rowspan"><xsl:value-of select="@rowspan" /></xsl:attribute>
</xsl:if>
<xsl:if test="normalize-space(@colspan)">
<xsl:attribute name="colspan"><xsl:value-of select="@colspan" /></xsl:attribute>
</xsl:if>
<xsl:apply-templates />
</td>
</xsl:template>
<!-- Marker templates: element types without a markdown rendering yet.
Each one writes a marker line naming the element, then the processed content,
then a matching END marker line. The marker text is literal template text, so
the line breaks around it are part of the output; keep the layout as it is. -->
<xsl:template match="Figures">
##-- FIGURES
<xsl:apply-templates />
##-- ENDFIGURES
</xsl:template>
<xsl:template match="MediaContent">
##-- MEDIACONTENT
<xsl:apply-templates />
##-- ENDMEDIACONTENT
</xsl:template>
<xsl:template match="Chemistry">
##-- CHEMISTRY
<xsl:apply-templates />
##-- ENDCHEMISTRY
</xsl:template>
<!-- Figure: no marker and no output of its own; Image and Caption do the work. -->
<xsl:template match="Figure">
<xsl:apply-templates />
</xsl:template>
<xsl:template match="Extract">
##-- EXTRACT
<xsl:apply-templates />
##-- ENDEXTRACT
</xsl:template>
<xsl:template match="Dialogue">
##-- DIALOGUE
<xsl:apply-templates />
##-- ENDDIALOGUE
</xsl:template>
<xsl:template match="SAQ">
##-- SAQ
<xsl:apply-templates />
##-- ENDSAQ
</xsl:template>
<xsl:template match="ITQ">
##-- ITQ
<xsl:apply-templates />
##-- ENDITQ
</xsl:template>
<xsl:template match="KeyPoints">
##-- KEYPOINTS
<xsl:apply-templates />
##-- ENDKEYPOINTS
</xsl:template>
<xsl:template match="Summary">
##-- SUMMARY
<xsl:apply-templates />
##-- ENDSUMMARY
</xsl:template>
<xsl:template match="Reading">
##-- READING
<xsl:apply-templates />
##-- ENDREADING
</xsl:template>
<xsl:template match="Example">
##-- EXAMPLE
<xsl:apply-templates />
##-- ENDEXAMPLE
</xsl:template>
<xsl:template match="Verse">
##-- VERSE
<xsl:apply-templates />
##-- ENDVERSE
</xsl:template>
<!-- StudyNote: content wrapped in a div with a light green background. -->
<xsl:template match="StudyNote">
<div style='background:lightgreen'>
<xsl:apply-templates />
</div>
</xsl:template>
<!-- This is here as a warning / catch all for any missed heading types.
Headings with a more specific template (for example Box/Heading) never reach it;
anything that does is written as an HTML h1 so that it stands out in the output. -->
<xsl:template match="Heading">
<h1>
<xsl:value-of select="." />
</h1>
</xsl:template>
<!-- CrossRef: HTML link whose text is the text of the element.
The target is read from the idref attribute; selecting idref without the
attribute axis would look for a child element of that name and leave href empty.
NOTE(review): the raw id is used as the target. How to resolve it (a leading
hash sign, or a mapping to the generated per-Session files) is still open. -->
<xsl:template match="CrossRef">
<a href="{@idref}">
<xsl:value-of select="." />
</a>
</xsl:template>
<!-- TeX: the text of the element wrapped in double dollar signs. -->
<xsl:template match="TeX">
  <xsl:text>$$</xsl:text>
  <xsl:value-of select="string(.)" />
  <xsl:text>$$</xsl:text>
</xsl:template>
</xsl:stylesheet>
# Convert an OU-XML document from OpenLearn to markdown with ouxml2md.xslt.
#
# The stylesheet is compiled once, the source XML is downloaded, and the
# transform is run with the output file prefix passed in as the `filestub`
# stylesheet parameter.
import lxml.html
import requests
from lxml import etree

# Read in the XSLT file and compile it into a reusable transformer.
with open('ouxml2md.xslt', 'r', encoding='utf-8') as f:
    xslt = f.read()
xslt_doc = etree.fromstring(xslt)
xslt_transformer = etree.XSLT(xslt_doc)

# Path to an OU XML file on OpenLearn
openlearn_xml_url = 'https://www.open.edu/openlearn/science-maths-technology/learn-code-data-analysis/altformat-ouxml'

# Get the XML. A timeout stops the script hanging on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception here instead of an
# XML parse error on the error page further down.
response = requests.get(openlearn_xml_url, timeout=30)
response.raise_for_status()
dummy_xml = response.content  # bytes, so lxml honours the document's own encoding

# Conversion
# Path / filename prefix for generated output files. The stylesheet appends its
# own "_" separator after this prefix when it builds each file name.
output_path_stub = 'test_'
source_doc = etree.fromstring(dummy_xml)
output_doc = xslt_transformer(
    source_doc,
    filestub=etree.XSLT.strparam(output_path_stub),
)
# The markdown files are written by the exsl:document instruction in the
# stylesheet as a side effect of the call above. output_doc only holds the main
# result of the transform; it is not clear what use, if any, it has.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.