Testing - ou-xml to markdown xslt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<xsl:stylesheet version="1.0" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:exsl="http://exslt.org/common" | |
xmlns:str="http://exslt.org/strings" extension-element-prefixes="exsl"> | |
<!-- xmlns:functx="http://www.functx.com" --> | |
<!-- Strip out any whitespace used to style layout of XML doc we're processing --> | |
<xsl:strip-space elements="*"/> | |
<!-- Defining a parameter means we can pass values in --> | |
<xsl:param name="filestub">test</xsl:param> | |
<xsl:output method="text" /> | |
<!-- Root rule: begin at the document node and hand every child to the
     matching template rules. -->
<xsl:template match="/">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- some common HTMLy things... --> | |
<!-- Anchor: rendered as a Markdown inline link, with the element content as
     the label and the href attribute as the target. -->
<xsl:template match="a">
  <xsl:text>[</xsl:text>
  <!-- node() already covers text nodes, so nested markup in the label is kept -->
  <xsl:apply-templates select="node()" />
  <xsl:value-of select="concat('](', @href, ')')" />
</xsl:template>
<!-- Italic: content wrapped in single asterisks. -->
<xsl:template match="i">
  <xsl:value-of select="'*'" />
  <xsl:apply-templates select="node()" />
  <xsl:value-of select="'*'" />
</xsl:template>
<!-- Bold: content wrapped in double underscores. -->
<xsl:template match="b">
  <xsl:value-of select="'__'" />
  <xsl:apply-templates select="node()" />
  <xsl:value-of select="'__'" />
</xsl:template>
<!-- Line break directly inside a Paragraph: two newlines. This pattern is
     more specific than the plain br rule below, so it takes precedence. -->
<xsl:template match="Paragraph/br">
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Any other line break: a single newline. -->
<xsl:template match="br">
  <xsl:text>&#xa;</xsl:text>
</xsl:template>
<!-- some OU-XML alternatives to HTMLy things... --> | |
<!-- If the parent is a ListItem, we need to indent by at least one space. | |
This then allows us to have multi-paragraph lists. | |
--> | |
<!-- Paragraph: written on a line of its own. When the paragraph sits directly
     inside a ListItem its text is indented by one space, which is what allows
     multi-paragraph list items. -->
<xsl:template match="Paragraph">
  <xsl:text>&#xa;</xsl:text>
  <xsl:if test="parent::ListItem">
    <!-- Previously an empty xsl:text, so no indent was ever written. -->
    <xsl:text> </xsl:text>
  </xsl:if>
  <xsl:apply-templates select="*|text()" />
  <xsl:text>&#xa;</xsl:text>
</xsl:template>
<!-- Image: rendered as a Markdown image with an empty alt text whose target
     is the last path segment of the src attribute.
     NOTE(review): the middle of this template was lost when the file was
     extracted (only the trailing [last()] predicate survived). The select
     expression below is a reconstruction; confirm the attribute name and the
     use of str:tokenize against the original stylesheet. -->
<xsl:template match="Image">
  <xsl:text>&#xa;&#xa;![](</xsl:text>
  <xsl:value-of select='str:tokenize(@src, "/")[last()]' />
  <xsl:text>)&#xa;</xsl:text>
</xsl:template>
<!-- TO DO: does this also have to cope with situation where there is no internal paragraph? --> | |
<!-- Quote: render the children, then leave a blank line after the quote. -->
<xsl:template match="Quote">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- TO DO: multiline quotes --> | |
<!-- Paragraph inside a Quote: start a new line and prefix it with the
     Markdown blockquote marker. Output escaping is switched off so the marker
     is written as a literal character.
     TO DO: multiline quotes. One idea is to rewrite embedded newlines with
     str:replace so that every continuation line also carries a marker. -->
<xsl:template match="Quote/Paragraph">
  <xsl:text disable-output-escaping="yes">&#xa;&gt;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- TO DO - nested lists --> | |
<!-- ListItem: writes the list marker, then the item content, then a newline,
     and finally any lists nested inside the item.
     Items of a NumberedList get "N. " where N is the item's index among its
     ListItem siblings; every other item gets "* ".
     TO DO: nested lists are not indented yet. -->
<xsl:template match="ListItem">
  <xsl:choose>
    <xsl:when test="parent::NumberedList">
      <!-- Count ListItem siblings rather than relying on position(), which
           also counts any non-ListItem nodes in the current node list. -->
      <xsl:value-of select="count(preceding-sibling::ListItem) + 1" />
      <xsl:text>. </xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>* </xsl:text>
    </xsl:otherwise>
  </xsl:choose>
  <!-- Item content in document order, excluding nested lists. The earlier
       version wrote the first text node twice (once via value-of, once via
       apply-templates) and rendered nested lists twice. -->
  <xsl:apply-templates select="node()[not(self::NumberedList or self::BulletedList)]" />
  <xsl:text>&#xa;</xsl:text>
  <!-- Nested lists follow on their own lines, exactly once. -->
  <xsl:apply-templates select="NumberedList|BulletedList" />
</xsl:template>
<!-- Original didn't process text() nodes for these to prevent unnecessary whitespace --> | |
<!-- List containers add no output of their own; their children are simply
     processed in order. -->
<xsl:template match="BulletedList|NumberedList">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- OU-XML things --> | |
<!-- Item: pass-through container; only its children produce output.
     Open question: should this emit metadata, a directory path, a README in
     the directory, or a contents list (for example from the Module attribute
     and CourseTitle)? -->
<xsl:template match="Item">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- Unit: pass-through container; only its children produce output.
     Open questions: should metadata or the UnitTitle be emitted here, and how
     can the unit number be tracked for use in output filenames? -->
<xsl:template match="Unit">
  <!-- Template-local filestub defaulting to position(). It is declared but
       not forwarded with xsl:with-param below, so templates applied from
       here still see the global filestub parameter. -->
  <xsl:param name="filestub" select="position()" />
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- LearningOutcomes: a second-level heading surrounded by blank lines,
     followed by the rendered children. -->
<xsl:template match="LearningOutcomes">
  <xsl:text>&#xa;&#xa;## Learning Outcomes&#xa;&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- The md output actually starts here with document partitioning --> | |
<!-- Session: each session is written to its own output document through
     exsl:document. The file name is built from the filestub parameter, the
     number of nodes that precede the session's parent, and the session's
     position in the current node list, with a .md suffix.
     Open points: the target directory has to exist, so a directory stub may
     need to be put at the start of the filename and post-processed; the
     unit/session part of the filename still needs tweaking. -->
<xsl:template match="Session">
  <exsl:document href="{$filestub}_{count(../preceding-sibling::node())}_{position()}.md" method="html">
    <xsl:apply-templates select="node()" />
  </exsl:document>
</xsl:template>
<!-- Session title: top-level Markdown heading followed by a blank line. -->
<xsl:template match="Session/Title">
  <xsl:value-of select="concat('# ', ., '&#xa;&#xa;')" />
</xsl:template>
<!-- Section title: second-level Markdown heading with a blank line before
     and after. -->
<xsl:template match="Section/Title">
  <xsl:value-of select="concat('&#xa;&#xa;## ', ., '&#xa;&#xa;')" />
</xsl:template>
<!-- SubSection: a horizontal rule followed by the rendered children.
     The children are processed once; the earlier version applied templates
     twice, which wrote the whole subsection out two times. -->
<xsl:template match="SubSection">
  <xsl:text>---&#xa;</xsl:text>
  <xsl:apply-templates />
</xsl:template>
<!-- SubSubSection: a horizontal rule followed by the rendered children.
     The children are processed once; the earlier version applied templates
     twice, which wrote the whole sub-subsection out two times. -->
<xsl:template match="SubSubSection">
  <xsl:text>---&#xa;</xsl:text>
  <xsl:apply-templates />
</xsl:template>
<!-- InternalSection: children framed by a horizontal rule above and below. -->
<xsl:template match="InternalSection">
  <xsl:text>&#xa;---&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;---&#xa;</xsl:text>
</xsl:template>
<!-- Heading of an InternalSection: third-level Markdown heading preceded by a
     blank line. -->
<xsl:template match="InternalSection/Heading">
  <xsl:value-of select="concat('&#xa;&#xa;### ', ., '&#xa;')" />
</xsl:template>
<!-- Should we add metadata here somehow? -->
<!-- Exercise: render the children, then leave a blank line. -->
<xsl:template match="Exercise">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Heading of an Exercise: third-level Markdown heading preceded by a blank
     line. -->
<xsl:template match="Exercise/Heading">
  <xsl:value-of select="concat('&#xa;&#xa;### ', ., '&#xa;')" />
</xsl:template>
<!-- Timing: bold "Timing: value" label on a line of its own. -->
<xsl:template match="Timing">
  <xsl:value-of select="concat('__Timing: ', ., '__&#xa;')" />
</xsl:template>
<!-- Question: fixed fourth-level heading, then the rendered children. -->
<xsl:template match="Question">
  <xsl:text>&#xa;&#xa;#### Question&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- Discussion: fixed fourth-level heading, then the rendered children. -->
<xsl:template match="Discussion">
  <xsl:text>&#xa;&#xa;#### Discussion&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- Caption relates to Figure, along with Image --> | |
<!-- Caption of a Figure: rendered children followed by a blank line. -->
<xsl:template match="Figure/Caption">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Figure number inside a caption: wrapped in double underscores (bold). -->
<xsl:template match="Figure/Caption/Number">
  <xsl:value-of select="'__'" />
  <xsl:apply-templates select="node()" />
  <xsl:value-of select="'__'" />
</xsl:template>
<!-- it would be nice to do more with Glossary items? --> | |
<!-- GlossaryTerm: the term's string value wrapped in double underscores. -->
<xsl:template match="GlossaryTerm">
  <xsl:value-of select="concat('__', ., '__')" />
</xsl:template>
<!-- Box: render the children, then leave a blank line. -->
<xsl:template match="Box">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Heading of a Box: third-level Markdown heading preceded by a blank line. -->
<xsl:template match="Box/Heading">
  <xsl:value-of select="concat('&#xa;&#xa;### ', ., '&#xa;')" />
</xsl:template>
<!-- Activity: render the children, then leave a blank line. -->
<xsl:template match="Activity">
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Heading of an Activity: third-level Markdown heading preceded by a blank
     line. -->
<xsl:template match="Activity/Heading">
  <xsl:value-of select="concat('&#xa;&#xa;### ', ., '&#xa;')" />
</xsl:template>
<!-- ComputerUI: content wrapped in single backticks (inline code). -->
<xsl:template match="ComputerUI">
  <xsl:value-of select="'`'" />
  <xsl:apply-templates select="node()" />
  <xsl:value-of select="'`'" />
</xsl:template>
<!-- ProgramListing: content placed in a fenced code block tagged python,
     with a blank line before and after the block. -->
<xsl:template match="ProgramListing">
  <xsl:text>&#xa;&#xa;```python&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;```&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- ComputerCode: same fenced python block as ProgramListing. -->
<xsl:template match="ComputerCode">
  <xsl:text>&#xa;&#xa;```python&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;```&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- ComputerDisplay: same fenced python block as ProgramListing. -->
<xsl:template match="ComputerDisplay">
  <xsl:text>&#xa;&#xa;```python&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;```&#xa;&#xa;</xsl:text>
</xsl:template>
<!-- Paragraph inside a ComputerDisplay: starts on a new line and gets no
     trailing newline, unlike the general Paragraph rule. -->
<xsl:template match="ComputerDisplay/Paragraph">
  <xsl:text>&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- Text inside a ComputerDisplay paragraph is written with output escaping
     disabled, so markup characters in the display text are emitted as-is. -->
<xsl:template match="ComputerDisplay/Paragraph/text()">
  <xsl:value-of disable-output-escaping="yes" select="." />
</xsl:template>
<!-- TO DO --> | |
<!-- is there a transcript element? --> | |
<!-- LearningOutcome: children wrapped in a div with class learningOutcome. -->
<xsl:template match="LearningOutcome">
  <div class="learningOutcome">
    <xsl:apply-templates select="node()" />
  </div>
</xsl:template>
<!-- Section: pass-through container; only its children produce output. -->
<xsl:template match="Section">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- SideNote: children wrapped in a div with a light blue background. -->
<xsl:template match="SideNote">
  <div style="background:lightblue">
    <xsl:apply-templates select="node()" />
  </div>
</xsl:template>
<!-- SideNoteParagraph: children wrapped in an HTML paragraph element. -->
<xsl:template match="SideNoteParagraph">
  <p>
    <xsl:apply-templates select="node()" />
  </p>
</xsl:template>
<!-- Tables: children bracketed by a start marker line and an end marker
     line. The end marker now uses the END prefix, matching the convention of
     the other marker templates in this stylesheet (FIGURES / ENDFIGURES and
     so on); previously the start marker was repeated, so the two could not be
     told apart. -->
<xsl:template match="Tables">
  <xsl:text>&#xa;##-- TABLES&#xa;</xsl:text>
  <xsl:apply-templates />
  <xsl:text>&#xa;##-- ENDTABLES&#xa;</xsl:text>
</xsl:template>
<!-- Table: emitted as an HTML table element around the rendered children. -->
<xsl:template match="Table">
  <table>
    <xsl:apply-templates select="node()" />
  </table>
</xsl:template>
<!-- Table number inside a TableHead: string value wrapped in an em element. -->
<xsl:template match="TableHead/Number">
  <em>
    <xsl:value-of select="string(.)" />
  </em>
</xsl:template>
<!-- TableHead: rendered as the caption element of the HTML table. -->
<xsl:template match="TableHead">
  <caption>
    <xsl:apply-templates select="node()" />
  </caption>
</xsl:template>
<!-- Table body: re-emitted as tbody around the rendered children. -->
<xsl:template match="tbody">
  <tbody>
    <xsl:apply-templates select="node()" />
  </tbody>
</xsl:template>
<!-- Table row: re-emitted as tr around the rendered children. -->
<xsl:template match="tr">
  <tr>
    <xsl:apply-templates select="node()" />
  </tr>
</xsl:template>
<!-- Header cell: re-emitted as th around the rendered children; source
     attributes are not carried over. -->
<xsl:template match="th">
  <th>
    <xsl:apply-templates select="node()" />
  </th>
</xsl:template>
<!-- Data cell: re-emitted as td. The class is built from the source
     highlight attribute. rowspan and colspan are copied only when the source
     cell actually has them; the earlier attribute value templates wrote empty
     rowspan and colspan attributes on every cell that lacked them. -->
<xsl:template match="td">
  <td class="highlight_{@highlight}">
    <!-- Attributes must be added before any child content is produced. -->
    <xsl:copy-of select="@rowspan | @colspan" />
    <xsl:apply-templates />
  </td>
</xsl:template>
<!-- Figures: children bracketed by a start and an end marker line. -->
<xsl:template match="Figures">
  <xsl:text>&#xa;##-- FIGURES&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDFIGURES&#xa;</xsl:text>
</xsl:template>
<!-- MediaContent: children bracketed by a start and an end marker line. -->
<xsl:template match="MediaContent">
  <xsl:text>&#xa;##-- MEDIACONTENT&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDMEDIACONTENT&#xa;</xsl:text>
</xsl:template>
<!-- Chemistry: children bracketed by a start and an end marker line. -->
<xsl:template match="Chemistry">
  <xsl:text>&#xa;##-- CHEMISTRY&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDCHEMISTRY&#xa;</xsl:text>
</xsl:template>
<!-- Figure: pass-through container; its Image and Caption children produce
     the output. -->
<xsl:template match="Figure">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!-- The templates below have no Markdown mapping yet. Each one brackets its
     rendered children with a start marker line and a matching end marker
     line named after the element. -->
<xsl:template match="Extract">
  <xsl:text>&#xa;##-- EXTRACT&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDEXTRACT&#xa;</xsl:text>
</xsl:template>
<xsl:template match="Dialogue">
  <xsl:text>&#xa;##-- DIALOGUE&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDDIALOGUE&#xa;</xsl:text>
</xsl:template>
<xsl:template match="SAQ">
  <xsl:text>&#xa;##-- SAQ&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDSAQ&#xa;</xsl:text>
</xsl:template>
<xsl:template match="ITQ">
  <xsl:text>&#xa;##-- ITQ&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDITQ&#xa;</xsl:text>
</xsl:template>
<xsl:template match="KeyPoints">
  <xsl:text>&#xa;##-- KEYPOINTS&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDKEYPOINTS&#xa;</xsl:text>
</xsl:template>
<xsl:template match="Summary">
  <xsl:text>&#xa;##-- SUMMARY&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDSUMMARY&#xa;</xsl:text>
</xsl:template>
<xsl:template match="Reading">
  <xsl:text>&#xa;##-- READING&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDREADING&#xa;</xsl:text>
</xsl:template>
<xsl:template match="Example">
  <xsl:text>&#xa;##-- EXAMPLE&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDEXAMPLE&#xa;</xsl:text>
</xsl:template>
<xsl:template match="Verse">
  <xsl:text>&#xa;##-- VERSE&#xa;</xsl:text>
  <xsl:apply-templates select="node()" />
  <xsl:text>&#xa;##-- ENDVERSE&#xa;</xsl:text>
</xsl:template>
<!-- StudyNote: children wrapped in a div with a light green background. -->
<xsl:template match="StudyNote">
  <div style="background:lightgreen">
    <xsl:apply-templates select="node()" />
  </div>
</xsl:template>
<!-- This is here as a warning / catch all for any missed heading types --> | |
<!-- Fallback for any Heading that none of the parent-specific Heading rules
     caught: rendered as an HTML h1 so that a missed heading type stands out
     in the output. -->
<xsl:template match="Heading">
  <h1>
    <xsl:value-of select="string(.)" />
  </h1>
</xsl:template>
<!-- how do we handle this? --> | |
<!-- CrossRef: rendered as an HTML anchor whose text is the element's string
     value and whose href comes from the idref attribute. The earlier
     expression selected a child element named idref, which leaves the href
     empty when the reference is stored as an attribute.
     NOTE(review): assumes OU-XML carries the target id in an idref attribute;
     confirm against the schema. How the target should resolve once sessions
     are split into separate files is still an open question. -->
<xsl:template match="CrossRef">
  <a href="{@idref}">
    <xsl:value-of select="." />
  </a>
</xsl:template>
<!-- TeX: string value wrapped in double dollar signs. -->
<xsl:template match="TeX">
  <xsl:value-of select="concat('$$', ., '$$')" />
</xsl:template>
</xsl:stylesheet> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Driver script: load the OU-XML to Markdown stylesheet, download an OU-XML
# document from OpenLearn and run the transform over it.

# Read in the xslt file as bytes. lxml refuses to parse a str that carries an
# XML encoding declaration, whereas bytes input is decoded by the parser.
with open('ouxml2md.xslt', 'rb') as f:
    xslt = f.read()

import lxml.html
from lxml import etree

xslt_doc = etree.fromstring(xslt)
xslt_transformer = etree.XSLT(xslt_doc)

# Path to an OU XML file on OpenLearn
openlearn_xml_url='https://www.open.edu/openlearn/science-maths-technology/learn-code-data-analysis/altformat-ouxml'

# Get XML
import requests

# Fail fast on network problems or HTTP errors instead of handing an error
# page to the XML parser.
response = requests.get(openlearn_xml_url, timeout=30)
response.raise_for_status()
dummy_xml = response.content

# Conversion
output_path_stub='test_' #path/filenameprefix for generated output files
source_doc = etree.fromstring(dummy_xml)
# filestub is passed as a quoted XPath string parameter to the stylesheet.
output_doc = xslt_transformer(source_doc, filestub=etree.XSLT.strparam(output_path_stub))

# The per-session markdown files are written by the stylesheet's exsl:document
# instructions as a side effect of the transform. output_doc only holds
# whatever the stylesheet sent to its main output; str(output_doc) serialises it.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment