Skip to content

Instantly share code, notes, and snippets.

@mzjn
Created December 22, 2011 17:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mzjn/1511035 to your computer and use it in GitHub Desktop.
Save mzjn/1511035 to your computer and use it in GitHub Desktop.
Jython implementation of the DocBook XSLT 2 'highlight' extension for Saxon 9.4 or later
from os.path import exists
from array import array
from java.io import StringReader, File
from javax.xml.transform.sax import SAXSource
from javax.xml.transform.stream import StreamSource
from org.xml.sax import InputSource
from net.sf.saxon.s9api import (QName, Processor, Serializer, ItemType, OccurrenceIndicator,
XdmAtomicValue, ExtensionFunction, SequenceType)
from pygments import highlight
from pygments.lexers import get_lexer_by_name, guess_lexer
from pygments.formatters import HtmlFormatter, get_formatter_by_name
# XML namespace URIs: the DocBook XSLT 2.0 extension namespace in which the
# 'highlight' function is published, plus the XHTML and XSL-FO namespaces.
EXT_NS = "http://docbook.org/extensions/xslt20"
XHTML_NS = "http://www.w3.org/1999/xhtml"
FO_NS = "http://www.w3.org/1999/XSL/Format"

# Qualified name returned by Pygmenter.getName().
qName = QName("", EXT_NS, "highlight")

# Element names in the XHTML and XSL-FO namespaces.
# NOTE(review): neither is referenced by the code visible in this file.
h_pre = QName("", XHTML_NS, "pre")
f_wrapper = QName("", FO_NS, "wrapper")

# File that make_css() writes the Pygments style definitions to; transform()
# also passes this name to the stylesheet as the 'docbook.css' parameter.
cssfile = "highlight.css"

# Output-mode flag read by Pygmenter.call() and transform(): true selects
# XSL-FO output, false selects HTML.  The __main__ section rebinds it; the
# default here keeps those functions usable when the module is imported
# instead of being run as a script (previously that raised NameError).
fo = False
# ExtensionFunction is new in Saxon 9.4
class Pygmenter(ExtensionFunction):
    """Saxon s9api extension function that highlights source code with Pygments.

    The function is published under the name held in ``qName`` and takes two
    string arguments: the code to highlight and the language name (an empty
    string means "guess the language").  The highlighted markup produced by
    DocBookPygmenter is parsed into a document and returned to the stylesheet.
    """

    def getName(self):
        """Return the QName under which the stylesheet calls this function."""
        return qName

    def getArgumentTypes(self):
        """Return the argument signature: exactly two single xs:string values."""
        s1 = SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
        s2 = SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
        # Saxon expects a Java SequenceType[]; array() builds a typed Java array.
        return array(SequenceType, [s1, s2])

    def getResultType(self):
        """Return the declared result type: one or more nodes of any kind."""
        return SequenceType.makeSequenceType(ItemType.ANY_NODE,
                                             OccurrenceIndicator.ONE_OR_MORE)

    def call(self, arguments):
        """Highlight ``arguments[0]`` and return the result as an XdmNode.

        arguments -- sequence of values supplied by Saxon; item 0 is the code
                     and item 1, when present, is the language name.

        Raises RuntimeError if the generated markup cannot be parsed.
        """
        import sys

        code = arguments[0].toString()
        language = ""
        if len(arguments) > 1:
            language = arguments[1].toString()

        # The output mode comes from the module-level ``fo`` flag.  That flag
        # may not exist when this module is imported rather than run as a
        # script, so a missing flag is treated as false (HTML output).
        if globals().get("fo", False):
            formatname = "fo"
        else:
            formatname = "html"
        result = DocBookPygmenter(formatname).format(code, language)

        processor = Processor(True)
        builder = processor.newDocumentBuilder()
        try:
            source = SAXSource(InputSource(StringReader(result)))
            return builder.build(source)  # net.sf.saxon.s9api.XdmNode
        except Exception:
            # Not expected to happen.  sys.exc_info() is used instead of
            # "except Exception, e" / "except Exception as e" so that this
            # file parses on Jython 2.5 as well as on newer interpreters.
            raise RuntimeError(sys.exc_info()[1])
class DocBookPygmenter(object):
    """Run Pygments over a code string and return the markup as a string.

    ``formatname`` selects the output: "fo" uses FoFormatter from the
    ``xslfo`` module, anything else uses Pygments' HtmlFormatter.
    """

    def __init__(self, fmt="html"):
        """Create a highlighter; ``fmt`` is the initial format name."""
        self.formatname = fmt

    def setFormatter(self, name):
        """Switch the output format to ``name`` (for example "fo" or "html")."""
        self.formatname = name

    def format(self, code, language):
        """Return ``code`` highlighted as markup.

        code     -- source text to highlight.
        language -- Pygments lexer name; when empty, or when Pygments knows no
                    lexer of that name, the lexer is guessed from ``code``.

        For the exact format name "html" the output is wrapped in a <div>
        that declares the XHTML namespace.
        """
        # Imported here so the class needs nothing beyond the module's
        # existing pygments imports.
        from pygments.util import ClassNotFound

        if not language:
            lexer = guess_lexer(code)
        else:
            try:
                lexer = get_lexer_by_name(language)
            except ClassNotFound:
                # An unrecognised language name should not abort the whole
                # transformation; fall back to guessing from the content.
                lexer = guess_lexer(code)

        if self.formatname == "fo":
            # Imported lazily: xslfo is only needed for FO output, and the
            # module-level import lives inside the __main__ section.
            from xslfo import FoFormatter
            formatter = FoFormatter()
        else:
            formatter = HtmlFormatter(linenos=False)

        result = highlight(code, lexer, formatter)
        if self.formatname == "html":
            result = "<div xmlns='%s'>%s</div>" % (XHTML_NS, result)
        return result
def make_css():
    """Write the Pygments HTML style definitions to the file named by ``cssfile``.

    Any existing file of that name is overwritten.
    """
    css = HtmlFormatter().get_style_defs()
    f = open(cssfile, "w")
    # try/finally guarantees the handle is closed even if the write fails,
    # and unlike a "with" block it needs no __future__ import on Jython 2.5.
    try:
        f.write(css)
    finally:
        f.close()
def transform(xml, xsl, out):
    """Transform ``xml`` with the stylesheet ``xsl`` and write the result to ``out``.

    xml -- path of the source document.
    xsl -- path of the XSLT stylesheet.
    out -- path of the file the serialized result is written to.

    A Pygmenter instance is registered with the processor so the stylesheet
    can call the 'highlight' extension function.  After an HTML run the
    Pygments CSS file is generated if it does not exist yet.
    """
    proc = Processor(True)
    proc.registerExtensionFunction(Pygmenter())

    source = proc.newDocumentBuilder().build(StreamSource(File(xml)))
    compiled = proc.newXsltCompiler().compile(StreamSource(File(xsl)))

    trans = compiled.load()
    trans.setInitialContextNode(source)
    trans.setDestination(proc.newSerializer(File(out)))
    trans.setParameter(QName("docbook.css"), XdmAtomicValue(cssfile))
    trans.transform()

    # The module-level ``fo`` flag may not exist when this module is imported
    # rather than run as a script; a missing flag is treated as false (HTML).
    fo_mode = globals().get("fo", False)
    # The CSS file is only generated for HTML output, and only once.
    if not fo_mode and not exists(cssfile):
        make_css()
if __name__ == '__main__':
    # Script entry point: transform verbatim.xml with the DocBook stylesheets,
    # producing HTML or, when ``fo`` is true, XSL-FO.
    from os.path import splitext

    xml = "verbatim.xml"
    xsl = "../xslt/base/html/docbook.xsl"
    # Swap only the extension; str.replace("xml", ...) would also rewrite any
    # other "xml" occurring in the file name.
    base = splitext(xml)[0]
    out = base + ".html"

    # Module-level flag read by Pygmenter.call() and transform().
    fo = True
    if fo:
        # Register XSL-FO formatter
        from pygments.formatters import _mapping
        from xslfo import FoFormatter
        _mapping.FORMATTERS[FoFormatter] = ('XSL Formatting Objects',
                                            ('fo', 'FO', 'XSL-FO'), ('*.fo',),
                                            'Format tokens as fo:inline elements.')
        # Use the FO stylesheet directory instead of the HTML one.
        xsl = xsl.replace("html", "fo")
        out = base + ".fo"

    transform(xml, xsl, out)
@mzjn
Copy link
Author

mzjn commented Dec 22, 2011

This is a variant of pygmenter.py that uses the new ExtensionFunction class in Saxon 9.4.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment