-
-
Save mzjn/9ba641b40af20b13fbfa to your computer and use it in GitHub Desktop.
Jython implementation of the DocBook XSLT 2 'highlight' extension for Saxon 9.3 or later
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import exists | |
from array import array | |
from java.io import StringReader, File | |
from javax.xml.transform.sax import SAXSource | |
from javax.xml.transform.stream import StreamSource | |
from org.xml.sax import InputSource | |
from net.sf.saxon.lib import ExtensionFunctionCall, ExtensionFunctionDefinition | |
from net.sf.saxon.om import StructuredQName | |
from net.sf.saxon.om import Axis as omAxis | |
from net.sf.saxon.value import SequenceType | |
from net.sf.saxon.s9api import Axis, QName, Processor, Serializer, XdmAtomicValue | |
from pygments import highlight | |
from pygments.lexers import get_lexer_by_name, guess_lexer | |
from pygments.formatters import HtmlFormatter, get_formatter_by_name | |
# XML namespaces: XHTML and XSL-FO for the generated markup, and the
# DocBook XSLT 2.0 extension namespace for the function itself.
XHTML_NS = "http://www.w3.org/1999/xhtml"
FO_NS = "http://www.w3.org/1999/XSL/Format"
EXT_NS = "http://docbook.org/extensions/xslt20"

# Qualified name returned by Pygmenter.getFunctionQName().
qName = StructuredQName("", EXT_NS, "highlight")

# Elements looked up in the parsed formatter output by HighlightCall.call():
# XHTML <pre> for HTML output, FO <wrapper> for XSL-FO output.
h_pre = QName("", XHTML_NS, "pre")
f_wrapper = QName("", FO_NS, "wrapper")

# File written by make_css() and passed to the stylesheet as 'docbook.css'.
cssfile = "highlight.css"
class Pygmenter(ExtensionFunctionDefinition):
    """Saxon definition of the 'highlight' extension function.

    Describes the function's name, arity and types, and creates the
    HighlightCall objects that carry out the highlighting.
    """

    def getFunctionQName(self):
        """Return the qualified name of the extension function."""
        return qName

    def getMinimumNumberOfArguments(self):
        """Return 1, the smallest number of arguments accepted."""
        return 1

    def getMaximumNumberOfArguments(self):
        """Return 2, the largest number of arguments accepted."""
        return 2

    def getArgumentTypes(self):
        """Return an array holding a single ATOMIC_SEQUENCE entry."""
        argument_types = [SequenceType.ATOMIC_SEQUENCE]
        return array(SequenceType, argument_types)

    def getResultType(self, suppliedArgumentTypes):
        """Return NODE_SEQUENCE, whatever argument types were supplied."""
        return SequenceType.NODE_SEQUENCE

    def makeCallExpression(self):
        """Return a new HighlightCall instance."""
        return HighlightCall()
class HighlightCall(ExtensionFunctionCall):
    """Implementation of a call to the 'highlight' extension function."""

    def _string_value(self, item):
        """Return the string value of atomic *item* as a unicode object.

        With Saxon 9.3 the value is already a unicode object.  With
        Saxon 9.4 it is a net.sf.saxon.tree.util.FastStringBuffer, which
        must be converted to avoid
        AttributeError: 'FastStringBuffer' object has no attribute 'decode'
        being raised in Pygments' lexer.py module.
        """
        value = item.getPrimitiveStringValue()
        if not isinstance(value, unicode):
            value = value.toString()
        return value

    def call(self, arguments, context):
        """Highlight the supplied code and return the resulting nodes.

        arguments[0] supplies the code to highlight.  arguments[1], when
        present and non-empty, supplies the language name; otherwise the
        lexer is guessed from the code.  The module-level flag 'fo'
        (assigned in the __main__ section) selects XSL-FO instead of
        HTML output.

        Returns an iterator over the children of the first XHTML 'pre'
        element (HTML output) or FO 'wrapper' element (FO output) found
        in the parsed formatter output.

        Raises RuntimeError if the formatter output cannot be parsed or
        does not contain the expected element.
        """
        code = self._string_value(arguments[0].next())

        language = ""
        if len(arguments) > 1:
            item = arguments[1].next()
            # An empty sequence yields no item; keep "" so that the
            # lexer is guessed instead of failing on None.
            if item is not None:
                # Same Saxon 9.3/9.4 normalization as for the code.
                language = self._string_value(item)

        pygmenter = DocBookPygmenter()
        if fo:
            pygmenter.setFormatter("fo")
        result = pygmenter.format(code, language)

        processor = Processor(context.getConfiguration())
        builder = processor.newDocumentBuilder()
        try:
            source = SAXSource(InputSource(StringReader(result)))
            xdmnode = builder.build(source)
        except Exception as sae:
            # I don't ever expect this to happen
            raise RuntimeError(sae)

        if fo:
            wanted = f_wrapper
        else:
            wanted = h_pre
        iterator = xdmnode.axisIterator(Axis.DESCENDANT, wanted)  # s9api.XdmSequenceIterator
        if not iterator.hasNext():
            # Fail with a clear message rather than an AttributeError on None.
            raise RuntimeError("highlight: no '%s' element in formatter output"
                               % wanted.getLocalName())

        elem = iterator.next()         # net.sf.saxon.s9api.XdmNode
        un = elem.getUnderlyingNode()  # net.sf.saxon.tree.tiny.TinyElementImpl
        # net.sf.saxon.tree.tiny.SiblingEnumeration
        # (subclass of net.sf.saxon.om.SequenceIterator)
        return un.iterateAxis(omAxis.CHILD)
class DocBookPygmenter(object):
    """Highlight a code listing with Pygments and return it as markup."""

    def __init__(self, fmt="html"):
        """Create a pygmenter producing *fmt* output ("html" or "fo")."""
        self.formatname = fmt

    def setFormatter(self, name):
        """Select the output format by *name* ("fo" selects XSL-FO)."""
        self.formatname = name

    def format(self, code, language):
        """Return *code* highlighted as a markup string.

        If *language* is the empty string the lexer is guessed from the
        code; otherwise it is looked up by name.

        With format name "fo" the output of FoFormatter is returned as
        is.  FoFormatter is not imported at the top of this module; the
        __main__ section imports it when FO output is enabled.  Any
        other format name is rendered with HtmlFormatter and wrapped in
        a <div> that declares the XHTML namespace.
        """
        if language == "":
            lexer = guess_lexer(code)
        else:
            lexer = get_lexer_by_name(language)

        if self.formatname == "fo":
            return highlight(code, lexer, FoFormatter())

        # HtmlFormatter is the fallback for every non-"fo" name, so the
        # namespace wrapper must be applied for all of them, not only for
        # the exact name "html": HighlightCall searches the result for a
        # 'pre' element in the XHTML namespace.
        markup = highlight(code, lexer, HtmlFormatter(linenos=False))
        return "<div xmlns='%s'>%s</div>" % (XHTML_NS, markup)
def make_css():
    """Write the HtmlFormatter style definitions to the file `cssfile`."""
    css = HtmlFormatter().get_style_defs()
    f = open(cssfile, "w")
    try:
        f.write(css)
    finally:
        # Release the file handle even if the write fails.
        f.close()
def transform(xml, xsl, out):
    """Transform document *xml* with stylesheet *xsl*, writing to *out*.

    All three arguments are file paths.  The Pygmenter extension function
    is registered with the processor before the stylesheet is compiled,
    and the stylesheet parameter 'docbook.css' is set to `cssfile`.
    After the transformation, the CSS file is generated if HTML output
    is selected (module-level flag 'fo' is false) and the file does not
    exist yet.
    """
    processor = Processor(True)
    processor.registerExtensionFunction(Pygmenter())

    compiler = processor.newXsltCompiler()
    document = processor.newDocumentBuilder().build(StreamSource(File(xml)))
    executable = compiler.compile(StreamSource(File(xsl)))
    destination = processor.newSerializer(File(out))

    transformer = executable.load()
    transformer.setInitialContextNode(document)
    transformer.setDestination(destination)
    transformer.setParameter(QName("docbook.css"), XdmAtomicValue(cssfile))
    transformer.transform()

    # The CSS file is only needed for HTML output, and only created once.
    if not (fo or exists(cssfile)):
        make_css()
if __name__ == '__main__':
    from os.path import splitext

    xml = "verbatim.xml"
    # Module-level switch read by HighlightCall.call() and transform():
    # True produces XSL-FO, False produces HTML.
    fo = True

    if fo:
        # Register XSL-FO formatter
        from pygments.formatters import _mapping
        from xslfo import FoFormatter
        _mapping.FORMATTERS[FoFormatter] = ('XSL Formatting Objects',
                                            ('fo', 'FO', 'XSL-FO'), ('*.fo',),
                                            'Format tokens as fo:inline elements.')
        target = "fo"
    else:
        target = "html"

    xsl = "../xslt/base/%s/docbook.xsl" % target
    # Swap only the file extension; str.replace("xml", ...) would also
    # rewrite any other "xml" substring in the input file name.
    out = "%s.%s" % (splitext(xml)[0], target)
    transform(xml, xsl, out)
Updated with a workaround for a non-obvious difference between Saxon 9.3 and 9.4.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a "pure Jython" implementation of the DocBook XSLT 2 'highlight' extension for the Saxon XSLT processor. The inspiration came from this blog post: http://norman.walsh.name/2011/08/31/xsltPygments, which shows how Pygments can be called from Java via Jython.
For XSL-FO output, the formatter in https://gist.github.com/89fe2c4685f71e07e941 can be used.