Skip to content

Instantly share code, notes, and snippets.

@mzjn

mzjn/xslfo.py Secret

Created November 20, 2011 15:52
Show Gist options
  • Save mzjn/89fe2c4685f71e07e941 to your computer and use it in GitHub Desktop.
Save mzjn/89fe2c4685f71e07e941 to your computer and use it in GitHub Desktop.
Pygments XSL-FO formatter
# -*- coding: utf-8 -*-
"""
pygments.formatters.xslfo
~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for XSL Formatting Objects output.
:copyright: 2011
:license: BSD, see LICENSE for more details.
"""
import StringIO, sys
from pygments.formatter import Formatter
from pygments.util import get_bool_opt, get_int_opt
# Public API of this module: only the formatter class is exported.
__all__ = ['FoFormatter']
# XML namespace URI that the ``fo:`` prefix is bound to in the markup
# templates and in the ``fo:wrapper`` element written by the formatter.
FO_NS = "http://www.w3.org/1999/XSL/Format"
def escape_html(text):
    """Return ``text`` with ``&``, ``<``, ``>``, ``"`` and ``'`` escaped.

    Each of the five characters is replaced by its entity / character
    reference so the result can be embedded in markup content or in a
    quoted attribute value.
    """
    # The ampersand has to be handled first; otherwise the ampersands
    # introduced by the later substitutions would be escaped a second time.
    substitutions = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    escaped = text
    for raw, entity in substitutions:
        escaped = escaped.replace(raw, entity)
    return escaped
# Opening part of a complete XSL-FO document; FoFormatter.format() writes it
# before the highlighted code when the ``full`` option is set.  It declares a
# single page master ('my-page') whose body region has a 5mm margin, opens the
# page sequence and flow, and leaves an ``fo:block`` open that is configured
# for source listings: 9pt monospace text with white space and line feeds
# preserved.  ``%s`` is filled in with the FO namespace URI (FO_NS).
# NOTE(review): hyphenation-character='#x21B5;' is not a single character;
# presumably the character reference '&#x21B5;' was intended -- confirm
# against the XSL-FO processor in use before changing it.
fo_start = """<fo:root xmlns:fo='%s'>
<fo:layout-master-set>
<fo:simple-page-master master-name='my-page'>
<fo:region-body margin='5mm'/>
</fo:simple-page-master>
</fo:layout-master-set>
<fo:page-sequence master-reference='my-page'>
<fo:flow flow-name='xsl-region-body'>
<fo:block
hyphenate='false' font-family='monospace' font-size='9pt'
font-selection-strategy='character-by-character' text-align='start'
wrap-option='wrap' hyphenation-character='#x21B5;'
white-space-collapse='false' white-space-treatment='preserve'
linefeed-treatment='preserve'>""" % FO_NS
# Closing counterpart of ``fo_start``: ends the block, flow, page sequence
# and root element, in that order.  Written by FoFormatter.format() after the
# highlighted code when the ``full`` option is set.
fo_end = """</fo:block>
</fo:flow>
</fo:page-sequence>
</fo:root>"""
class FoFormatter(Formatter):
    """
    Format tokens as ``fo:inline`` elements wrapped in ``fo:wrapper``.

    With the `full` option, a complete document is output, ready
    for rendering by an XSL-FO processor.

    Options read by this class:

    `full`
        If true, the output is surrounded by the ``fo_start`` / ``fo_end``
        document templates (default: ``False``).

    `linenos`
        Accepted and stored on the instance, but not consulted when
        formatting.
    """
    name = 'FO'
    aliases = ['fo', 'xslfo']
    filenames = ['*.fo']

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # Parse with get_bool_opt so that string values such as 'false'
        # are interpreted as booleans instead of being truthy merely
        # because the string is non-empty.
        self.full = get_bool_opt(options, 'full', False)
        # Stored as given; nothing in this class reads it.
        self.linenos = options.get('linenos', False)

    def format(self, tokensource, outfile):
        """
        Format ``tokensource``, an iterable of ``(tokentype, tokenstring)``
        tuples and write it into ``outfile``.

        If an output encoding is configured, ``outfile`` is wrapped in a
        stream writer for that encoding, so everything written (templates
        and token text alike) is encoded consistently.  Line feeds inside
        token text are written through unchanged, outside of any
        ``fo:inline`` element.
        """
        if self.encoding:
            # Local import keeps this class self-contained.
            import codecs
            outfile = codecs.getwriter(self.encoding)(outfile)
        write = outfile.write

        if self.full:
            write(fo_start)
        write('<fo:wrapper xmlns:fo="%s">' % FO_NS)

        for ttype, value in tokensource:
            style = self._get_style(ttype)
            if style:
                # ``style`` already starts with a space.
                start = '<fo:inline%s>' % style
                end = '</fo:inline>'
            else:
                start = end = ''
            # Escape before splitting so every emitted fragment is safe,
            # then put the line feeds back between the fragments; dropping
            # them would join consecutive source lines in the output.
            lines = escape_html(value).split('\n')
            for index, line in enumerate(lines):
                if index:
                    write('\n')
                if line:
                    write(start + line + end)

        write('</fo:wrapper>')
        if self.full:
            write(fo_end)

        # Flush the stream that was actually written to (the original code
        # flushed sys.stdout, noting that Jython seemed to need a flush).
        flush = getattr(outfile, 'flush', None)
        if flush is not None:
            flush()

    def _get_style(self, tokentype):
        """
        Return the XSL-FO attributes for ``tokentype`` as a string.

        The result is either empty or a sequence of ``name="value"`` pairs,
        each preceded by a single space, covering the text colour, bold and
        italic settings of the active style.
        """
        # Walk up the token hierarchy until a type is reached that the
        # style defines explicitly.
        while not self.style.styles_token(tokentype):
            tokentype = tokentype.parent
        value = self.style.style_for_token(tokentype)
        attrs = []
        if value['color']:
            attrs.append(' color="#%s"' % value['color'])
        if value['bold']:
            attrs.append(' font-weight="bold"')
        if value['italic']:
            attrs.append(' font-style="italic"')
        return ''.join(attrs)
@mzjn
Copy link
Author

mzjn commented Nov 20, 2011

Currently, Pygments does not provide an XSL-FO formatter. This is a crude prototype of such a formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment