scanny/rst_renderer.py

## rst_renderer.py
# encoding: utf-8

"""
Helper objects for rendering to .docx format.
"""

from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

from docutils import core
from lxml import etree


class RstRenderer(object):
    """
    Service class that knows how to render a RestructuredText string into
    a block container, i.e. an object providing an `add_paragraph()` method
    such as a python-docx Document object.

    *blkcntnr* receives the rendered paragraphs. *rst* is the
    RestructuredText source; when it is `None` nothing is rendered.
    *style_overrides* is an optional mapping whose items replace the default
    style names, which are keyed by 'h1', 'p', 'li', 'lc', 'b' and 'i'.
    """
    # ---maps an inline element tag to its key in the `_styles` lookup---
    _inline_style_keys = {'strong': 'b', 'emphasis': 'i'}

    def __init__(self, blkcntnr, rst, style_overrides=None):
        self._blkcntnr = blkcntnr
        self._rst = rst
        # ---`None` sentinel rather than a `{}` default, so that no single
        # ---dict object is shared between all instances
        self._style_overrides = (
            {} if style_overrides is None else style_overrides
        )

    def render(self):
        """
        Parse the RestructuredText in *rst* and render it into *blkcntnr* as
        paragraphs, bullets, etc., including recognizing and rendering bold
        and italic runs within block elements.
        """
        self._render_container(self._rst_etree)

    @property
    def _styles(self):
        """
        The dict providing lookup for style names for this RST document.
        Built on first access from the default names, updated with the
        items in *style_overrides*, and then cached on the instance.
        """
        if not hasattr(self, '_styles_'):
            self._styles_ = {
                'h1': 'Heading 1',
                'p':  'Body Text',
                'li': 'List Bullet',
                'lc': 'List Continue',
                'b':  'Strong',
                'i':  'Emphasis',
            }
            self._styles_.update(self._style_overrides)
        return self._styles_

    def _render_container(self, container):
        """
        Render each element in *container* in turn. A `section` element is
        descended into recursively; `title`, `paragraph` and `bullet_list`
        elements are rendered. Raises |NotImplementedError| for any other
        tag.
        """
        for element in container:
            tag = element.tag
            if tag == 'section':
                self._render_container(element)
            elif tag == 'title':
                self._render_paragraph(element, self._styles['h1'])
            elif tag == 'paragraph':
                self._render_paragraph(element, self._styles['p'])
            elif tag == 'bullet_list':
                self._render_bullet_list(element)
            else:
                raise NotImplementedError('unrecognized tag %s' % tag)

    @property
    def _rst_etree(self):
        """
        Return the root element of a RestructuredText XML document produced by
        converting *rst* to XML and then parsing that XML using lxml. Each
        newline in the text and tail of every element is replaced with
        a space. The XML is parsed anew on each access.
        """
        def normalize_whitespace(elm):
            if elm.text is not None:
                elm.text = elm.text.replace('\n', ' ')
            for child in elm:
                normalize_whitespace(child)
            if elm.tail is not None:
                elm.tail = elm.tail.replace('\n', ' ')

        root_element = etree.fromstring(self._rst_xml)
        normalize_whitespace(root_element)
        return root_element

    @property
    def _rst_xml(self):
        """
        XML corresponding to the RestructuredText in *rst*, as returned by
        the docutils `publish_string()` function using its 'xml' writer. The
        XML vocabulary is a simple one using tags like `paragraph` and
        `strong`. When *rst* is `None`, the text of an empty `document`
        element is returned instead, without calling docutils.
        """
        if self._rst is None:
            return '<document/>'
        return core.publish_string(source=self._rst, writer_name='xml')

    def _render_bullet_list(self, bullet_list):
        """
        Add a bullet to *blkcntnr* for each list item in *bullet_list*. The
        first child of a list item gets the 'li' style and each following
        child gets the 'lc' (list continuation) style. Every child is
        rendered as a paragraph.
        """
        def render_list_item(list_item):
            for idx, para in enumerate(list_item):
                style_key = 'li' if idx == 0 else 'lc'
                self._render_paragraph(para, self._styles[style_key])

        for list_item in bullet_list:
            render_list_item(list_item)

    def _render_paragraph(self, para, style):
        """
        Add a new paragraph having *style* to *blkcntnr* containing the
        content in the `paragraph` element *para*. Create appropriate runs
        for text having strong and emphasis inline formatting. The text of
        any other inline element is added as a run without a character
        style, so its content is kept rather than causing a lookup error.
        Only the direct children of *para* are considered.
        """
        paragraph = self._blkcntnr.add_paragraph(style=style)
        if para.text is not None:
            paragraph.add_run(para.text)
        for child in para:
            if child.text is not None:
                style_key = self._inline_style_keys.get(child.tag)
                if style_key is None:
                    # ---unrecognized inline markup, keep the text only---
                    paragraph.add_run(child.text)
                else:
                    paragraph.add_run(child.text, self._styles[style_key])
            # ---tail is the plain text between this child and the next---
            if child.tail is not None:
                paragraph.add_run(child.tail)
	# encoding: utf-8

	"""
	Helper objects for rendering to .docx format.
	"""

	from __future__ import (
	absolute_import, division, print_function, unicode_literals
	)

	from docutils import core
	from lxml import etree


	class RstRenderer(object):
		"""
		Service class that knows how to render a RestructuredText string into
		a block container, i.e. an object providing an `add_paragraph()` method
		such as a python-docx Document object.

		*blkcntnr* receives the rendered paragraphs. *rst* is the
		RestructuredText source; when it is `None` nothing is rendered.
		*style_overrides* is an optional mapping whose items replace the
		default style names, which are keyed by 'h1', 'p', 'li', 'lc', 'b'
		and 'i'.
		"""
		# ---maps an inline element tag to its key in the `_styles` lookup---
		_inline_style_keys = {'strong': 'b', 'emphasis': 'i'}

		def __init__(self, blkcntnr, rst, style_overrides=None):
			self._blkcntnr = blkcntnr
			self._rst = rst
			# ---`None` sentinel rather than a `{}` default, so that no single
			# ---dict object is shared between all instances
			self._style_overrides = (
				{} if style_overrides is None else style_overrides
			)

		def render(self):
			"""
			Parse the RestructuredText in rst and render it into blkcntnr as
			paragraphs, bullets, etc., including recognizing and rendering bold
			and italic runs within block elements.
			"""
			self._render_container(self._rst_etree)

		@property
		def _styles(self):
			"""
			The dict providing lookup for style names for this RST document.
			Built on first access from the default names, updated with the
			items in style_overrides, and then cached on the instance.
			"""
			if not hasattr(self, '_styles_'):
				self._styles_ = {
					'h1': 'Heading 1',
					'p': 'Body Text',
					'li': 'List Bullet',
					'lc': 'List Continue',
					'b': 'Strong',
					'i': 'Emphasis',
				}
				self._styles_.update(self._style_overrides)
			return self._styles_

		def _render_container(self, container):
			"""
			Render each element in container in turn. A `section` element is
			descended into recursively; `title`, `paragraph` and `bullet_list`
			elements are rendered. Raises NotImplementedError for any other
			tag.
			"""
			for element in container:
				tag = element.tag
				if tag == 'section':
					self._render_container(element)
				elif tag == 'title':
					self._render_paragraph(element, self._styles['h1'])
				elif tag == 'paragraph':
					self._render_paragraph(element, self._styles['p'])
				elif tag == 'bullet_list':
					self._render_bullet_list(element)
				else:
					raise NotImplementedError('unrecognized tag %s' % tag)

		@property
		def _rst_etree(self):
			"""
			Return the root element of a RestructuredText XML document produced
			by converting rst to XML and then parsing that XML using lxml. Each
			newline in the text and tail of every element is replaced with
			a space. The XML is parsed anew on each access.
			"""
			def normalize_whitespace(elm):
				if elm.text is not None:
					elm.text = elm.text.replace('\n', ' ')
				for child in elm:
					normalize_whitespace(child)
				if elm.tail is not None:
					elm.tail = elm.tail.replace('\n', ' ')

			root_element = etree.fromstring(self._rst_xml)
			normalize_whitespace(root_element)
			return root_element

		@property
		def _rst_xml(self):
			"""
			XML corresponding to the RestructuredText in rst, as returned by
			the docutils `publish_string()` function using its 'xml' writer.
			The XML vocabulary is a simple one using tags like `paragraph` and
			`strong`. When rst is `None`, the text of an empty `document`
			element is returned instead, without calling docutils.
			"""
			if self._rst is None:
				return '<document/>'
			return core.publish_string(source=self._rst, writer_name='xml')

		def _render_bullet_list(self, bullet_list):
			"""
			Add a bullet to blkcntnr for each list item in bullet_list. The
			first child of a list item gets the 'li' style and each following
			child gets the 'lc' (list continuation) style. Every child is
			rendered as a paragraph.
			"""
			def render_list_item(list_item):
				for idx, para in enumerate(list_item):
					style_key = 'li' if idx == 0 else 'lc'
					self._render_paragraph(para, self._styles[style_key])

			for list_item in bullet_list:
				render_list_item(list_item)

		def _render_paragraph(self, para, style):
			"""
			Add a new paragraph having style to blkcntnr containing the
			content in the `paragraph` element para. Create appropriate runs
			for text having strong and emphasis inline formatting. The text of
			any other inline element is added as a run without a character
			style, so its content is kept rather than causing a lookup error.
			Only the direct children of para are considered.
			"""
			paragraph = self._blkcntnr.add_paragraph(style=style)
			if para.text is not None:
				paragraph.add_run(para.text)
			for child in para:
				if child.text is not None:
					style_key = self._inline_style_keys.get(child.tag)
					if style_key is None:
						# ---unrecognized inline markup, keep the text only---
						paragraph.add_run(child.text)
					else:
						paragraph.add_run(child.text, self._styles[style_key])
				# ---tail is the plain text between this child and the next---
				if child.tail is not None:
					paragraph.add_run(child.tail)