WaYdotNET/pdf.py

## pdf.py
from weasyprint import HTML, CSS


class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to integrate nicely
    a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several intermediate steps.
      Here, in this class, we deal mostly with a box representation: 1 `Document` has 1 `Page`
      or more, each `Page` 1 `Box` or more. Each box can contain other boxes. Hence the
      recursive method `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal Weasyprint API
      (`_page_box`, `all_children`, `copy_with_children`, `element_tag`).
    - This generator draws its inspiration and, also a bit of its implementation, from this
      discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
    """
    # Layout used to pre-render the header/footer alone: no page margin, so the measured
    # positions are relative to the page edges.
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    def __init__(self, main_html, header_html=None, footer_html=None,
                 base_url=None, side_margin=2, extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string) which represents
            the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        base_url: str
            An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
            required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and the footer.
            The goal is to avoid having the content of `main_html` touching the header or the
            footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Pre-render the header or the footer on its own and measure the room it needs.

        Parameters
        ----------
        element: str
            Either 'header' or 'footer'. It selects the `<element>_html` attribute to render
            and is also the html tag looked up in the rendered page.

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a html height.
            For the header it is the height of the `<header>` box; for the footer it is the
            distance between the top of the `<footer>` box and the bottom of the page.

        Raises
        ------
        ValueError
            If `element` is neither 'header' nor 'footer', or if the rendered html holds no
            `<body>` or no `<element>` tag.
        """
        if element not in ('header', 'footer'):
            raise ValueError(f"element must be 'header' or 'footer', got {element!r}")

        html = HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        # Only the first page of the overlay is considered.
        element_page = element_doc.pages[0]

        element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
        if element_body is None:
            raise ValueError(f'No <body> found in the rendered {element} html')
        # Work on a copy of the body so that the rendered overlay document is left untouched.
        element_body = element_body.copy_with_children(element_body.all_children())

        element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)
        if element_html is None:
            raise ValueError(f'No <{element}> tag found in the rendered {element} html')

        if element == 'header':
            element_height = element_html.height
        else:
            # Footer: reserve everything from the top of the footer down to the page bottom.
            element_height = element_page.height - element_html.position_y

        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
        """
        Insert the header and the footer in the main document.

        The children of the pre-rendered header/footer bodies are appended to the `<body>` box
        of every page of `main_doc`, which is modified in place.

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')

            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_pdf(self):
        """
        Render the main html, with the header and the footer overlaid when provided.

        The top and bottom page margins of the main content are sized from the measured
        header/footer heights plus `extra_vertical_margin`, so that the content does not
        overlap them. A falsy `header_html`/`footer_html` (None or empty string) is treated
        as absent.

        Returns
        -------
        pdf: a bytes sequence
            The rendered PDF.
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element('header')
        else:
            header_body, header_height = None, 0
        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element('footer')
        else:
            footer_body, footer_height = None, 0

        # CSS margin shorthand order: top right bottom left.
        top_margin = header_height + self.extra_vertical_margin
        bottom_margin = footer_height + self.extra_vertical_margin
        side_margin = f'{self.side_margin}cm'
        margins = f'{top_margin}px {side_margin} {bottom_margin}px {side_margin}'
        content_print_layout = f'@page {{size: A4 portrait; margin: {margins};}}'

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        pdf = main_doc.write_pdf()

        return pdf

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
        box which is named `element`.

        The search is depth-first: each box is checked, then its descendants, then its
        following siblings.

        Parameters
        ----------
        boxes: iterable of boxes
            Objects exposing an `element_tag` attribute and an `all_children()` method.
        element: str
            The tag name to look for (e.g. 'body', 'header', 'footer').

        Returns
        -------
        box or None
            The first matching box, or None when no box carries this tag.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            # Search the subtree, but keep scanning the siblings when nothing is found there.
            found = PdfGenerator.get_element(box.all_children(), element)
            if found is not None:
                return found
        return None
	from weasyprint import HTML, CSS


	class PdfGenerator:
	    """
	    Generate a PDF out of a rendered template, with the possibility to integrate nicely
	    a header and a footer if provided.

	    Notes:
	    ------
	    - When Weasyprint renders an html into a PDF, it goes through several intermediate steps.
	      Here, in this class, we deal mostly with a box representation: 1 `Document` has 1 `Page`
	      or more, each `Page` 1 `Box` or more. Each box can contain other boxes. Hence the
	      recursive method `get_element` for example.
	      For more, see:
	      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
	      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
	    - Warning: the logic of this class relies heavily on the internal Weasyprint API
	      (`_page_box`, `all_children`, `copy_with_children`, `element_tag`).
	    - This generator draws its inspiration and, also a bit of its implementation, from this
	      discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
	    """
	    # Layout used to pre-render the header/footer alone: no page margin, so the measured
	    # positions are relative to the page edges.
	    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

	    def __init__(self, main_html, header_html=None, footer_html=None,
	                 base_url=None, side_margin=2, extra_vertical_margin=30):
	        """
	        Parameters
	        ----------
	        main_html: str
	            An HTML file (most of the time a template rendered into a string) which represents
	            the core of the PDF to generate.
	        header_html: str
	            An optional header html.
	        footer_html: str
	            An optional footer html.
	        base_url: str
	            An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
	            required to get our media.
	        side_margin: int, interpreted in cm, by default 2cm
	            The margin to apply on the core of the rendered PDF (i.e. main_html).
	        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
	            An extra margin to apply between the main content and header and the footer.
	            The goal is to avoid having the content of `main_html` touching the header or the
	            footer.
	        """
	        self.main_html = main_html
	        self.header_html = header_html
	        self.footer_html = footer_html
	        self.base_url = base_url
	        self.side_margin = side_margin
	        self.extra_vertical_margin = extra_vertical_margin

	    def _compute_overlay_element(self, element: str):
	        """
	        Pre-render the header or the footer on its own and measure the room it needs.

	        Parameters
	        ----------
	        element: str
	            Either 'header' or 'footer'. It selects the `<element>_html` attribute to render
	            and is also the html tag looked up in the rendered page.

	        Returns
	        -------
	        element_body: BlockBox
	            A Weasyprint pre-rendered representation of an html element
	        element_height: float
	            The height of this element, which will be then translated in a html height.
	            For the header it is the height of the `<header>` box; for the footer it is the
	            distance between the top of the `<footer>` box and the bottom of the page.

	        Raises
	        ------
	        ValueError
	            If `element` is neither 'header' nor 'footer', or if the rendered html holds no
	            `<body>` or no `<element>` tag.
	        """
	        if element not in ('header', 'footer'):
	            raise ValueError(f"element must be 'header' or 'footer', got {element!r}")

	        html = HTML(
	            string=getattr(self, f'{element}_html'),
	            base_url=self.base_url,
	        )
	        element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
	        # Only the first page of the overlay is considered.
	        element_page = element_doc.pages[0]

	        element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
	        if element_body is None:
	            raise ValueError(f'No <body> found in the rendered {element} html')
	        # Work on a copy of the body so that the rendered overlay document is left untouched.
	        element_body = element_body.copy_with_children(element_body.all_children())

	        element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)
	        if element_html is None:
	            raise ValueError(f'No <{element}> tag found in the rendered {element} html')

	        if element == 'header':
	            element_height = element_html.height
	        else:
	            # Footer: reserve everything from the top of the footer down to the page bottom.
	            element_height = element_page.height - element_html.position_y

	        return element_body, element_height

	    def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
	        """
	        Insert the header and the footer in the main document.

	        The children of the pre-rendered header/footer bodies are appended to the `<body>` box
	        of every page of `main_doc`, which is modified in place.

	        Parameters
	        ----------
	        main_doc: Document
	            The top level representation for a PDF page in Weasyprint.
	        header_body: BlockBox
	            A representation for an html element in Weasyprint.
	        footer_body: BlockBox
	            A representation for an html element in Weasyprint.
	        """
	        for page in main_doc.pages:
	            page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')

	            if header_body:
	                page_body.children += header_body.all_children()
	            if footer_body:
	                page_body.children += footer_body.all_children()

	    def render_pdf(self):
	        """
	        Render the main html, with the header and the footer overlaid when provided.

	        The top and bottom page margins of the main content are sized from the measured
	        header/footer heights plus `extra_vertical_margin`, so that the content does not
	        overlap them. A falsy `header_html`/`footer_html` (None or empty string) is treated
	        as absent.

	        Returns
	        -------
	        pdf: a bytes sequence
	            The rendered PDF.
	        """
	        if self.header_html:
	            header_body, header_height = self._compute_overlay_element('header')
	        else:
	            header_body, header_height = None, 0
	        if self.footer_html:
	            footer_body, footer_height = self._compute_overlay_element('footer')
	        else:
	            footer_body, footer_height = None, 0

	        # CSS margin shorthand order: top right bottom left.
	        top_margin = header_height + self.extra_vertical_margin
	        bottom_margin = footer_height + self.extra_vertical_margin
	        side_margin = f'{self.side_margin}cm'
	        margins = f'{top_margin}px {side_margin} {bottom_margin}px {side_margin}'
	        content_print_layout = f'@page {{size: A4 portrait; margin: {margins};}}'

	        html = HTML(
	            string=self.main_html,
	            base_url=self.base_url,
	        )
	        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

	        if self.header_html or self.footer_html:
	            self._apply_overlay_on_main(main_doc, header_body, footer_body)
	        pdf = main_doc.write_pdf()

	        return pdf

	    @staticmethod
	    def get_element(boxes, element):
	        """
	        Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
	        box which is named `element`.

	        The search is depth-first: each box is checked, then its descendants, then its
	        following siblings.

	        Parameters
	        ----------
	        boxes: iterable of boxes
	            Objects exposing an `element_tag` attribute and an `all_children()` method.
	        element: str
	            The tag name to look for (e.g. 'body', 'header', 'footer').

	        Returns
	        -------
	        box or None
	            The first matching box, or None when no box carries this tag.
	        """
	        for box in boxes:
	            if box.element_tag == element:
	                return box
	            # Search the subtree, but keep scanning the siblings when nothing is found there.
	            found = PdfGenerator.get_element(box.all_children(), element)
	            if found is not None:
	                return found
	        return None