Skip to content

Instantly share code, notes, and snippets.

@WaYdotNET
Forked from nathandem/pdf.py
Created June 11, 2019 19:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save WaYdotNET/b73aeb2dd50a9e226407aa2898d11d13 to your computer and use it in GitHub Desktop.
Save WaYdotNET/b73aeb2dd50a9e226407aa2898d11d13 to your computer and use it in GitHub Desktop.
Arbitrary header/footer integration in Weasyprint PDF pages
from weasyprint import HTML, CSS
class PdfGenerator:
"""
Generate a PDF out of a rendered template, with the possibility to integrate nicely
a header and a footer if provided.
Notes:
------
- When Weasyprint renders an html into a PDF, it goes though several intermediate steps.
Here, in this class, we deal mostly with a box representation: 1 `Document` have 1 `Page`
or more, each `Page` 1 `Box` or more. Each box can contain other box. Hence the recursive
method `get_element` for example.
For more, see:
https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
- Warning: the logic of this class relies heavily on the internal Weasyprint API.
- This generator draws its inspiration and, also a bit of its implementation, from this
discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
"""
OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'
def __init__(self, main_html, header_html=None, footer_html=None,
base_url=None, side_margin=2, extra_vertical_margin=30):
"""
Parameters
----------
main_html: str
An HTML file (most of the time a template rendered into a string) which represents
the core of the PDF to generate.
header_html: str
An optional header html.
footer_html: str
An optional footer html.
base_url: str
An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
required to get our media.
side_margin: int, interpreted in cm, by default 2cm
The margin to apply on the core of the rendered PDF (i.e. main_html).
extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
An extra margin to apply between the main content and header and the footer.
The goal is to avoid having the content of `main_html` touching the header or the
footer.
"""
self.main_html = main_html
self.header_html = header_html
self.footer_html = footer_html
self.base_url = base_url
self.side_margin = side_margin
self.extra_vertical_margin = extra_vertical_margin
def _compute_overlay_element(self, element: str):
"""
Parameters
----------
element: str
Either 'header' or 'footer'
Returns
-------
element_body: BlockBox
A Weasyprint pre-rendered representation of an html element
element_height: float
The height of this element, which will be then translated in a html height
"""
html = HTML(
string=getattr(self, f'{element}_html'),
base_url=self.base_url,
)
element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
element_page = element_doc.pages[0]
element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
element_body = element_body.copy_with_children(element_body.all_children())
element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)
if element == 'header':
element_height = element_html.height
if element == 'footer':
element_height = element_page.height - element_html.position_y
return element_body, element_height
def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
"""
Insert the header and the footer in the main document.
Parameters
----------
main_doc: Document
The top level representation for a PDF page in Weasyprint.
header_body: BlockBox
A representation for an html element in Weasyprint.
footer_body: BlockBox
A representation for an html element in Weasyprint.
"""
for page in main_doc.pages:
page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')
if header_body:
page_body.children += header_body.all_children()
if footer_body:
page_body.children += footer_body.all_children()
def render_pdf(self):
"""
Returns
-------
pdf: a bytes sequence
The rendered PDF.
"""
if self.header_html:
header_body, header_height = self._compute_overlay_element('header')
else:
header_body, header_height = None, 0
if self.footer_html:
footer_body, footer_height = self._compute_overlay_element('footer')
else:
footer_body, footer_height = None, 0
margins = '{header_size}px {side_margin} {footer_size}px {side_margin}'.format(
header_size=header_height + self.extra_vertical_margin,
footer_size=footer_height + self.extra_vertical_margin,
side_margin=f'{self.side_margin}cm',
)
content_print_layout = '@page {size: A4 portrait; margin: %s;}' % margins
html = HTML(
string=self.main_html,
base_url=self.base_url,
)
main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])
if self.header_html or self.footer_html:
self._apply_overlay_on_main(main_doc, header_body, footer_body)
pdf = main_doc.write_pdf()
return pdf
@staticmethod
def get_element(boxes, element):
"""
Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
box which is named `element`.
Look at the notes of the class for more details on Weasyprint insides.
"""
for box in boxes:
if box.element_tag == element:
return box
return PdfGenerator.get_element(box.all_children(), element)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment