Skip to content

Instantly share code, notes, and snippets.

@jvfiel

jvfiel/pdf.py Secret

Created December 14, 2016 10:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jvfiel/6e7e3557332c350550908a536823461e to your computer and use it in GitHub Desktop.
Save jvfiel/6e7e3557332c350550908a536823461e to your computer and use it in GitHub Desktop.
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
# MIT License. See license.txt
from __future__ import unicode_literals
import pdfkit, os, frappe
from frappe.utils import scrub_urls
from frappe import _
from bs4 import BeautifulSoup
from pyPdf import PdfFileWriter, PdfFileReader
"""
After the PDF is created, we reopen the file and stamp the side-text image onto every page.
Prerequisites:
pip install reportlab
pip install pypdf2
"""
def append_pdf_test(fname):
    """Stamp the side-text image onto every page of the PDF at *fname*.

    A one-page "watermark" PDF is rendered from ``Side.png`` with reportlab,
    merged onto each page of the input with PyPDF2, and the result is written
    to ``/home/<user>/document-output.pdf``.

    Args:
        fname: Path of the PDF to stamp. The file is only opened for reading.

    Returns:
        Path of the stamped PDF that was written.

    NOTE(review): ``watermark.pdf`` (created in the current working
    directory) and the output path are fixed names, so concurrent calls would
    overwrite each other's files -- confirm whether that matters here.
    """
    from reportlab.pdfgen import canvas
    from PyPDF2 import PdfFileWriter, PdfFileReader
    import getpass

    home_dir = os.path.join("/home", getpass.getuser())
    side_image = os.path.join(
        home_dir, "frappe-bench/sites/development/public/files/Side.png")

    # Create the watermark from an image
    c = canvas.Canvas('watermark.pdf')
    # Draw the image at x, y; (0, 250) is the hand-picked position on the page
    c.drawImage(side_image, 0, 250)
    c.save()

    output_file = PdfFileWriter()
    out_fname = os.path.join(home_dir, "document-output.pdf")

    # Both input streams are kept open until the writer has finished:
    # presumably PyPDF2 still pulls page data from them while writing.
    with open("watermark.pdf", "rb") as watermark_stream, \
            open(fname, "rb") as input_stream:
        watermark_page = PdfFileReader(watermark_stream).getPage(0)
        input_file = PdfFileReader(input_stream)

        # Number of pages in input document
        page_count = input_file.getNumPages()
        for page_number in range(page_count):
            # report 1-based page numbers so the last line reads "N of N"
            print("Watermarking page {} of {}".format(page_number + 1, page_count))
            # merge the watermark with the page
            input_page = input_file.getPage(page_number)
            input_page.mergePage(watermark_page)
            # add page from input file to output document
            output_file.addPage(input_page)

        # finally, write the stamped pages to document-output.pdf
        with open(out_fname, "wb") as output_stream:
            output_file.write(output_stream)

    return out_fname
def get_pdf(html, options=None, output=None):
    """Render *html* to PDF with pdfkit and return the result.

    Args:
        html: HTML markup to render.
        options: Optional dict of pdfkit options; passed through
            ``prepare_options`` before rendering.
        output: Optional PDF writer. When given, the rendered pages are
            appended to it with ``append_pdf`` and no watermark is applied.

    Returns:
        ``output`` when it was supplied, otherwise the bytes of the
        watermarked PDF produced by ``append_pdf_test``.

    Raises:
        IOError: pdfkit failed for a reason other than the tolerated
            content errors listed below.
        Whatever ``frappe.throw`` raises when a tolerated error occurred but
        no PDF file was produced.
    """
    html = scrub_urls(html)
    html, options = prepare_options(html, options)
    fname = os.path.join("/tmp", "frappe-pdf-{0}.pdf".format(frappe.generate_hash()))

    # wkhtmltopdf errors that still leave a usable PDF behind
    tolerated_errors = (
        "ContentNotFoundError",
        "ContentOperationNotPermittedError",
        "UnknownContentError",
        "RemoteHostClosedError",
    )

    watermarked = None
    filedata = None

    try:
        try:
            pdfkit.from_string(html, fname, options=options or {})
        except IOError as e:
            message = getattr(e, "message", None) or str(e)
            if not any(err in message for err in tolerated_errors):
                raise
            # allow pdfs with missing images if file got created
            if not os.path.exists(fname):
                frappe.throw(_("PDF generation failed because of broken image links"))

        if output:
            # The handle is deliberately left open: presumably the reader
            # still needs the stream when `output` is eventually written.
            append_pdf(PdfFileReader(open(fname, "rb")), output)
        else:
            # Keep `fname` pointing at the temp file so cleanup() removes it;
            # the watermarked copy is tracked separately.
            watermarked = append_pdf_test(fname)
            with open(watermarked, "rb") as fileobj:
                filedata = fileobj.read()
    finally:
        cleanup(fname, options)
        if watermarked and os.path.exists(watermarked):
            os.remove(watermarked)

    if output:
        return output

    return filedata
def append_pdf(input, output):
    """Append every page of the reader *input* to the writer *output*.

    Used to merge multiple PDF files into one writer. Pages are added in
    order, from index 0 up to ``input.numPages - 1``. Returns ``None``.
    """
    total_pages = input.numPages
    for index in range(total_pages):
        page = input.getPage(index)
        output.addPage(page)
def prepare_options(html, options):
    """Build the pdfkit option dict for *html*.

    Starts from a copy of the caller's *options*, forces the rendering
    flags below, fills in default side margins when none were given, then
    layers on the options found in the markup by ``read_options_from_html``,
    the session cookie, and a page size.

    Args:
        html: HTML markup to be rendered.
        options: Optional dict of pdfkit options; it is not modified.

    Returns:
        Tuple ``(html, options)``: the markup as returned by
        ``read_options_from_html`` and the merged option dict.
    """
    # Work on a copy: header/footer temp paths and the cookie added below
    # must not leak into a dict the caller may reuse for another document.
    options = dict(options) if options else {}

    options.update({
        'print-media-type': None,
        'background': None,
        'images': None,
        'quiet': None,
        # 'no-outline': None,
        'encoding': "UTF-8",
        # 'load-error-handling': 'ignore',
    })

    # defaults: only applied when the caller did not choose a margin
    for margin in ('margin-right', 'margin-left'):
        if not options.get(margin):
            options[margin] = '15mm'

    # options embedded in the markup take precedence over everything above
    html, html_options = read_options_from_html(html)
    options.update(html_options or {})

    # cookies
    if frappe.session and frappe.session.sid:
        options['cookie'] = [('sid', '{0}'.format(frappe.session.sid))]

    # page size
    if not options.get("page-size"):
        options['page-size'] = frappe.db.get_single_value("Print Settings", "pdf_page_size") or "A4"

    return html, options
def read_options_from_html(html):
    """Extract pdfkit options embedded in *html*.

    The markup is parsed with BeautifulSoup (html5lib). Elements whose id is
    one of the margin / page-size option names contribute their contents as
    the option value; header/footer options come from
    ``prepare_header_footer``. Finally ``toggle_visible_pdf`` is applied to
    the whole document.

    Args:
        html: HTML markup to inspect.

    Returns:
        Tuple ``(html, options)``: the prettified markup of the processed
        document and the dict of options found.
    """
    options = {}
    soup = BeautifulSoup(html, "html5lib")

    # extract pdfkit options from html
    for html_id in ("margin-top", "margin-bottom", "margin-left", "margin-right", "page-size"):
        try:
            tag = soup.find(id=html_id)
            if tag and tag.contents:
                options[html_id] = tag.contents
        except Exception:
            # Best effort: a problem reading one option element must not
            # abort PDF generation. Narrower than a bare `except`, so
            # KeyboardInterrupt / SystemExit still propagate.
            pass

    options.update(prepare_header_footer(soup))

    toggle_visible_pdf(soup)

    return soup.prettify(), options
def prepare_header_footer(soup):
    """Move header/footer markup out of *soup* into standalone HTML files.

    For each of ``header-html`` and ``footer-html``: if an element with that
    id exists, every element with that id is extracted from the document,
    the first one is rendered through the ``pdf_header_footer.html``
    template and written to a temp file under ``/tmp``, and the file path is
    stored as the option value. If no such element exists, a 15mm top
    (header) or bottom (footer) margin is set instead.

    Args:
        soup: Parsed document; it is modified in place.

    Returns:
        Dict of pdfkit options derived from the header/footer markup.
    """
    options = {}

    head = soup.find("head").contents
    styles = soup.find_all("style")

    bootstrap = frappe.read_file(os.path.join(frappe.local.sites_path, "assets/frappe/css/bootstrap.css"))
    fontawesome = frappe.read_file(os.path.join(frappe.local.sites_path, "assets/frappe/css/font-awesome.css"))

    # option to fall back on when the corresponding section is absent
    fallback_margin = {
        "header-html": "margin-top",
        "footer-html": "margin-bottom",
    }

    # extract header and footer
    for html_id in ("header-html", "footer-html"):
        content = soup.find(id=html_id)
        if not content:
            options[fallback_margin[html_id]] = "15mm"
            continue

        # there could be multiple instances of header-html/footer-html
        for tag in soup.find_all(id=html_id):
            tag.extract()

        toggle_visible_pdf(content)
        html = frappe.render_template("templates/print_formats/pdf_header_footer.html", {
            "head": head,
            "styles": styles,
            "content": content,
            "html_id": html_id,
            "bootstrap": bootstrap,
            "fontawesome": fontawesome
        })

        # create temp file; binary mode because the markup is written as
        # already-encoded UTF-8 bytes
        fname = os.path.join("/tmp", "frappe-pdf-{0}.html".format(frappe.generate_hash()))
        with open(fname, "wb") as f:
            f.write(html.encode("utf-8"))

        # {"header-html": "/tmp/frappe-pdf-random.html"}
        options[html_id] = fname

    return options
def cleanup(fname, options):
    """Delete the temp files created for one PDF render.

    Removes *fname* and then the files named by the ``header-html`` and
    ``footer-html`` entries of *options*, skipping any path that is unset or
    no longer exists.
    """
    doomed = [fname]
    for key in ("header-html", "footer-html"):
        path = options.get(key)
        if path:
            doomed.append(path)

    for path in doomed:
        if os.path.exists(path):
            os.remove(path)
def toggle_visible_pdf(soup):
    """Apply the PDF-only visibility markers inside *soup*, in place.

    Elements matched by class ``visible-pdf`` lose that class (to unhide
    them); elements matched by class ``hidden-pdf`` are removed from the
    document.
    """
    def _by_class(css_class):
        return soup.find_all(attrs={"class": css_class})

    # drop the marker class so the element is no longer hidden
    for shown in _by_class("visible-pdf"):
        class_list = shown.attrs['class']
        class_list.remove('visible-pdf')

    # take the element out of the html entirely
    for hidden in _by_class("hidden-pdf"):
        hidden.extract()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment