Skip to content

Instantly share code, notes, and snippets.

@alexanderankin
Last active October 13, 2018 14:10
Show Gist options
  • Save alexanderankin/d59c999f1dbde5b1439b54958d79e8ca to your computer and use it in GitHub Desktop.
Save alexanderankin/d59c999f1dbde5b1439b54958d79e8ca to your computer and use it in GitHub Desktop.
fpdf2 Bookmarks Feature Draft Specification
from collections import OrderedDict as o_dict
from fpdf import FPDF
from fpdf.util import textstring
def create_dictionary_string(dict_):
    """Serialize a mapping of PDF names to values as a PDF dictionary.

    Each key/value pair (both must already be strings) is written as
    ``key value``; pairs are separated by newlines and the whole body is
    wrapped as ``<<`` ... `` >>``. Entries appear in the mapping's own
    iteration order.
    """
    body = '\n'.join(' '.join(pair) for pair in dict_.items())
    return '<<{} >>'.format(body)
def create_list_string(list_):
    """Serialize a list of strings as a PDF array: ``[a b c]``."""
    return '[{}]'.format(' '.join(list_))
def iobj_ref(n):
    """Build an indirect PDF object reference (``<n> 0 R``) for object id n."""
    return ' '.join((str(n), '0', 'R'))
class Bookmark(FPDF):
    """FPDF subclass that collects bookmarks and writes them as a PDF outline.

    Bookmarks are recorded as a flat list via :meth:`bookmark`. When the
    document is written, :meth:`_putbookmarks` links that list into a tree
    (parent / first / last / prev / next), emits one object per bookmark plus
    one outline root object, and :meth:`_putcatalog` references the root from
    the catalog.
    """

    def __init__(self, orientation='P', unit='mm', format='A4'):
        super(Bookmark, self).__init__(orientation, unit, format)
        # Flat list of bookmark dicts in insertion order; the tree links are
        # filled in by _putbookmarks().
        self.outlines = []
        # Object number of the outline root; set by _putbookmarks().
        self.outline_root_obj_reference = None
        # Number of add_page() calls so far (0 until the first page).
        self.current_page_number = 0

    def bookmark(self, txt, level=0, y=0):
        """Record a bookmark pointing at the current page.

        Args:
            txt: Title of the bookmark.
            level: Nesting depth; 0 is a top-level entry. The first bookmark
                must be level 0 and each later one may be at most one level
                deeper than the bookmark before it.
            y: Vertical position of the destination; ``-1`` means "use the
                current position" (``self.get_y()``). Presumably expressed in
                document units, since it is scaled by ``self.k`` on output.

        Raises:
            ValueError: If ``level`` is negative or skips a nesting level.
                Such input used to fail later with an opaque ``KeyError``
                during output, or silently attach the bookmark to a stale
                parent from an earlier subtree.
        """
        if y == -1:
            y = self.get_y()
        previous_level = self.outlines[-1]['level'] if self.outlines else -1
        if level < 0 or level > previous_level + 1:
            raise ValueError(
                'invalid bookmark level %r: expected a value between 0 and %d'
                % (level, previous_level + 1)
            )
        # NOTE(review): if no page has been added yet, 'p' is 0 and the
        # destination written later is object 1, which is presumably not a
        # page object -- confirm whether this case should be rejected too.
        self.outlines.append({
            't': txt, 'level': level, 'y': y,
            'p': self.current_page_number
        })

    def add_page(self, *a, **k):
        """Add a page, keeping track of its number for later bookmarks."""
        self.current_page_number += 1
        super(Bookmark, self).add_page(*a, **k)

    def _putbookmarks(self):
        """Write the bookmark objects and the outline root object.

        If the accumulated list of bookmarks is empty, this is a no-op.

        First the flat list is organized into a tree by storing list indices
        under 'parent', 'first', 'last', 'prev' and 'next'. Then one
        dictionary object is written for each bookmark, followed by the
        dictionary object for the document outline, whose object number is
        saved in ``outline_root_obj_reference``.
        """
        if not self.outlines:
            return

        # lru maps a level to the index of the most recent bookmark seen at
        # that level.
        lru = {}
        last_level = 0
        for index, outline_el in enumerate(self.outlines):
            level = outline_el['level']
            if level > 0:
                # Set parent and last pointers
                parent = lru[level - 1]
                outline_el['parent'] = parent
                self.outlines[parent]['last'] = index
                if level > last_level:
                    # Level increasing: set first pointer
                    self.outlines[parent]['first'] = index
            else:
                # Top-level items hang off the outline root, which is written
                # right after the len(self.outlines) item objects.
                outline_el['parent'] = len(self.outlines)
            if level <= last_level and index > 0:
                # Set prev and next pointers
                prev = lru[level]
                self.outlines[prev]['next'] = index
                outline_el['prev'] = prev
            lru[level] = index
            last_level = level

        # Outline items: list index i becomes object number n + i.
        n = self.n + 1
        for outline_el in self.outlines:
            elem_fields = o_dict()
            elem_fields['/Title'] = textstring(outline_el['t'])
            elem_fields['/Parent'] = iobj_ref(n + outline_el['parent'])
            for opt in ['prev', 'next', 'first', 'last']:
                if opt in outline_el:
                    tag = '/' + opt.capitalize()
                    elem_fields[tag] = iobj_ref(n + outline_el[opt])
            # page 582 PDF Reference 1.7
            elem_fields['/Dest'] = create_list_string([
                # page object reference; assumes the object for page p is
                # numbered 1 + 2 * p -- confirm against the fpdf version used
                iobj_ref(1 + 2 * outline_el['p']),
                '/XYZ',  # vs /Fit, /FitH, /FitV, /FitR, etc...
                # left, top
                '0', '%.2f' % ((self.h - outline_el['y']) * self.k),
                'null'  # zoom
            ])
            elem_fields['/Count'] = '0'
            self._newobj()
            self._out(create_dictionary_string(elem_fields))
            self._out('endobj')

        # Outline object (page 585 PDF Reference 1.7)
        outlines_dictionary = o_dict()
        outlines_dictionary['/Type'] = '/Outlines'
        outlines_dictionary['/First'] = iobj_ref(n)
        outlines_dictionary['/Last'] = iobj_ref(n + lru[0])
        self._newobj()
        outlines_string = create_dictionary_string(outlines_dictionary)
        # The root dictionary is written with a space instead of a newline
        # after '/Type /Outlines'. Changing that byte would change the
        # generated file and with it the MD5 hashes the tests compare against.
        outlines_string = outlines_string.replace('\n', ' ', 1)
        self._out(outlines_string)
        self._out('endobj')

        # Saved for Catalog Dictionary
        self.outline_root_obj_reference = self.n

    def _putresources(self):
        """Write the base resources, then the bookmark objects."""
        super(Bookmark, self)._putresources()
        self._putbookmarks()

    def _putcatalog(self):
        """Write the base catalog entries plus the outline reference, if any."""
        super(Bookmark, self)._putcatalog()
        if self.outlines:
            self._out('/Outlines ' + iobj_ref(self.outline_root_obj_reference))
            self._out('/PageMode /UseOutlines')
if __name__ == '__main__':
    # Nothing happens when this module is executed directly.
    pass
import bookmark
import os
from test_utils import (
set_doc_date_0, calculate_hash_of_file
)
def main_1():
    """Render five pages with one top-level bookmark each and verify the MD5.

    The document is written to a temporary file in the current directory,
    hashed with ``calculate_hash_of_file`` and compared with the known-good
    hash. The file is removed even when hashing or the comparison fails, so
    a failed run does not leave stale output behind.

    Raises:
        AssertionError: If the hash of the generated file differs from the
            expected one; the message contains both hashes.
    """
    doc = bookmark.Bookmark()
    set_doc_date_0(doc)
    doc.set_font('Arial', size=12)

    # (cell text, bookmark title) for each page, in order.
    pages = [
        ('Hello World', 'hello world'),
        ('Hello World2', 'hello world2'),
        ('Hello World2', 'hello world3'),
        ('Hello World2', 'hello world4'),
        ('Hello World2', 'hello world5'),
    ]
    for cell_text, title in pages:
        doc.add_page()
        doc.cell(w=0, txt=cell_text)
        doc.bookmark(txt=title)

    outfile = "523061d10e720fc353e1c2899558de4e.pdf.test"
    good = "523061d10e720fc353e1c2899558de4e"
    try:
        doc.output(outfile)
        actual = calculate_hash_of_file(outfile)
    finally:
        # Clean up regardless of the outcome.
        if os.path.exists(outfile):
            os.unlink(outfile)
    assert good == actual, 'expected hash %s, got %s' % (good, actual)
def main_2():
    """Render five pages with one nested bookmark and verify the MD5.

    Same layout as the flat scenario, except that the third bookmark is
    created with ``level=1``. The document is written to a temporary file in
    the current directory, hashed with ``calculate_hash_of_file`` and
    compared with the known-good hash. The file is removed even when hashing
    or the comparison fails.

    Raises:
        AssertionError: If the hash of the generated file differs from the
            expected one; the message contains both hashes.
    """
    doc = bookmark.Bookmark()
    set_doc_date_0(doc)
    doc.set_font('Arial', size=12)

    # (cell text, bookmark title, bookmark level) for each page, in order.
    pages = [
        ('Hello World', 'hello world', 0),
        ('Hello World2', 'hello world2', 0),
        ('Hello World2', 'hello world3', 1),
        ('Hello World2', 'hello world4', 0),
        ('Hello World2', 'hello world5', 0),
    ]
    for cell_text, title, level in pages:
        doc.add_page()
        doc.cell(w=0, txt=cell_text)
        doc.bookmark(txt=title, level=level)

    outfile = "bc3db4ad8dd3c80944f38e5eaae52883.pdf.test"
    good = "bc3db4ad8dd3c80944f38e5eaae52883"
    try:
        doc.output(outfile)
        actual = calculate_hash_of_file(outfile)
    finally:
        # Clean up regardless of the outcome.
        if os.path.exists(outfile):
            os.unlink(outfile)
    assert good == actual, 'expected hash %s, got %s' % (good, actual)
if __name__ == '__main__':
    # Run the flat scenario first, then the nested one.
    for scenario in (main_1, main_2):
        scenario()
import inspect
import sys
import os
import hashlib
import datetime
def set_doc_date_0(doc):
    """Give the document a fixed creation date so its output is reproducible.

    The date passed to ``doc.set_creation_date`` is the naive timestamp
    1969-12-31 19:00:00. That equals the start of the Unix epoch only for a
    UTC-5 clock; presumably it was the author's local time. The value must
    stay as it is because the expected file hashes depend on it.
    """
    # 1969-12-31 19:00:00
    doc.set_creation_date(datetime.datetime(1969, 12, 31, 19, 0, 0))
def calculate_hash_of_file(full_path):
    """Return the hexadecimal MD5 digest of the file at ``full_path``.

    The file is opened in binary mode and read into memory in one go.
    """
    digest = hashlib.md5()
    with open(full_path, 'rb') as handle:
        digest.update(handle.read())
    return digest.hexdigest()
def relative_path_to(place):
    """Resolve ``place`` against the directory of the calling module's file.

    The caller is the frame one level up the stack; its source file is made
    absolute, and ``place`` is joined onto that file's directory.
    """
    caller_frame = sys._getframe(1)
    caller_path = os.path.abspath(inspect.getfile(caller_frame))
    caller_dir = os.path.dirname(caller_path)
    return os.path.join(caller_dir, place)
@alexanderankin
Copy link
Author

usage:

  • pip install fpdf2
  • git clone https://gist.github.com/d59c999f1dbde5b1439b54958d79e8ca.git && cd d59c999f1dbde5b1439b54958d79e8ca && python test.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment