Skip to content

Instantly share code, notes, and snippets.

@s-leroux
Forked from oxplot/svglinkify.md
Last active January 26, 2017 15:02
Show Gist options
  • Save s-leroux/59c6a0c5421c034d759263d6f55757bb to your computer and use it in GitHub Desktop.
Save s-leroux/59c6a0c5421c034d759263d6f55757bb to your computer and use it in GitHub Desktop.
Add hyperlinks to PDFs created by Inkscape
#!/usr/bin/env python
# svglinkify.py - Add hyperlinks to PDFs generated by Inkscape
# Copyright (C) 2015 Mansour Behabadi <mansour@oxplot.com>
#
# This script comes with no warranty whatsoever. Use at your own risk.
# If you decide to distribute verbatim or modified versions of this
# code, you must retain this copyright notice.
#
# Usage: svglinkify.py <svg-file> <inkscape-gen-pdf> <linkified-pdf>
# Requires:
# qpdf
# inkscape
# python 2/3
#
# WARNING Since this script is one heck of a hack, you should follow the
# instructions below to the letter, or you will fail miserably.
#
# 1. Start by making an SVG that looks nice and everything and add a
# piece of text somewhere.
#
# 2. Select the rectangle tool and draw a box on top of the text.
# This box will be the clickable area of our link. Set its fill color
# to #ff00ff (magenta) and remove any strokes.
#
# 3. Right click the box and select "Create Link". In the "Object
# attributes" window that opens up, type the destination link in
# "Href".
#
# 4. Send the box to the back (using End key on the keyboard) so you can
# see your text. DO NOT move your box at any time after you've
# created the link. More details below.
#
# 5. Export your SVG as PDF and run svglinkify.py:
#
# $ svglinkify.py my_doc.svg my_doc.pdf my_doc_with_links.pdf
#
# So you pass your SVG file as the first arg, the exported PDF as the
# 2nd arg and the name of final PDF as 3rd arg.
#
# 6. If you did everything right, open my_doc_with_links.pdf and you
# should be able to click your text and open the link in a browser.
# You'll also notice that the magenta box is gone. That's it. Now read
# the sections below if you hate being frustrated when things break.
#
# HOW IT WORKS
#
# The script looks for magenta boxes (surprise!) that have a link. It
# then extracts their x,y position and hyperlinks. It does the same
# search for magenta boxes in the generated PDF and tries to match them
# up by their relative locations. Therefore it's crucial to get the
# locations right. Since SVG is pretty damn flexible, locations aren't
# always simple x,y attributes. When you create a link for an object,
# you wrap it in a group. Groups don't have x,y, instead they are
# transformed using 2D matrices which means, maths calculations must be
# done in order to find out where the enclosed box really is. This
# script is too dumb to do that. That's why you should not move a box
# after you create a link for it.
#
# You could either delete it and draw a new one, or if you like it
# dangerous, you can enter the group (i.e. double-clicking the box) and
# then move the box. This way, you're not moving the group so no
# transformations will be applied. You're bound to make a mistake sooner
# or later this way, so don't do it.
#
# If you can't get this to work after at least several attempts, email
# me your SVG and the PDF inkscape generated for you and I should be
# able to help.
from __future__ import unicode_literals
from __future__ import print_function
from itertools import count
from subprocess import call, PIPE, Popen
import os
import re
import sys
import tempfile
# Compatibility shims so the script runs on both Python 2 and Python 3.
try:
    # Python 2: make ``range`` the lazy variant.
    range = xrange  # noqa: F821 (only defined on Python 2)
except NameError:
    # Python 3: ``xrange`` does not exist and ``range`` is already lazy.
    pass

# html_unescape(text) decodes HTML/XML character references such as
# ``&amp;``; it is applied to the href values read from the SVG file.
try:
    # Preferred API. HTMLParser.unescape() was deprecated in Python 3.4
    # and removed in 3.9, so it must not be the first choice.
    from html import unescape as html_unescape
except ImportError:
    # Python 2 (no suitable ``html`` module) or Python 3.0-3.3 (``html``
    # exists but has no ``unescape``): fall back to the parser method.
    try:
        import HTMLParser as html_parser
    except ImportError:
        import html.parser as html_parser
    html_unescape = html_parser.HTMLParser().unescape
# Command line parsing: three positional arguments are required (any
# extra arguments are ignored).
if len(sys.argv) < 4:
    print('Usage: %s <svg-file> <inkscape-gen-pdf> <linkified-pdf>'
          % sys.argv[0], file=sys.stderr)
    # sys.exit() rather than the site-provided exit(): the latter is a
    # convenience for the interactive shell and is missing under -S.
    sys.exit(1)
# svg_path:     SVG source containing the magenta link boxes
# pdf_in_path:  PDF exported from that SVG
# pdf_out_path: where the PDF with hyperlinks is written
svg_path, pdf_in_path, pdf_out_path = sys.argv[1:4]
# Load the link rects from SVG file.
# Each entry of svg_rects is a tuple (unescaped href, x, y) taken from a
# magenta-filled <rect> that immediately follows the opening tag of an
# <a xlink:href="..."> element.
SVG_X_PAT = re.compile(r'\bx="([^"]+)"')
SVG_Y_PAT = re.compile(r'\by="([^"]+)"')
# Group 1 is the link target, group 2 the attribute text of the <rect>.
SVG_LINK_RECT_PAT = re.compile(r'''
<a[^>]*?\bxlink:href="([^"]+)"[^>]*>\s*<rect
([^>]*?\bstyle="[^"]*?\bfill:[#]ff00ff\b[^>]*)
''', re.X)
with open(svg_path, 'r') as svg_file:
    svg_text = svg_file.read()
svg_rects = [
    (
        html_unescape(href),
        float(SVG_X_PAT.search(rect_attrs).group(1)),
        float(SVG_Y_PAT.search(rect_attrs).group(1)),
    )
    for href, rect_attrs in SVG_LINK_RECT_PAT.findall(svg_text)
]
# QDFy the input PDF & load the resulting PDF to memory.
# mkstemp() is used only to reserve a unique path: the descriptor is
# closed right away and qpdf writes its output to the path by name.
fd, qdf_tmppath = tempfile.mkstemp()
os.close(fd)
try:
    if call(['qpdf', '--qdf', pdf_in_path, qdf_tmppath]) != 0:
        print('error: qpdf failed', file=sys.stderr)
        # SystemExit still passes through the ``finally`` below, so the
        # temporary file is removed on this path too.
        sys.exit(1)
    with open(qdf_tmppath, 'rb') as qdf_file:
        pdf_data = qdf_file.read()
finally:
    # Best-effort cleanup: ignore only filesystem errors instead of
    # swallowing every exception (including KeyboardInterrupt).
    try:
        os.unlink(qdf_tmppath)
    except OSError:
        pass
# Load the rects and last object ID from PDF file.
# PDF_RECT_PAT matches a magenta fill colour ("1 0 1 rg", optionally
# followed by "/a0 gs") and the run of "x y w h re f" rectangle fills
# that comes right after it; group 1 is that run of rectangles.
PDF_RECT_PAT = re.compile(br'''
\b1\s+0\s+1\s+rg(?:\s+/a0\s+gs)?
((?:\s+[\d.-]+\s+[\d.-]+\s+[\d.-]+\s+[\d.-]+\s+re\s+f)+)\b
''', re.X)
# NOTE: only the first run of magenta rects is read here, while the
# later removal step substitutes every match of PDF_RECT_PAT.
rect_run = PDF_RECT_PAT.search(pdf_data)
rect_tokens = re.split(br'\s+', rect_run.group(1).strip()) if rect_run else []
# Every rectangle contributes six tokens (x, y, w, h, "re", "f"); keep
# the four numbers of each as a list of floats [x, y, w, h].
pdf_rects = [
    [float(token) for token in rect_tokens[start:start + 4]]
    for start in range(0, len(rect_tokens), 6)
]
# The two integers following the "xref" keyword; they are used further
# down to number the new link objects and to rewrite the xref header.
xref_header = re.search(br'\bxref\s+(\d+)\s+(\d+)\b', pdf_data)
if not xref_header:
    print('error: could not find last obj id', file=sys.stderr)
    # sys.exit() instead of the site-provided interactive exit() helper.
    sys.exit(1)
last_obj = tuple(int(field) for field in xref_header.groups())
# Some sanity check to ensure our matches are good: links are paired
# with PDF rects purely by position, so the two lists must have the
# same length before any pairing is attempted.
if len(svg_rects) != len(pdf_rects):
    print('''
error: found diff # of rects in svg & ps
This can be due to number of reasons:
- you've moved the box after creating a link for it - bad move!
fix: delete it and draw a new box and DON'T MOVE it this time
- you've grouped the boxes and done some fancy things
fix: see above
- you forgot to remove the strokes from the boxes
- you have removed a box but Inkscape is still keeping it in the file
fix: do a document cleanup or close/re-open your file
'''.strip(), file=sys.stderr)
    # sys.exit() instead of the site-provided interactive exit() helper,
    # which is not available when Python runs without the site module.
    sys.exit(1)
# Match up the rects based on their relative X,Y position.
# Coordinates are truncated to 1/100 of a unit before comparing. Both
# lists are ordered by X ascending; within the same X the SVG rects go
# by Y descending while the PDF rects go by Y ascending.
# FIXME rounding errors can still pair links with the wrong rects, so
# always check the final PDF before sharing.
svg_rects.sort(key=lambda rect: (int(rect[1] * 100), -int(rect[2] * 100)))
pdf_rects.sort(key=lambda rect: (int(rect[0] * 100), int(rect[1] * 100)))
# Generate the PDF hyperlink objects
def _pdf_escape_literal(text):
    """Escape *text* for embedding in a PDF literal string ``( ... )``.

    Backslashes and parentheses are prefixed with a backslash so that a
    URL containing them (e.g. an unbalanced ``)``) cannot terminate or
    corrupt the string object.
    """
    return (text.replace('\\', '\\\\')
            .replace('(', '\\(')
            .replace(')', '\\)'))


# One link annotation object; the fields are: object id, generation,
# target URI, then the corners of the clickable rectangle.
pdf_link_tpl = '''
%%QDF: ignore_newline
%d %d obj
<<
/A << /S /URI /URI (%s) >>
/Border [ 0 0 0 ]
/Rect [ %f %f %f %f ]
/Subtype /Link
/Type /Annot
>>
endobj
'''.strip()
# New objects are numbered upwards from last_obj[1]; the i-th sorted PDF
# rect supplies the geometry and the i-th sorted SVG rect the href.
pdf_links = '\n'.join(
    pdf_link_tpl % (
        obj_id, last_obj[0], _pdf_escape_literal(href),
        x, y, x + width, y + height,
    )
    for (x, y, width, height), (href, _svg_x, _svg_y), obj_id
    in zip(pdf_rects, svg_rects, count(last_obj[1]))
)
# Remove the visual rects from PDF, write out the new hyperlink objs
pdf_data = PDF_RECT_PAT.sub(b'', pdf_data)
# The link objects go right before the xref keyword, whose second number
# is increased by the number of added objects.
# NOTE: encoding as ASCII raises UnicodeEncodeError for non-ASCII hrefs.
new_xref = (pdf_links + '\nxref\n%d %d' % (
    last_obj[0], last_obj[1] + len(svg_rects)
)).encode('ascii')
pdf_data = re.sub(
    (r'\bxref\s+%d\s+%d\b' % last_obj).encode('ascii'),
    # A callable replacement is inserted verbatim; a plain replacement
    # string would have its backslashes (from URLs) reinterpreted as
    # regex template escapes.
    lambda _match: new_xref,
    pdf_data
)
# Reference the new annotations from the object that follows the
# "%% Page 1 %%" marker; only that one page receives links.
annot_refs = ' '.join(
    '%d %d R' % (obj_id, last_obj[0])
    for obj_id in range(last_obj[1], last_obj[1] + len(svg_rects))
)
pdf_data = re.sub(
    br'([%][%]\s+Page\s+1\s+[%][%][^\n]+\s+\d+\s+\d+\s+obj\s+<<)',
    (r'\1/Annots [%s] ' % annot_refs).encode('ascii'), pdf_data)
# Optimize and save the new file.
# The edited QDF text is first piped through fix-qdf into a temporary
# file, which qpdf then rewrites with compressed object streams into
# pdf_out_path.
fd, out_tmppath = tempfile.mkstemp()
os.close(fd)
try:
    with open(out_tmppath, 'wb') as out_tmpfile:
        fix_qdf_proc = Popen(['fix-qdf'], stdin=PIPE, stdout=out_tmpfile)
        # communicate() sends the data, closes stdin and waits for the
        # process to finish, so returncode is set afterwards.
        fix_qdf_proc.communicate(pdf_data)
    # The temp file is closed at this point, before qpdf reads it.
    if fix_qdf_proc.returncode != 0:
        print('error: failed writing the mod pdf', file=sys.stderr)
        sys.exit(1)
    if call([
        'qpdf', '--object-streams=generate', '--stream-data=compress',
        out_tmppath, pdf_out_path
    ]) != 0:
        print('error: failed writing the mod pdf', file=sys.stderr)
        sys.exit(1)
finally:
    # Best-effort cleanup: ignore only filesystem errors instead of
    # swallowing every exception (including KeyboardInterrupt).
    try:
        os.unlink(out_tmppath)
    except OSError:
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment