Skip to content

Instantly share code, notes, and snippets.

@tremby
Last active October 26, 2018 23:54
Show Gist options
  • Save tremby/301892548b070fe781c9de8a72406fcd to your computer and use it in GitHub Desktop.
Save tremby/301892548b070fe781c9de8a72406fcd to your computer and use it in GitHub Desktop.
Wrapper for pdftk which stamps particular pages with patch PDFs; see --help
#!/usr/bin/env python3
import argparse
import os
import shutil
import string
import subprocess
import tempfile
# Bail out early unless every external tool this script shells out to
# can be found on the PATH; the first missing one is reported.
missing = next(
    (tool for tool in ('pdftk', 'ps2pdf') if shutil.which(tool) is None),
    None,
)
if missing is not None:
    raise RuntimeError("The program {prog} is required".format(prog=missing))
def toBase26(num):
    """Return *num* written in base 26 using the letters A-Z as digits.

    'A' is the zero digit, so 0 -> 'A', 1 -> 'B', 25 -> 'Z', 26 -> 'BA'.
    The result is used elsewhere in this script to build letter-only
    handle names for pdftk.

    Raises:
        ValueError: if *num* is negative. Floor division never brings a
            negative number to zero, so the conversion loop would not
            terminate.
    """
    if num < 0:
        raise ValueError("toBase26 requires a non-negative number")
    digits = string.ascii_uppercase
    if num == 0:
        return digits[0]
    out = []
    while num:
        # Peel off the least significant digit; digits are collected in
        # reverse order and flipped at the end.
        num, remainder = divmod(num, 26)
        out.append(digits[int(remainder)])
        num = int(num)
    return ''.join(reversed(out))
# Command-line parser; the epilog shows a complete example invocation.
parser = argparse.ArgumentParser(
    epilog="Example: %(prog)s contract.pdf 2=sig-p2.pdf 3=sig-p3.pdf signed.pdf",
    description="Patch particular pages of a PDF",
)
def pagePdfPair(string):
    """Parse a ``N=file.pdf`` command-line argument into ``(N, 'file.pdf')``.

    Only the first ``=`` separates the page number from the filename, so
    the filename itself may contain ``=``. Malformed input (no ``=``, an
    empty filename, a non-integer page, or a page below 1) is reported
    through ``parser.error``.
    """
    rawPage, separator, filename = string.partition('=')
    if not separator or not filename:
        parser.error("Expected a page=PDF pair")

    try:
        number = int(rawPage)
    except ValueError:
        parser.error("Page number must be an integer")

    if number < 1:
        parser.error("Page number must be greater than zero")
    return (number, filename)
# Positionals, in order: the input PDF, one or more page=PDF patch
# pairs, and finally the output PDF.
parser.add_argument(
    'inputPdf',
    help="the input file, particular pages of which will be stamped",
)
parser.add_argument(
    'patchPdfs',
    nargs='+',
    type=pagePdfPair,
    metavar="N=patchPdf",
    help="a page number and patch PDF pair",
)
parser.add_argument(
    'outputPdf',
    help="the output file",
)
parser.add_argument(
    '-f', '--force',
    action='store_true',
    help="force writing to the given output file even if it already exists",
)

args = parser.parse_args()
# Refuse to clobber an existing output file unless --force was given
if os.path.isfile(args.outputPdf) and not args.force:
    parser.error("Output file {filename} already exists and --force not specified".format(filename=args.outputPdf))

# Everything we read from must exist: the input first, then each patch
if not os.path.isfile(args.inputPdf):
    parser.error("Input file {filename} doesn't exist".format(filename=args.inputPdf))
missingPatch = next(
    (patchFile for (_page, patchFile) in args.patchPdfs if not os.path.isfile(patchFile)),
    None,
)
if missingPatch is not None:
    parser.error("Patch file {filename} doesn't exist".format(filename=missingPatch))
# Get number of pages in input file by scanning pdftk's dump_data output
# for its "NumberOfPages: N" line
data = subprocess.run(['pdftk', args.inputPdf, 'dump_data'], stdout=subprocess.PIPE, encoding='utf8')
if data.returncode != 0:
    parser.error("pdftk could not read {filename} (exit status {status})".format(filename=args.inputPdf, status=data.returncode))
totalPages = None
for line in data.stdout.splitlines():
    if line.startswith('NumberOfPages: '):
        pageDigits = ''.join(filter(str.isdigit, line))
        if pageDigits:
            totalPages = int(pageDigits)
        break
# Without a page count the range check below would compare int to None
if totalPages is None:
    parser.error("Could not determine the page count of {filename}".format(filename=args.inputPdf))

# Check no patch page numbers are out of range
for (page, pdf) in args.patchPdfs:
    if page > totalPages:
        parser.error("The input PDF has a page count of {total}, so page {page} cannot be patched".format(total=totalPages, page=page))

# Sort patch pages by page number
# Python sorts by first member by default, which makes this easy
args.patchPdfs.sort()

# Each page maps to exactly one pdftk handle, so the same page number
# must not appear twice; after sorting, duplicates are adjacent.
for (previous, current) in zip(args.patchPdfs, args.patchPdfs[1:]):
    if previous[0] == current[0]:
        parser.error("Page {page} was given more than one patch PDF".format(page=current[0]))
def getPatchForPage(p):
    """Return the pdftk handle to use for page *p* of the patch document.

    The result is ``'PATCH<letters>'`` when one of the requested patches
    targets page *p*, and ``'BLANK'`` otherwise.
    """
    matchingPages = (page for (page, _pdf) in args.patchPdfs if page == p)
    found = next(matchingPages, None)
    if found is None:
        return 'BLANK'
    return 'PATCH{handle}'.format(handle=toBase26(found))
# Get a temporary context directory; it and the intermediate PDFs in it
# are removed when the block exits, whether or not a step failed
with tempfile.TemporaryDirectory() as tempdir:
    try:
        # Make a blank page PDF by feeding ps2pdf an empty document on stdin
        blankPdf = os.path.join(tempdir, 'blank.pdf')
        subprocess.run(['ps2pdf', '-sPAPERSIZE=a4', '-', blankPdf], input=b"", check=True)

        # Make a patch PDF with the same total number of pages as the input
        # PDF: each page is either the matching patch or the blank page
        pdftkargs = ['pdftk', 'BLANK={filename}'.format(filename=blankPdf)]
        for (page, pdf) in args.patchPdfs:
            pdftkargs.append('PATCH{handle}={filename}'.format(handle=toBase26(page), filename=pdf))
        pdftkargs.append('cat')
        for page in range(1, totalPages + 1):
            pdftkargs.append(getPatchForPage(page))
        pdftkargs.append('output')
        patchPdf = os.path.join(tempdir, 'patch.pdf')
        pdftkargs.append(patchPdf)
        subprocess.run(pdftkargs, check=True)

        # Stamp the patch PDF on the input PDF
        subprocess.run(['pdftk', args.inputPdf, 'multistamp', patchPdf, 'output', args.outputPdf], check=True)
    except subprocess.CalledProcessError as err:
        # Stop at the first failing step and exit non-zero instead of
        # carrying on with missing intermediate files
        raise SystemExit("{prog} failed with exit status {status}".format(prog=err.cmd[0], status=err.returncode))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment