Created
April 14, 2017 16:44
-
-
Save roderickm/4504d4e4a58770a850c428d1909747b7 to your computer and use it in GitHub Desktop.
Combine PDFs into a single file, allowing a single page to be picked from the input files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# | |
# join | |
# Joining pages from a collection of PDF files into a single PDF file. | |
# | |
# join [--pickpage <pagenum>] [--output <file>] [--shuffle] [--verbose]
# | |
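# Parameters:
#
# --output <file>
# Write the combined PDF to <file>. If this option is not specified then
# no output is produced.
#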
# --pickpage <pagenum> | |
# Pick only page number <pagenum> from each source file. | |
# If this option is not specified then all of the pages from a PDF file are appended | |
# to the output PDF file before the next input PDF file is processed. | |
# | |
# --shuffle | |
# Take a page from each PDF input file in turn before taking another from each file. | |
# If this option is not specified then all of the pages from a PDF file are appended | |
# to the output PDF file before the next input PDF file is processed. | |
# | |
# --verbose
# Write information about the doings of this tool to stdout.
# | |
import sys | |
import os | |
import getopt | |
import tempfile | |
import shutil | |
from CoreFoundation import * | |
from Quartz.CoreGraphics import * | |
# Module-wide switch for progress messages; main() turns it on for -v/--verbose.
verbose = False
def createPDFDocumentWithPath(path):
    """Open the PDF file at ``path`` and return the resulting document.

    The path is turned into a file URL (with the is-directory flag False)
    and handed to CGPDFDocumentCreateWithURL; whatever that call returns is
    passed back unchanged.  When the module-level ``verbose`` flag is set,
    the file name is printed first.
    """
    if verbose:
        print("Creating PDF document from file %s" % (path))
    fileURL = CFURLCreateFromFileSystemRepresentation(
        kCFAllocatorDefault, path, len(path), False)
    return CGPDFDocumentCreateWithURL(fileURL)
def writePageFromDoc(writeContext, doc, pageNum):
    """Copy page ``pageNum`` of ``doc`` into ``writeContext`` as a new page.

    If CGPDFDocumentGetPage yields nothing for that page number, the call
    is a silent no-op; callers rely on this to request pages past the end
    of shorter documents.
    """
    sourcePage = CGPDFDocumentGetPage(doc, pageNum)
    if not sourcePage:
        return

    # Use the page's media box for the new page, or None when it is empty.
    pageRect = CGPDFPageGetBoxRect(sourcePage, kCGPDFMediaBox)
    if CGRectIsEmpty(pageRect):
        pageRect = None

    CGContextBeginPage(writeContext, pageRect)
    CGContextDrawPDFPage(writeContext, sourcePage)
    CGContextEndPage(writeContext)

    if verbose:
        print("Copied page %d from %s" % (pageNum, doc))
def shufflePages(writeContext, docs, maxPages):
    """Interleave the documents page by page into ``writeContext``.

    For each page number from 1 to ``maxPages`` inclusive, that page is
    requested from every document in ``docs`` in order, before moving on to
    the next page number.
    """
    pageNumber = 1
    while pageNumber <= maxPages:
        for document in docs:
            writePageFromDoc(writeContext, document, pageNumber)
        pageNumber += 1
def append(writeContext, docs, maxPages, pickpage=None):
    """Write the documents one after another into ``writeContext``.

    With a truthy ``pickpage`` only that single page number is requested
    from each document.  Otherwise pages 1 to ``maxPages`` inclusive are
    requested from each document in turn, finishing one document before
    starting the next.
    """
    # The set of page numbers is the same for every document, so pick it once.
    if pickpage:
        wantedPages = [pickpage]
    else:
        wantedPages = range(1, maxPages + 1)

    for document in docs:
        for pageNumber in wantedPages:
            writePageFromDoc(writeContext, document, pageNumber)
def main(argv):
    """Parse the command line in ``argv`` and write the combined PDF.

    ``argv`` is the argument list without the program name.  Recognised
    options:

      -o, --output <file>     create the destination PDF at <file>
      -p, --pickpage <n>      copy only page <n> (1 or greater) of each input
      -s, --shuffle           interleave pages instead of appending files
      -v, --verbose           print progress messages

    The remaining arguments are the input PDF paths.  When both --shuffle
    and --pickpage are given, --shuffle takes precedence.  If no --output
    option is supplied, nothing is written and the function just returns.

    Exits with status 2 (via sys.exit) on an unknown option or on a
    --pickpage value that is not an integer of at least 1.
    """
    global verbose

    # The PDF context we will draw into to create a new PDF; it stays None
    # until an --output option is seen.
    writeContext = None
    shuffle = False
    # Bind this up front: it is tested below even when --pickpage was never
    # given, which previously raised UnboundLocalError on the default path.
    pickpage = None

    # Parse the command line options
    try:
        options, args = getopt.getopt(
            argv, "o:p:sv", ["output=", "pickpage=", "shuffle", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Options are handled in command-line order, so --verbose only affects
    # the messages of the options that come after it.
    for option, arg in options:
        if option in ("-o", "--output"):
            if verbose:
                print("Setting %s as the destination." % (arg))
            outputURL = CFURLCreateFromFileSystemRepresentation(
                kCFAllocatorDefault, arg, len(arg), False)
            writeContext = CGPDFContextCreateWithURL(outputURL, None, None)
        elif option in ("-p", "--pickpage"):
            try:
                pickpage = int(arg)
            except ValueError as e:
                print(str(e))
                sys.exit(2)
            # Page numbers start at 1; 0 used to fall through silently to
            # "copy every page" and negative values selected nothing.
            if pickpage < 1:
                print("Page number must be 1 or greater: %s" % (arg))
                sys.exit(2)
            if verbose:
                print("Picking page number %d from each input file." % (pickpage))
        elif option in ("-s", "--shuffle"):
            if verbose:
                print("Shuffle pages to the output file.")
            shuffle = True
        elif option in ("-v", "--verbose"):
            print("Verbose mode enabled.")
            verbose = True
        else:
            print("Unknown argument: %s" % (option))

    if writeContext:
        # Create PDF documents for all of the files.  A real list is needed
        # because the documents are walked more than once below.
        docs = [createPDFDocumentWithPath(path) for path in args]

        # Find the maximum number of pages across all inputs (0 if none).
        maxPages = 0
        for doc in docs:
            pageCount = CGPDFDocumentGetNumberOfPages(doc)
            if pageCount > maxPages:
                maxPages = pageCount

        if shuffle:
            shufflePages(writeContext, docs, maxPages)
        elif pickpage:
            append(writeContext, docs, maxPages, pickpage)
        else:
            append(writeContext, docs, maxPages)

        CGPDFContextClose(writeContext)
        # Drop our reference to the context once it has been closed.
        del writeContext
def usage():
    """Print the one-line command synopsis."""
    synopsis = "Usage: join [--pickpage <pagenum>] [--output <file>] [--shuffle] [--verbose]"
    print(synopsis)
# Script entry point: hand main() the arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a modification of the join.py script included with OS X. Find the original at /System/Library/Automator/Combine\ PDF\ Pages.action/Contents/Resources/join.py. This script allows me to assemble expense reports easily, grabbing the first page from many statements and assembling them into a single file using the --pickpage 1 option.