Skip to content

Instantly share code, notes, and snippets.

@fidelthomet
Created May 19, 2020 14:01
Show Gist options
  • Save fidelthomet/17c7cc587bd9db3e77a8bfb5c7308738 to your computer and use it in GitHub Desktop.
Save fidelthomet/17c7cc587bd9db3e77a8bfb5c7308738 to your computer and use it in GitHub Desktop.
Python 3 script to merge PDF files, with options to skip the first and/or last page of each file; based on https://stackoverflow.com/a/3444735
from argparse import ArgumentParser
from glob import glob
from PyPDF2 import PdfFileReader, PdfFileWriter
def PDF_cat(files, output_filename, skipfirstpage, skiplastpage):
    """Merge every PDF matching a glob pattern into a single output PDF.

    Args:
        files: Glob pattern selecting the input PDFs; matches are merged
            in sorted (lexicographic) path order.
        output_filename: Path of the merged PDF to write. A match that
            resolves to this same path is left out of the inputs.
        skipfirstpage: If true, drop the first page of every input.
        skiplastpage: If true, drop the last page of every input.
    """
    # Function-scope imports keep this block self-contained.
    from contextlib import ExitStack
    from os.path import abspath, normcase

    writer = PdfFileWriter()
    skipfirst = 1 if skipfirstpage else 0
    skiplast = 1 if skiplastpage else 0

    # Compare normalised absolute paths so that e.g. './out.pdf' and
    # 'out.pdf' are recognised as the same file and the output of a
    # previous run is not merged into itself.
    output_path = normcase(abspath(output_filename))
    paths = [f for f in sorted(glob(files))
             if normcase(abspath(f)) != output_path]
    print('merging %d files' % len(paths))

    # All inputs must stay open until the output has been written: the
    # data isn't read from the input files until the write operation.
    # See https://stackoverflow.com/questions/6773631/problem-with-closing-
    # python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733
    # ExitStack closes every handle opened so far, even if opening,
    # parsing or writing raises part-way through.
    with ExitStack() as stack:
        for path in paths:
            fh = stack.enter_context(open(path, 'rb'))
            reader = PdfFileReader(fh)
            # An input with too few pages yields an empty range and
            # simply contributes nothing.
            for pg in range(skipfirst, reader.getNumPages() - skiplast):
                writer.addPage(reader.getPage(pg))
        print('writing output file...')
        with open(output_filename, 'wb') as fout:
            writer.write(fout)
if __name__ == '__main__':
    # Command-line entry point: parse the options and merge the PDFs.
    parser = ArgumentParser()
    # add more options if you like
    parser.add_argument('-o', '--output',
                        dest='output_filename',
                        default='mergedPDFs.pdf',
                        help='write merged PDF files to FILE',
                        metavar='FILE')
    # FILES is a single glob pattern that is expanded by the script, so
    # quote it on shells that would otherwise expand it themselves.
    # nargs='?' makes the positional optional so the default can actually
    # apply; '*.pdf' avoids the invalid '\*' escape sequence and works on
    # every platform, not only with Windows path separators.
    parser.add_argument('files',
                        nargs='?',
                        default='*.pdf',
                        help='PDF files to merge')
    parser.add_argument('-f', '--skipfirstpage',
                        dest='skipfirstpage',
                        action='store_true',
                        default=False,
                        help='skip first page of each merged PDF')
    parser.add_argument('-l', '--skiplastpage',
                        dest='skiplastpage',
                        action='store_true',
                        default=False,
                        help='skip last page of each merged PDF')
    args = parser.parse_args()
    PDF_cat(args.files, args.output_filename,
            args.skipfirstpage, args.skiplastpage)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment