Skip to content

Instantly share code, notes, and snippets.

@pilhoon
Last active April 20, 2020 14:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pilhoon/21035748f0abb8837e84b1813c3e6ed0 to your computer and use it in GitHub Desktop.
Save pilhoon/21035748f0abb8837e84b1813c3e6ed0 to your computer and use it in GitHub Desktop.
pdf joiner
#!/Users/ph/anaconda2/bin/python
# Suppose,
# ls pdf_folder/
# cover.pdf even174.pdf even352.pdf odd1.pdf odd175.pdf
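# # The number in each name is the page number of the first page stored in that file;
# # `even*` files hold their pages in descending order, `odd*` files in ascending order.
# # cover.pdf has 3 pages: pages 1 and 2 are the front cover pages and page 3 is the back cover.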
from PyPDF2 import PdfFileReader, PdfFileWriter
import glob
import os
import argparse
import pdb
def scan_layout(path):
    """Return ``(first_page, step)`` for a scan file, or ``None`` for other files.

    A file named ``odd<N>.pdf`` stores pages N, N+2, N+4, ... (step ``+2``);
    a file named ``even<N>.pdf`` stores pages N, N-2, N-4, ... (step ``-2``).
    Only the base name of *path* is inspected, so the name of the containing
    folder can never interfere with the parsing.

    Raises:
        ValueError: the name starts with ``odd``/``even`` but the rest of the
            stem is not an integer.
    """
    name = os.path.basename(path)
    stem = os.path.splitext(name)[0]
    for prefix, step in (('odd', 2), ('even', -2)):
        if not name.startswith(prefix):
            continue
        try:
            return int(stem[len(prefix):]), step
        except ValueError:
            raise ValueError('cannot read a page number from %r' % name)
    return None


def missing_pages(indices):
    """Return the sorted page numbers absent between min and max of *indices*.

    Page 0 is optional: it is never reported as missing, because a scan may
    number its front matter either ..., -1, 0, 1, ... or ..., -1, 1, ...
    """
    present = set(indices)
    if not present:
        return []
    return [n for n in range(min(present), max(present) + 1)
            if n != 0 and n not in present]


def main(argv=None):
    """Merge the odd*/even* scans (and optional cover.pdf) of a folder into one PDF.

    Args:
        argv: command-line arguments without the program name; ``None`` means
            ``sys.argv[1:]`` (argparse default).

    Invalid input (missing folder, existing output file, unparsable file
    name, duplicated or missing page numbers) is reported through
    ``parser.error``, which prints a usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('pdffolder', type=str)
    parser.add_argument('--out', type=str, default='out.pdf')
    args = parser.parse_args(argv)

    # Explicit checks instead of `assert`, which disappears under `python -O`.
    if not os.path.isdir(args.pdffolder):
        parser.error('input folder does not exist: %s' % args.pdffolder)
    if os.path.exists(args.out):
        parser.error('refusing to overwrite existing output: %s' % args.out)

    # Signed page number -> page object. Numbers <= 0 are front matter that
    # precedes page 1; keeping one dict means page 0 is not lost on output.
    pages = {}
    for path in sorted(glob.glob(os.path.join(args.pdffolder, '*.pdf'))):
        try:
            layout = scan_layout(path)
        except ValueError as err:
            parser.error(str(err))
        if layout is None:
            # cover.pdf and unrelated PDFs are not part of the page sequence.
            continue
        first_page, step = layout
        reader = PdfFileReader(path)
        for i in range(reader.getNumPages()):
            idx = first_page + i * step
            if idx in pages:
                parser.error('page %d is provided more than once (second copy in %s)'
                             % (idx, path))
            pages[idx] = reader.getPage(i)

    if not pages:
        parser.error('no odd*.pdf or even*.pdf files found in %s' % args.pdffolder)
    gaps = missing_pages(pages)
    if gaps:
        parser.error('missing page(s): %s' % ', '.join(str(n) for n in gaps))

    front_matter = sum(1 for idx in pages if idx <= 0)
    if front_matter:
        print('before first page, we have %d pages' % front_matter)

    writer = PdfFileWriter()
    for idx in sorted(pages):
        writer.addPage(pages[idx])

    cover_path = os.path.join(args.pdffolder, 'cover.pdf')
    if os.path.exists(cover_path):
        cover = PdfFileReader(cover_path)
        cover_count = cover.getNumPages()
        # Inserting page 1 and then page 0 at position 0 leaves them in the
        # order 0, 1 at the very front of the document.
        if cover_count > 1:
            writer.insertPage(cover.getPage(1), 0)
        writer.insertPage(cover.getPage(0), 0)
        # Any remaining cover pages (the back cover) go at the end.
        for i in range(2, cover_count):
            writer.addPage(cover.getPage(i))

    # Context manager so the output handle is flushed and closed.
    with open(args.out, 'wb') as out_file:
        writer.write(out_file)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment