Skip to content

Instantly share code, notes, and snippets.

@say4n
Last active October 24, 2020 12:54
Show Gist options
  • Save say4n/8febb9d775de8282c31f67ccd5a020fd to your computer and use it in GitHub Desktop.
Save say4n/8febb9d775de8282c31f67ccd5a020fd to your computer and use it in GitHub Desktop.
Split PDFs!

usage

This will split the file input.pdf into (n + 1) files.

$ chmod +x splitPDF.py
$ ./splitPDF.py input.pdf splitPageNum_1 ... splitPageNum_n

source

Benjamin Han

#!/usr/bin/python
"""Splits an input pdf file into several given a list of splitting
points (page numbers).
"""
__author__ = 'benhdj@cs.cmu.edu (Benjamin Han)'
import sys
import os
from CoreGraphics import *
def Usage ():
print """
Usage: splitPDF.py inputFN splitPageNum1 splitPageNum2 ...
- inputFN: the path to the input pdf file.
- splitPageNum1, ...: each one is a positive integer; the numbers
must not exceed the number of pages of the input file, and the
entire sequence must be strictly increasing.
Example: splitPDF.py input.pdf 3 5
This will split file input.pdf into 3 files (assuming input.pdf is 10
pages long):
- input.part1.1_3.pdf contains page 1-3;
- input.part2.4_5.pdf contains page 4-5;
- input.part3.6_10.pdf contains page 6-10.
"""
if len(sys.argv) < 3:
Usage()
sys.exit(1)
else:
inputFN = sys.argv[1]
inputDoc = \
CGPDFDocumentCreateWithProvider(\
CGDataProviderCreateWithFilename(inputFN))
if inputDoc:
maxPages = inputDoc.getNumberOfPages()
print '%s has %d pages' % (inputFN, maxPages)
else:
sys.exit(2)
try:
splitPageNums = map(int, sys.argv[2:])
except:
print 'Error: invalid split page number(s).'
for i, splitPageNum in enumerate(splitPageNums):
if splitPageNum < 1 or splitPageNum > maxPages:
print 'Error: a split page number must be >= 1 and <= %d.' % \
maxPages
sys.exit(3)
elif i and splitPageNums[i - 1] >= splitPageNum:
print 'Error: split page numbers must be increasing.'
sys.exit(4)
baseFN = os.path.splitext(os.path.basename(inputFN))[0]
pageRect = CGRectMake (0, 0, 612, 792)
if splitPageNums[-1] < maxPages:
splitPageNums.append(maxPages)
startPageNum = 1
for i, splitPageNum in enumerate(splitPageNums):
outputFN = '%s.part%d.%d_%d.pdf' % \
(baseFN, i + 1, startPageNum, splitPageNum)
writeContext = CGPDFContextCreateWithFilename(outputFN, pageRect)
print 'Writing page %d-%d to %s...' % \
(startPageNum, splitPageNum, outputFN)
for pageNum in xrange(startPageNum, splitPageNum + 1):
mediaBox = inputDoc.getMediaBox(pageNum)
writeContext.beginPage(mediaBox)
writeContext.drawPDFDocument(mediaBox, inputDoc, pageNum)
writeContext.endPage()
startPageNum = splitPageNum + 1
print 'Done: %d file(s) generated.' % len(splitPageNums)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment