This will split the file input.pdf into (n + 1) files (or n files, if the last split point is the final page of the document).
$ chmod +x splitPDF.py
$ ./splitPDF.py input.pdf splitPageNum_1 ... splitPageNum_n
#!/usr/bin/python
"""Splits an input pdf file into several files, given a list of splitting
points (page numbers).
"""
__author__ = 'benhdj@cs.cmu.edu (Benjamin Han)' | |
import sys | |
import os | |
from CoreGraphics import * | |
def Usage ():
    """Print the command-line help text to standard output.

    Takes no arguments and returns None. The single-argument print(...)
    form is used so the statement behaves the same under Python 2 and 3.
    """
    print("""
Usage: splitPDF.py inputFN splitPageNum1 splitPageNum2 ...
  - inputFN: the path to the input pdf file.
  - splitPageNum1, ...: each one is a positive integer; the numbers
    must not exceed the number of pages of the input file, and the
    entire sequence must be strictly increasing.
Example: splitPDF.py input.pdf 3 5
This will split file input.pdf into 3 files (assuming input.pdf is 10
pages long):
  - input.part1.1_3.pdf contains page 1-3;
  - input.part2.4_5.pdf contains page 4-5;
  - input.part3.6_10.pdf contains page 6-10.
""")
def ParseSplitPageNums (args, maxPages):
    """Convert and validate the split page numbers from the command line.

    args: the raw command-line strings that follow the input file name.
    maxPages: the number of pages in the input document.

    Returns the split page numbers as a list of ints. On invalid input an
    error message is printed and the process exits (SystemExit) with status:
      5 - an argument is not an integer;
      3 - a number is smaller than 1 or larger than maxPages;
      4 - the sequence is not strictly increasing.
    """
    try:
        splitPageNums = [int(arg) for arg in args]
    except ValueError:
        print('Error: invalid split page number(s).')
        # Must stop here: splitPageNums is unbound, so carrying on would
        # only fail later with a NameError.
        sys.exit(5)

    for i, splitPageNum in enumerate(splitPageNums):
        if splitPageNum < 1 or splitPageNum > maxPages:
            print('Error: a split page number must be >= 1 and <= %d.' %
                  maxPages)
            sys.exit(3)
        elif i and splitPageNums[i - 1] >= splitPageNum:
            print('Error: split page numbers must be increasing.')
            sys.exit(4)
    return splitPageNums


def Main ():
    """Split the pdf named in sys.argv[1] at the page numbers that follow it.

    Exit status: 1 when too few arguments are given (the usage text is
    printed), 2 when the input document cannot be opened, and 3/4/5 for
    invalid split page numbers (see ParseSplitPageNums). One output file
    named '<base>.part<k>.<first>_<last>.pdf' is written to the current
    directory for every page range.
    """
    if len(sys.argv) < 3:
        Usage()
        sys.exit(1)

    inputFN = sys.argv[1]
    inputDoc = CGPDFDocumentCreateWithProvider(
        CGDataProviderCreateWithFilename(inputFN))
    if not inputDoc:
        print('Error: cannot open %s as a pdf document.' % inputFN)
        sys.exit(2)
    maxPages = inputDoc.getNumberOfPages()
    print('%s has %d pages' % (inputFN, maxPages))

    splitPageNums = ParseSplitPageNums(sys.argv[2:], maxPages)

    baseFN = os.path.splitext(os.path.basename(inputFN))[0]
    # Default rectangle for the output context; every page below is begun
    # with its own media box taken from the input document.
    pageRect = CGRectMake(0, 0, 612, 792)

    # The pages after the last split point form the final part.
    if not splitPageNums or splitPageNums[-1] < maxPages:
        splitPageNums.append(maxPages)

    startPageNum = 1
    for i, splitPageNum in enumerate(splitPageNums):
        outputFN = '%s.part%d.%d_%d.pdf' % \
            (baseFN, i + 1, startPageNum, splitPageNum)
        # NOTE(review): the context is never closed explicitly; it is only
        # dropped when the name is rebound or the script ends - confirm
        # whether the bindings need an explicit finish call to flush.
        writeContext = CGPDFContextCreateWithFilename(outputFN, pageRect)
        print('Writing page %d-%d to %s...' %
              (startPageNum, splitPageNum, outputFN))
        for pageNum in range(startPageNum, splitPageNum + 1):
            mediaBox = inputDoc.getMediaBox(pageNum)
            writeContext.beginPage(mediaBox)
            writeContext.drawPDFDocument(mediaBox, inputDoc, pageNum)
            writeContext.endPage()
        startPageNum = splitPageNum + 1

    print('Done: %d file(s) generated.' % len(splitPageNums))


if __name__ == '__main__':
    Main()