Skip to content

Instantly share code, notes, and snippets.

@patsweet
Created January 8, 2014 17:15
Show Gist options
  • Save patsweet/8320481 to your computer and use it in GitHub Desktop.
Save patsweet/8320481 to your computer and use it in GitHub Desktop.
Takes a PDF file and extracts a range of pages. Useful for batch-removing cover pages or editing.
#!/usr/bin/python
import argparse
import os
from pyPdf import PdfFileWriter, PdfFileReader
def is_valid_file(parser, arg):
    """Validate that *arg* names an existing regular file.

    Intended for use as an argparse ``type=`` callback.

    Args:
        parser: The ``argparse.ArgumentParser`` used to report the failure;
            its ``error`` method prints a usage message and exits.
        arg: The path given on the command line.

    Returns:
        *arg* unchanged when it refers to an existing regular file.
    """
    # isfile (rather than exists) also rejects directories, which would
    # otherwise pass validation and only fail later when opened for reading.
    if not os.path.isfile(arg):
        parser.error("The file %s does not exist." % arg)
        # Only reached if ``parser.error`` is stubbed out and returns.
        return None
    return arg
def main():
    """Copy an inclusive range of pages from one PDF into a new PDF.

    Positional command-line arguments:
        SOURCE-PDF   existing input file (validated by ``is_valid_file``)
        OUTPUT-PDF   path of the file to write
        START-PAGE   first page to copy; passed straight to ``getPage``,
                     so numbering starts at 0
        END-PAGE     last page to copy (inclusive, same numbering)

    Exits via ``parser.error`` when the range is reversed, the start page
    is negative, or the end page lies beyond the last page of the input.
    """
    parser = argparse.ArgumentParser(description="Split PDFs")
    parser.add_argument('pdf_file', help="input file that you are splitting",
                        metavar="SOURCE-PDF",
                        type=lambda x: is_valid_file(parser, x))
    parser.add_argument('output_file', help="name of output file",
                        metavar="OUTPUT-PDF")
    parser.add_argument('start_page', help="start page", type=int,
                        metavar="START-PAGE")
    parser.add_argument('end_page', help="end page", type=int,
                        metavar="END-PAGE")
    args = parser.parse_args()

    if args.start_page > args.end_page or args.start_page < 0:
        parser.error("Bad page numbers.")

    with open(args.pdf_file, "rb") as pdf_file:
        inputpdf = PdfFileReader(pdf_file)
        outputpdf = PdfFileWriter()
        page_count = inputpdf.numPages

        # Page indices are 0-based, so the last valid index is
        # page_count - 1; end_page == page_count must be rejected too.
        if args.end_page >= page_count:
            parser.error("End page out of range. Only %s pages in document."
                         % page_count)

        for page in range(args.start_page, args.end_page + 1):
            outputpdf.addPage(inputpdf.getPage(page))

        # Write while the source file is still open (the writer is fed page
        # objects obtained from the open reader), and close the output
        # handle deterministically instead of leaking it.
        with open(args.output_file, "wb") as out_file:
            outputpdf.write(out_file)
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment