jeetsukumaran/extract-pdf-pages.py

## extract-pdf-pages.py
#! /usr/bin/env python

###############################################################################
##
##  Copyright 2012 Jeet Sukumaran.
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 3 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License along
##  with this program. If not, see <http://www.gnu.org/licenses/>.
##
###############################################################################

"""
Extract specified pages from source PDF.
"""

import sys
import os
import argparse
import pyPdf

# Script metadata. __version__ and __description__ are consumed by the
# argparse setup in main(); __description__ reuses the module docstring.
__author__ = "Jeet Sukumaran"
__copyright__ = "Copyright (C) 2012 Jeet Sukumaran."
__version__ = "1.0.0"
__prog__ = os.path.basename(__file__)
__description__ = __doc__

def _resolve_page_range(first_page_arg, last_page_arg):
    """
    Translate the FIRST-PAGE and LAST-PAGE arguments into page indices.

    Parameters:
        first_page_arg: 1-based number of the first page (int).
        last_page_arg: string giving either the 1-based number of the last
            page or, when prefixed with '+', the number of pages following
            the first page to include.

    Returns:
        Tuple ``(first_index, last_index)`` of 0-based, inclusive indices.

    Exits via ``sys.exit`` with a message if the arguments do not describe
    a valid, non-empty range.
    """
    if first_page_arg < 1:
        # A value of 0 or less would become a negative index and silently
        # select pages counted from the end of the document.
        sys.exit("First page must be 1 or greater")
    first_index = first_page_arg - 1
    relative = last_page_arg.startswith("+")
    # Tolerate spaces after the '+', e.g. '+ 30'.
    number = last_page_arg[1:].replace(" ", "") if relative else last_page_arg
    if relative and not number:
        sys.exit("Need to specify number of pages")
    try:
        value = int(number)
    except ValueError:
        sys.exit("Invalid last page: '%s'" % last_page_arg)
    last_index = first_index + value if relative else value - 1
    if last_index < first_index:
        sys.exit("Last page must not precede first page")
    return first_index, last_index


def main():
    """
    Main CLI handler.

    Parses the command line, copies the requested (inclusive) range of pages
    from the source PDF into a new PDF, and writes the result to the path
    given by ``--output-filepath`` or, if none is given, to standard output.
    Exits with an error message if the requested range is invalid or extends
    past the end of the source document.
    """

    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("src_pdf",
            metavar="SOURCE-PDF",
            type=argparse.FileType('rb'),
            help="path to input pdf file")
    parser.add_argument("first_page",
            metavar="FIRST-PAGE",
            type=int,
            help="number of first page (1-based index: first page is '1')")
    parser.add_argument("last_page",
            metavar="LAST-PAGE",
            type=str,
            help="number of last page; if preceded by '+' (e.g., '+30'), specifies number of pages following first page to extract")
    parser.add_argument("-o", "--output-filepath",
            type=str,
            default=None,
            help="path to output file (if not given, will write to standard output)")

    args = parser.parse_args()

    try:
        first_page, last_page = _resolve_page_range(args.first_page, args.last_page)

        pdf_in = pyPdf.PdfFileReader(args.src_pdf)
        num_pages = pdf_in.getNumPages()
        if last_page >= num_pages:
            # Check before any output is opened so a bad range never leaves
            # a truncated or empty output file behind.
            sys.exit("Last page (%d) is beyond the end of the document (%d pages)"
                    % (last_page + 1, num_pages))

        pdf_out = pyPdf.PdfFileWriter()
        for pg_num in range(first_page, last_page + 1):
            pdf_out.addPage(pdf_in.getPage(pg_num))

        # The source stream is kept open until writing has finished, in case
        # the writer still needs to read page data from it.
        if args.output_filepath:
            out_path = os.path.expandvars(os.path.expanduser(args.output_filepath))
            with open(out_path, "wb") as out_stream:
                pdf_out.write(out_stream)
        else:
            # PDF data is binary: use the underlying byte stream where
            # sys.stdout is a text wrapper (Python 3). Standard output is
            # flushed but deliberately not closed.
            out_stream = getattr(sys.stdout, "buffer", sys.stdout)
            pdf_out.write(out_stream)
            out_stream.flush()
    finally:
        args.src_pdf.close()

# Run the command-line handler only when executed as a script.
if __name__ == "__main__":
    main()
	#! /usr/bin/env python

	###############################################################################
	##
	## Copyright 2012 Jeet Sukumaran.
	##
	## This program is free software; you can redistribute it and/or modify
	## it under the terms of the GNU General Public License as published by
	## the Free Software Foundation; either version 3 of the License, or
	## (at your option) any later version.
	##
	## This program is distributed in the hope that it will be useful,
	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	## GNU General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License along
	## with this program. If not, see <http://www.gnu.org/licenses/>.
	##
	###############################################################################

	"""
	Extract specified pages from source PDF.
	"""

	import sys
	import os
	import argparse
	import pyPdf

	# Script metadata. __version__ and __description__ are consumed by the
	# argparse setup in main(); __description__ reuses the module docstring.
	__author__ = "Jeet Sukumaran"
	__copyright__ = "Copyright (C) 2012 Jeet Sukumaran."
	__version__ = "1.0.0"
	__prog__ = os.path.basename(__file__)
	__description__ = __doc__

	def _resolve_page_range(first_page_arg, last_page_arg):
	    """
	    Translate the FIRST-PAGE and LAST-PAGE arguments into page indices.

	    Parameters:
	        first_page_arg: 1-based number of the first page (int).
	        last_page_arg: string giving either the 1-based number of the last
	            page or, when prefixed with '+', the number of pages following
	            the first page to include.

	    Returns:
	        Tuple ``(first_index, last_index)`` of 0-based, inclusive indices.

	    Exits via ``sys.exit`` with a message if the arguments do not describe
	    a valid, non-empty range.
	    """
	    if first_page_arg < 1:
	        # A value of 0 or less would become a negative index and silently
	        # select pages counted from the end of the document.
	        sys.exit("First page must be 1 or greater")
	    first_index = first_page_arg - 1
	    relative = last_page_arg.startswith("+")
	    # Tolerate spaces after the '+', e.g. '+ 30'.
	    number = last_page_arg[1:].replace(" ", "") if relative else last_page_arg
	    if relative and not number:
	        sys.exit("Need to specify number of pages")
	    try:
	        value = int(number)
	    except ValueError:
	        sys.exit("Invalid last page: '%s'" % last_page_arg)
	    last_index = first_index + value if relative else value - 1
	    if last_index < first_index:
	        sys.exit("Last page must not precede first page")
	    return first_index, last_index


	def main():
	    """
	    Main CLI handler.

	    Parses the command line, copies the requested (inclusive) range of pages
	    from the source PDF into a new PDF, and writes the result to the path
	    given by ``--output-filepath`` or, if none is given, to standard output.
	    Exits with an error message if the requested range is invalid or extends
	    past the end of the source document.
	    """

	    parser = argparse.ArgumentParser(description=__description__)
	    parser.add_argument("--version", action="version", version="%(prog)s " + __version__)
	    parser.add_argument("src_pdf",
	            metavar="SOURCE-PDF",
	            type=argparse.FileType('rb'),
	            help="path to input pdf file")
	    parser.add_argument("first_page",
	            metavar="FIRST-PAGE",
	            type=int,
	            help="number of first page (1-based index: first page is '1')")
	    parser.add_argument("last_page",
	            metavar="LAST-PAGE",
	            type=str,
	            help="number of last page; if preceded by '+' (e.g., '+30'), specifies number of pages following first page to extract")
	    parser.add_argument("-o", "--output-filepath",
	            type=str,
	            default=None,
	            help="path to output file (if not given, will write to standard output)")

	    args = parser.parse_args()

	    try:
	        first_page, last_page = _resolve_page_range(args.first_page, args.last_page)

	        pdf_in = pyPdf.PdfFileReader(args.src_pdf)
	        num_pages = pdf_in.getNumPages()
	        if last_page >= num_pages:
	            # Check before any output is opened so a bad range never leaves
	            # a truncated or empty output file behind.
	            sys.exit("Last page (%d) is beyond the end of the document (%d pages)"
	                    % (last_page + 1, num_pages))

	        pdf_out = pyPdf.PdfFileWriter()
	        for pg_num in range(first_page, last_page + 1):
	            pdf_out.addPage(pdf_in.getPage(pg_num))

	        # The source stream is kept open until writing has finished, in case
	        # the writer still needs to read page data from it.
	        if args.output_filepath:
	            out_path = os.path.expandvars(os.path.expanduser(args.output_filepath))
	            with open(out_path, "wb") as out_stream:
	                pdf_out.write(out_stream)
	        else:
	            # PDF data is binary: use the underlying byte stream where
	            # sys.stdout is a text wrapper (Python 3). Standard output is
	            # flushed but deliberately not closed.
	            out_stream = getattr(sys.stdout, "buffer", sys.stdout)
	            pdf_out.write(out_stream)
	            out_stream.flush()
	    finally:
	        args.src_pdf.close()

	# Run the command-line handler only when executed as a script.
	if __name__ == "__main__":
	    main()