giuseppe-testa/pdf_split.py

## pdf_split.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Utility script that splits a PDF into one single-page PDF file per page.

Example:

        $ python pdf_split.py -i "path_to_pdf/sample.pdf" -o "path_to_dir"

 * Author:    Giuseppe Testa
 * Date:      19/09/2019

"""

import os
import argparse

from PyPDF2 import PdfFileWriter, PdfFileReader

parser = argparse.ArgumentParser(
    description='Split a PDF into one single-page PDF file per page')

# Both options are required: without them the path handling below would
# receive None and fail with an unhelpful TypeError.
parser.add_argument('-i', '--input_path', type=str, required=True,
                    help='Pdf input filepath')
parser.add_argument('-o', '--output_path', type=str, required=True,
                    help='Output folder')


def pdf_stem(pdf_path):
    """Return the file name of *pdf_path* without directory or extension.

    Only the final extension is removed, so ``report.v2.pdf`` yields
    ``report.v2`` instead of being cut at the first dot. This keeps
    differently named inputs from sharing one output folder.
    """
    return os.path.splitext(os.path.basename(pdf_path))[0]


def page_output_path(pages_dir, stem, page_index):
    """Return the path ``<pages_dir>/<stem>_page<page_index>.pdf``."""
    return os.path.join(pages_dir, stem + "_page%s.pdf" % page_index)


def split_pdf(pdf_path, out_dir):
    """Write every page of *pdf_path* to its own single-page PDF file.

    The files are placed in ``<out_dir>/<stem>/`` (created if missing),
    where ``<stem>`` is the input file name without its extension. Pages
    are numbered from 0 in the output file names.

    Args:
        pdf_path: Path of the PDF file to split.
        out_dir: Folder under which the per-document folder is created.

    Returns:
        The list of written file paths, in page order. It is empty when
        the document has no pages.
    """
    stem = pdf_stem(pdf_path)

    # Make the dir for single page pdfs
    pages_dir = os.path.join(out_dir, stem)
    os.makedirs(pages_dir, exist_ok=True)

    written = []
    with open(pdf_path, "rb") as f:
        inputpdf = PdfFileReader(f)

        for i in range(inputpdf.numPages):
            # A fresh writer per page so each output holds exactly one page.
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))

            page_path = page_output_path(pages_dir, stem, i)
            with open(page_path, "wb") as output_stream:
                output.write(output_stream)
            written.append(page_path)

    # No ``del i`` here: the loop variable is local to this function, and
    # deleting it raised NameError for documents with zero pages.
    return written


if __name__ == '__main__':
    # Parse inside the guard so that importing this module never reads
    # the command line.
    args = parser.parse_args()
    split_pdf(args.input_path, args.output_path)
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""Utility script that splits a PDF into one single-page PDF file per page.

	Example:

	$ python pdf_split.py -i "path_to_pdf/sample.pdf" -o "path_to_dir"

	* Author: Giuseppe Testa
	* Date: 19/09/2019

	"""

	import os
	import argparse

	from PyPDF2 import PdfFileWriter, PdfFileReader

	# Command-line interface and entry point of the PDF page splitter.
parser = argparse.ArgumentParser(
    description='Split a PDF into one single-page PDF file per page')

# Both options are required: without them the path handling below would
# receive None and fail with an unhelpful TypeError.
parser.add_argument('-i', '--input_path', type=str, required=True,
                    help='Pdf input filepath')
parser.add_argument('-o', '--output_path', type=str, required=True,
                    help='Output folder')


def pdf_stem(pdf_path):
    """Return the file name of *pdf_path* without directory or extension.

    Only the final extension is removed, so ``report.v2.pdf`` yields
    ``report.v2`` instead of being cut at the first dot. This keeps
    differently named inputs from sharing one output folder.
    """
    return os.path.splitext(os.path.basename(pdf_path))[0]


def page_output_path(pages_dir, stem, page_index):
    """Return the path ``<pages_dir>/<stem>_page<page_index>.pdf``."""
    return os.path.join(pages_dir, stem + "_page%s.pdf" % page_index)


def split_pdf(pdf_path, out_dir):
    """Write every page of *pdf_path* to its own single-page PDF file.

    The files are placed in ``<out_dir>/<stem>/`` (created if missing),
    where ``<stem>`` is the input file name without its extension. Pages
    are numbered from 0 in the output file names.

    Args:
        pdf_path: Path of the PDF file to split.
        out_dir: Folder under which the per-document folder is created.

    Returns:
        The list of written file paths, in page order. It is empty when
        the document has no pages.
    """
    stem = pdf_stem(pdf_path)

    # Make the dir for single page pdfs
    pages_dir = os.path.join(out_dir, stem)
    os.makedirs(pages_dir, exist_ok=True)

    written = []
    with open(pdf_path, "rb") as f:
        inputpdf = PdfFileReader(f)

        for i in range(inputpdf.numPages):
            # A fresh writer per page so each output holds exactly one page.
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))

            page_path = page_output_path(pages_dir, stem, i)
            with open(page_path, "wb") as output_stream:
                output.write(output_stream)
            written.append(page_path)

    # No ``del i`` here: the loop variable is local to this function, and
    # deleting it raised NameError for documents with zero pages.
    return written


if __name__ == '__main__':
    # Parse inside the guard so that importing this module never reads
    # the command line.
    args = parser.parse_args()
    split_pdf(args.input_path, args.output_path)