Skip to content

Instantly share code, notes, and snippets.

@giuseppe-testa
Created September 19, 2019 12:52
Show Gist options
  • Save giuseppe-testa/490cdbe8f83293bdcabfedec4500f8ec to your computer and use it in GitHub Desktop.
Save giuseppe-testa/490cdbe8f83293bdcabfedec4500f8ec to your computer and use it in GitHub Desktop.
Useful script to split a given PDF into single-page PDF files.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Utils scripts to split the pages of a pdf.
Example:
$ python pdf_split.py -i "path_to_pdf/sample.pdf" -o "path_to_dir"
* Author: Giuseppe Testa
* Date: 19/09/2019
"""
import os
import argparse
from PyPDF2 import PdfFileWriter, PdfFileReader
def build_parser():
    """Build the command-line parser for the pdf splitter.

    Returns:
        argparse.ArgumentParser: parser exposing ``-i/--input_path`` and
        ``-o/--output_path``. Both options are mandatory, so a missing one
        produces a usage error instead of a later failure on ``None``.
    """
    parser = argparse.ArgumentParser(
        description='Split all the image from a pdf')
    parser.add_argument('-i', '--input_path', type=str, required=True,
                        help='Pdf input filepath')
    parser.add_argument('-o', '--output_path', type=str, required=True,
                        help='Output folder')
    return parser


def page_stem(pdf_path):
    """Return the file name of *pdf_path* without directory or extension.

    Only the final extension is stripped, so ``my.report.pdf`` yields
    ``my.report`` rather than being cut at the first dot.

    Args:
        pdf_path (str): path to the input pdf.

    Returns:
        str: base name used for the output folder and page files.
    """
    return os.path.splitext(os.path.basename(pdf_path))[0]


def split_pdf(pdf_path, out_dir):
    """Write every page of *pdf_path* to its own single-page pdf.

    Pages are written to ``<out_dir>/<stem>/<stem>_page<N>.pdf`` where
    ``<stem>`` is ``page_stem(pdf_path)`` and ``N`` is the zero-based page
    index. The target folder is created when it does not exist.

    Args:
        pdf_path (str): path to the pdf to split.
        out_dir (str): folder under which the per-document folder is made.

    Returns:
        str: the folder that received the single-page files.
    """
    stem = page_stem(pdf_path)
    # Make the dir for single page pdfs
    new_dir = os.path.join(out_dir, stem)
    os.makedirs(new_dir, exist_ok=True)
    # The reader pulls page data from the open handle, so the input file
    # stays open until every page has been written.
    with open(pdf_path, "rb") as f:
        inputpdf = PdfFileReader(f)
        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            output_filename = os.path.join(
                new_dir, "%s_page%s.pdf" % (stem, i))
            with open(output_filename, "wb") as output_stream:
                output.write(output_stream)
    return new_dir


def main(argv=None):
    """Parse the command line and split the requested pdf.

    Args:
        argv (list[str] | None): arguments to parse; ``None`` makes
            argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    split_pdf(args.input_path, args.output_path)


# Parsing happens only when run as a script, so importing this module
# never touches sys.argv.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment