Created
September 19, 2019 12:52
-
-
Save giuseppe-testa/490cdbe8f83293bdcabfedec4500f8ec to your computer and use it in GitHub Desktop.
Useful script to split a given PDF into single pages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
"""Utility script to split a PDF into single-page PDF files.

Example:
    $ python pdf_split.py -i "path_to_pdf/sample.pdf" -o "path_to_dir"

* Author: Giuseppe Testa
* Date: 19/09/2019
"""
import os | |
import argparse | |
from PyPDF2 import PdfFileWriter, PdfFileReader | |
# The parser is built at module level, but arguments are only parsed inside
# main(), so importing this module no longer consumes sys.argv.
parser = argparse.ArgumentParser(description='Split a pdf into single-page pdfs')
# Both paths are mandatory: without them there is nothing to read or write,
# and a missing value would otherwise surface as a TypeError further down.
parser.add_argument('-i', '--input_path', type=str, required=True,
                    help='Pdf input filepath')
parser.add_argument('-o', '--output_path', type=str, required=True,
                    help='Output folder')


def pdf_stem(pdf_path):
    """Return the file name of *pdf_path* without directory or last extension.

    Only the final extension is removed, so ``docs/my.report.pdf`` gives
    ``my.report`` (splitting on the first dot would give just ``my``).
    """
    return os.path.splitext(os.path.basename(pdf_path))[0]


def split_pdf(pdf_path, out_dir):
    """Write every page of the pdf at *pdf_path* to its own pdf file.

    Files are written to ``<out_dir>/<stem>/<stem>_page<N>.pdf`` where
    ``<stem>`` is ``pdf_stem(pdf_path)`` and ``N`` is the zero-based page
    index. The target directory is created when it does not exist.

    Args:
        pdf_path: Path of the pdf to split.
        out_dir: Folder under which the per-document directory is created.

    Returns:
        The list of written file paths, in page order (empty for a pdf
        without pages).
    """
    basename = pdf_stem(pdf_path)

    # Make the dir for single page pdfs
    new_dir = os.path.join(out_dir, basename)
    os.makedirs(new_dir, exist_ok=True)

    written = []
    # Keep the source open while writing: PdfFileReader reads from this stream.
    with open(pdf_path, "rb") as f:
        inputpdf = PdfFileReader(f)
        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            output_filename = os.path.join(new_dir,
                                           basename + "_page%s.pdf" % i)
            with open(output_filename, "wb") as output_stream:
                output.write(output_stream)
            written.append(output_filename)
    return written


def main(argv=None):
    """Parse the command line and split the requested pdf.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    split_pdf(args.input_path, args.output_path)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment