Skip to content

Instantly share code, notes, and snippets.

@michalpelka
Created February 12, 2023 18:05
Show Gist options
  • Save michalpelka/2ad07630fb0fb541dca41c7390433eb0 to your computer and use it in GitHub Desktop.
Save michalpelka/2ad07630fb0fb541dca41c7390433eb0 to your computer and use it in GitHub Desktop.
from PyPDF2 import PdfWriter, PdfReader
import glob
import os.path
# Directory scanned for input PDFs, and root directory for the per-page output.
INPUT_DIR = "data"
OUTPUT_DIR = "data_proc"

# Split every "*.pdf" file found directly in INPUT_DIR into single-page PDFs.
# Pages of "<name>.pdf" are written to OUTPUT_DIR/<name>/sesjaNNN.pdf, where
# NNN is the zero-based page index padded to three digits.
files = glob.glob(os.path.join(INPUT_DIR, "*.pdf"))
for f in files:
    # File name without its directory and without the ".pdf" extension.
    basename = os.path.splitext(os.path.basename(f))[0]
    print(basename)
    current_output_dir = os.path.join(OUTPUT_DIR, basename)
    os.makedirs(current_output_dir, exist_ok=True)
    # The reader is built on this stream, so the stream stays open until
    # every page has been written; the `with` then closes it instead of
    # leaving one open handle per input file.
    with open(f, "rb") as input_stream:
        inputpdf = PdfReader(input_stream)
        for i, page in enumerate(inputpdf.pages):
            # A fresh writer per page yields one single-page document each.
            output = PdfWriter()
            output.add_page(page)
            page_path = os.path.join(current_output_dir, "sesja%03d.pdf" % i)
            with open(page_path, "wb") as outputStream:
                output.write(outputStream)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment