Skip to content

Instantly share code, notes, and snippets.

@PolarNick239
Last active August 30, 2017 10:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PolarNick239/154dd67d69d4181deb1903ad2ecd5a66 to your computer and use it in GitHub Desktop.
Save PolarNick239/154dd67d69d4181deb1903ad2ecd5a66 to your computer and use it in GitHub Desktop.
PDF from images. Script downscales all jpg images and then merges them into pdfs with pdftk.
#!/usr/bin/python3.5
import glob
import os
import pathlib
import subprocess
from pathlib import Path
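# How to use:
# Requires ImageMagick's `convert` and `pdftk` to be available on PATH.
# Run the script from a directory that contains folders named with numbers from 1 up to 99. Each folder should contain *.JPG files.
# The script downscales every image that has not been downscaled yet into the "downscaled" subdirectory, then stitches the downscaled images into PDFs: one PDF per folder plus a single combined PDF.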
# Example of input directories:
#├── 1
#│   ├── DSCF2853.JPG
#│   ├── DSCF2854.JPG
#│   ├── DSCF2855.JPG
#│   ├── DSCF2856.JPG
#│   └── DSCF2857.JPG
#├── 2
#│   ├── DSCF2858.JPG
#│   ├── DSCF2859.JPG
#│   ├── DSCF2860.JPG
#│   └── DSCF2861.JPG
#├── 3
#│   ├── DSCF2862.JPG
#│   ├── DSCF2863.JPG
#│   ├── DSCF2864.JPG
#│   ├── DSCF2865.JPG
#│   └── DSCF2866.JPG
#└── stitch_pdf.py
#
# Example of output:
#├── 1
#│   ├── DSCF2853.JPG
#│   ├── DSCF2854.JPG
#│   ├── DSCF2855.JPG
#│   ├── DSCF2856.JPG
#│   └── DSCF2857.JPG
#├── 2
#│   ├── DSCF2858.JPG
#│   ├── DSCF2859.JPG
#│   ├── DSCF2860.JPG
#│   └── DSCF2861.JPG
#├── 3
#│   ├── DSCF2862.JPG
#│   ├── DSCF2863.JPG
#│   ├── DSCF2864.JPG
#│   ├── DSCF2865.JPG
#│   └── DSCF2866.JPG
#├── downscaled
#│   ├── 1
#│   │   ├── DSCF2853.JPG
#│   │   ├── DSCF2854.JPG
#│   │   ├── DSCF2855.JPG
#│   │   ├── DSCF2856.JPG
#│   │   └── DSCF2857.JPG
#│   ├── 2
#│   │   ├── DSCF2858.JPG
#│   │   ├── DSCF2859.JPG
#│   │   ├── DSCF2860.JPG
#│   │   └── DSCF2861.JPG
#│   └── 3
#│       ├── DSCF2862.JPG
#│       ├── DSCF2863.JPG
#│       ├── DSCF2864.JPG
#│       ├── DSCF2865.JPG
#│       └── DSCF2866.JPG
#├── stitch_pdf.py
#└── pdfs
#    ├── theory01.pdf
#    ├── theory02.pdf
#    ├── theory03.pdf
#    └── theory.pdf
# Output locations, relative to the directory the script is run from.
PDF_DIR = "./pdfs"
DOWNSCALED_ROOT = "./downscaled"


def find_day_dirs(root="."):
    """Return the day directories directly under *root*, in numeric order.

    A day directory is a sub-directory whose name parses as an integer.
    Plain files are ignored even if their name is a number, and the result
    is ordered by the numeric value (so "2" comes before "10"), not by the
    string value.

    Args:
        root: Directory to scan.

    Returns:
        A list of paths of the form ``os.path.join(root, name)``.
    """
    numbered = []
    for name in os.listdir(root):
        path = os.path.join(root, name)
        if not os.path.isdir(path):
            continue
        try:
            numbered.append((int(name), path))
        except ValueError:
            # Not a day folder (e.g. "downscaled", "pdfs").
            continue
    numbered.sort()
    return [path for _, path in numbered]


def plan_downscale(day_images, downscaled_dir):
    """Work out where each image is downscaled to and which ones are missing.

    Args:
        day_images: Paths of the original images of one day.
        downscaled_dir: Directory that holds the downscaled copies.

    Returns:
        A ``(targets, pending)`` tuple. ``targets`` lists the downscaled path
        of every image, in the order of *day_images*. ``pending`` lists the
        ``(source, target)`` pairs whose target file does not exist yet.
    """
    targets = [
        os.path.join(downscaled_dir, os.path.basename(image))
        for image in day_images
    ]
    pending = [
        (source, target)
        for source, target in zip(day_images, targets)
        if not os.path.isfile(target)
    ]
    return targets, pending


def run_tool(args):
    """Run an external command and report whether it succeeded.

    The command is passed as an argument list and never goes through a
    shell, so paths containing spaces or shell metacharacters are safe.

    Args:
        args: The program name followed by its arguments.

    Returns:
        True if the command ran and exited with status 0, False otherwise
        (including when the program could not be started at all).
    """
    try:
        status = subprocess.call(args)
    except OSError as err:
        # Typically the tool is not installed or is not executable.
        print("Failed to run {}: {}".format(args[0], err))
        return False
    if status != 0:
        print("{} exited with status {}".format(args[0], status))
        return False
    return True


def process_day(day_path, pdf_dir=PDF_DIR, downscaled_root=DOWNSCALED_ROOT):
    """Downscale the images of one day and stitch them into that day's PDF.

    Images that already have a downscaled copy are not converted again; the
    PDF itself is rebuilt on every call.

    Args:
        day_path: Path of the day directory; its base name must be an integer.
        pdf_dir: Directory the per-day PDF is written to.
        downscaled_root: Directory under which ``<day number>/`` is created
            for the downscaled copies.

    Returns:
        The path of the generated PDF, or None when the day has no *.JPG
        images or the PDF could not be produced.
    """
    day_number = int(os.path.basename(day_path))
    print("Processing day {}...".format(day_number))

    day_images = sorted(glob.glob(os.path.join(glob.escape(day_path), "*.JPG")))
    if not day_images:
        # Running `convert` without inputs fails, and a missing per-day PDF
        # would make the final pdftk merge fail for every other day as well.
        print("No *.JPG images found in {}, skipping.".format(day_path))
        return None

    downscaled_dir = os.path.join(downscaled_root, str(day_number))
    pathlib.Path(downscaled_dir).mkdir(parents=True, exist_ok=True)

    downscaled_images, pending = plan_downscale(day_images, downscaled_dir)
    all_converted = True
    for source, target in pending:
        command = ["convert", "-resize", "25%", "-quality", "90%", source, target]
        if not run_tool(command):
            print("Could not downscale {}".format(source))
            all_converted = False

    if not pending:
        print("Images already downscaled!")
    elif all_converted:
        print("Images downscaled!")
    else:
        print("Some images could not be downscaled!")

    # Only stitch pages that really exist, so one failed conversion does not
    # prevent the rest of the day from being turned into a PDF.
    pages = [image for image in downscaled_images if os.path.isfile(image)]
    if not pages:
        return None

    pathlib.Path(pdf_dir).mkdir(parents=True, exist_ok=True)
    pdf_file = os.path.join(pdf_dir, "theory{:02}.pdf".format(day_number))
    if not run_tool(["convert"] + pages + [pdf_file]):
        return None
    return pdf_file


def main():
    """Build one PDF per day directory plus a combined PDF of all days.

    Day directories are looked up in the current working directory. When
    none are found, nothing is created on disk.
    """
    day_paths = find_day_dirs(".")
    print("Directories to be processed: ")
    for day_path in day_paths:
        print(day_path)
    if not day_paths:
        return

    pathlib.Path(PDF_DIR).mkdir(parents=True, exist_ok=True)

    # Days are visited in numeric order, so pdf_files is already in page
    # order; no string sort is needed (it would misplace day 100 and above).
    pdf_files = []
    for day_path in day_paths:
        pdf_file = process_day(day_path, PDF_DIR, DOWNSCALED_ROOT)
        if pdf_file is not None:
            pdf_files.append(pdf_file)

    if not pdf_files:
        print("No PDFs were produced, nothing to merge.")
        return

    combined = os.path.join(PDF_DIR, "theory.pdf")
    run_tool(["pdftk"] + pdf_files + ["cat", "output", combined])


# Run immediately, exactly as the original flat script did.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment