Skip to content

Instantly share code, notes, and snippets.

@derlin
Created September 11, 2022 16:58
Show Gist options
  • Save derlin/6ac4330c31ff849ea6fb779ceecbac82 to your computer and use it in GitHub Desktop.
Save derlin/6ac4330c31ff849ea6fb779ceecbac82 to your computer and use it in GitHub Desktop.
Generate PDFs out of image directories (for mangas mostly)
"""
Generate PDFs out of directories with images.
If the directories have a sub-structure, the --levels argument can be used. For example, given the following structure:
├─ MyManga_volumes
│   └── Vol1
│       ├── 001.png
│       ├── 002.jpeg
│       ├── ...
│   └── Vol2
│       ├── ...
├─ MyManga_chaps
│   └── Vol1
│       └── chap1
│           ├── 001.png
│           ├── 002.jpeg
│       └── chap2
│           ├── 001.jpg
│   └── Vol2
│       ├── ...
All the following commands will generate one PDF per volume:
python scripts.py -i MyManga_volumes -l 1
python scripts.py -i . -l 2
python scripts.py -i MyManga_volumes/Vol1 -l 0
python scripts.py -i MyManga_chaps/Vol1 -l 0
Supported image extensions are .jpe?g and .png.
"""
from PIL import Image
from glob import glob
from argparse import ArgumentParser
import logging
import os
# Page count above which the user is asked to confirm before a PDF is built.
MAX_PAGES = 600
# File extensions (without the leading dot) that are treated as images.
SUPPORTED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
# Logger used by this script; switched to DEBUG level by the --debug flag.
logger = logging.getLogger('convert')
def listdir(dir):
    """Return every entry of *dir* with *dir* itself prepended to its name.

    Behaves like ``os.listdir`` (same entries, same order) except that each
    returned item is a path usable from the current working directory rather
    than a bare file name.
    """
    names = os.listdir(dir)
    return [os.path.join(dir, name) for name in names]
def load_image(path):
    """Open the image at *path* and return it fully loaded, in RGB mode.

    PDF pages must be RGB, so images in any other mode (palette, RGBA,
    greyscale, ...) are converted.

    Args:
        path: Path of the image file to read.

    Returns:
        A ``PIL.Image.Image`` in ``'RGB'`` mode whose pixel data is already
        in memory.

    Raises:
        Whatever ``Image.open`` / ``Image.load`` raise for an unreadable or
        corrupt file (e.g. ``OSError``).
    """
    image = Image.open(path)
    try:
        if image.mode != 'RGB':
            logger.debug(f"Found image with non-RGB channel: '{path}'. Converting to RGB.")
            # convert() reads the pixel data and returns an independent copy.
            return image.convert('RGB')
        # Image.open() is lazy and keeps the file open. Reading the pixels now
        # lets Pillow release the file handle, so that a directory with
        # hundreds of pages does not exhaust the process' file descriptors.
        image.load()
        return image
    except Exception:
        # Do not leak the file handle when the file turns out to be unreadable.
        image.close()
        raise
def convert_directory_to_pdf(input_dir, pdf_file, min_pages=40, dry_run=False):
    """Bundle every supported image found under *input_dir* into one PDF.

    Images are searched recursively and become pages in lexicographic path
    order. Nothing is written when the directory holds no image or fewer than
    *min_pages* images, when *pdf_file* already exists, or when the user
    declines the confirmation shown for more than ``MAX_PAGES`` images.

    Args:
        input_dir: Directory scanned recursively for images.
        pdf_file: Path of the PDF to create.
        min_pages: Minimum number of images required to produce a PDF.
        dry_run: If true, only log what would be generated.
    """
    # Local import: the module only imports the ``glob`` function itself.
    from glob import escape

    # Escape the directory so that names such as "[Group] Title" are matched
    # literally instead of being read as glob character classes.
    pattern = f"{escape(input_dir)}/**/*.*"
    # Only collect paths here; images are opened after all the skip checks so
    # that skipped directories and dry runs cost no image decoding.
    image_paths = sorted(
        path
        for path in glob(pattern, recursive=True)
        # Case-insensitive so that "001.JPG" or "002.PNG" are not ignored.
        if os.path.splitext(path)[1][1:].lower() in SUPPORTED_EXTENSIONS
        and os.path.isfile(path)
    )
    page_count = len(image_paths)

    # The explicit zero check protects against min_pages <= 0: a PDF cannot
    # be built without at least one page.
    if page_count == 0 or page_count < min_pages:
        logger.debug(f" Not enough images found in directory '{input_dir}' ({page_count}). Skipping.")
        return

    # Checked before prompting: no point asking about a PDF that is skipped.
    if os.path.exists(pdf_file):
        logger.info(f"{pdf_file} already exists. Skipping.")
        return

    if page_count > MAX_PAGES:
        answer = input(f"Large PDF detected: {page_count} pages. Do You Want To Continue? [y/n]")
        if answer.strip().lower() != "y":
            logger.debug("User didn't confirm. Skipping.")
            return

    if dry_run:
        logger.info(f"[dry-run] Would generate pdf '{pdf_file}': {page_count} pages.")
        return

    images = [load_image(path) for path in image_paths]
    try:
        images[0].save(
            pdf_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
        )
    finally:
        # Release pixel buffers and file handles before the next directory.
        for image in images:
            image.close()
    logger.info(f"Generated pdf '{pdf_file}': {page_count} pages.")
def find_dirs_at_level(start, level):
    """Return the directories located exactly *level* levels below *start*.

    Args:
        start: Directory where the search begins.
        level: Depth to descend. ``0`` yields ``[start]``, ``1`` the direct
            subdirectories of *start*, ``2`` their subdirectories, and so on.

    Returns:
        A list of paths, each prefixed with *start*, in the order reported by
        ``os.listdir``. Empty when no directory exists at that depth or when
        *level* is negative.
    """
    if level < 0:
        # No directory sits at a negative depth; answer right away instead of
        # walking the whole tree only to come back empty-handed.
        return []
    if level == 0:
        return [start]

    children = (os.path.join(start, name) for name in os.listdir(start))
    subdirs = [child for child in children if os.path.isdir(child)]
    if level == 1:
        return subdirs

    # Descend one level into each subdirectory and flatten the results.
    return [
        found
        for subdir in subdirs
        for found in find_dirs_at_level(subdir, level - 1)
    ]
def make_pdf_name(directory, naming="directory"):
    """Build the PDF file name (without any directory part) for *directory*.

    Args:
        directory: Path of the directory being converted.
        naming: ``"directory"`` to use only the last path segment, ``"path"``
            to join every path segment with ``'_'``.

    Returns:
        A file name ending in ``.pdf`` that contains no path separator.
    """
    # normpath drops trailing separators and "./" prefixes, which would
    # otherwise produce empty or "." segments (and a PDF called ".pdf").
    normalized = os.path.normpath(directory)
    if naming == "path":
        # os.path.split() only separates the LAST segment, so split on the
        # separator to get them all; the drive (Windows) cannot be part of a
        # file name and is dropped.
        _, tail = os.path.splitdrive(normalized)
        segments = [
            segment
            for segment in tail.split(os.sep)
            if segment not in ("", os.curdir, os.pardir)
        ]
        if segments:
            return "_".join(segments) + ".pdf"
    # abspath resolves "." and ".." to a real directory name.
    base = os.path.basename(os.path.abspath(normalized))
    return (base or "output") + ".pdf"


def main():
    """Parse the command line and convert every matching directory to a PDF."""
    parser = ArgumentParser(description="Generate PDFs out of directories of images.")
    parser.add_argument("-i", "--input-dir", default=".", help="Base directory to start looking for mangas.")
    parser.add_argument("-o", "--output-dir", default='out', help="Where to save the generated pdfs.")
    parser.add_argument("-l", "--levels", type=int, default=0, help="Number of subdirectories (e.g. volumes subdirectory).")
    parser.add_argument("-n", "--name", choices=["directory", "path"], default="directory",
                        help="Use only the last directory for PDF names, or the full path (segments joined with '_').")
    parser.add_argument("--min-pages", type=int, default=40, help="minimum images to consider the directory a manga.")
    parser.add_argument("--dry-run", action="store_true", help="Only print what will happen, don't actually generate the PDFs.")
    parser.add_argument("--debug", action="store_true", help="Turn on debug mode.")
    args = parser.parse_args()

    # Fail early with a usage message instead of a traceback or a silent no-op.
    if args.levels < 0:
        parser.error("--levels must be zero or a positive integer.")
    if not os.path.isdir(args.input_dir):
        parser.error(f"input directory '{args.input_dir}' does not exist or is not a directory.")

    logging.basicConfig(format="%(levelname)-8s %(message)s", level=logging.INFO)
    if args.debug:
        logging.getLogger('convert').setLevel(logging.DEBUG)

    # A dry run must not leave anything behind, not even the output directory.
    if not args.dry_run:
        os.makedirs(args.output_dir, exist_ok=True)

    for directory in sorted(find_dirs_at_level(args.input_dir, args.levels)):
        logger.debug(f"Visiting directory {directory}")
        name = make_pdf_name(directory, args.name)
        convert_directory_to_pdf(directory, os.path.join(args.output_dir, name), args.min_pages, args.dry_run)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment