derlin/generate_pdf_from_image_directories.py

## generate_pdf_from_image_directories.py
"""
Generate PDFs out of directories with images.

If the directories have a sub-structure, the --levels argument can be used. For example, given the following structure:

    ├─ MyManga_volumes
    │   └── Vol1
    │       ├── 001.png
    │       ├── 002.jpeg
    │       ├── ...
    │   └── Vol2
    │       ├── ...
    ├─ MyManga_chaps
    │   └── Vol1
    │       └── chap1
    │           ├── 001.png
    │           ├── 002.jpeg
    │       └── chap2
    │           ├── 001.jpg
    │   └── Vol2
    │       ├── ...

All the following commands will generate PDFs per volume:

   python scripts.py -i MyManga_volumes -l 1
   python scripts.py -i . -l 2

   python scripts.py -i MyManga_volumes/Vol1 -l 0
   python scripts.py -i MyManga_chaps/Vol1 -l 0

Supported image extensions are .jpe?g and .png.
"""

from PIL import Image
from glob import glob
from argparse import ArgumentParser
import logging
import os

# Above this many images, convert_directory_to_pdf asks for confirmation first.
MAX_PAGES = 600
# File extensions (lowercase, without the leading dot) treated as images.
SUPPORTED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
# Logger used by the whole script; --debug switches it to DEBUG level.
logger = logging.getLogger('convert')

def listdir(dir):
    """Return the entries of ``dir``, each one prefixed with ``dir`` itself.

    Unlike ``os.listdir``, the returned strings are usable as paths without
    joining them with the parent directory again.
    """
    paths = []
    for entry in os.listdir(dir):
        paths.append(os.path.join(dir, entry))
    return paths

def load_image(path):
    """Open the image stored at ``path`` and return it in RGB mode.

    Args:
        path: Path of the image file, passed to ``Image.open``.

    Returns:
        The opened image if its mode is already ``'RGB'``, otherwise the
        result of converting it with ``convert('RGB')``.
    """
    image = Image.open(path)
    if image.mode != 'RGB':
        logger.debug(f"Found image with non-RGB channel: '{path}'. Converting to RGB.")
        return image.convert('RGB')
    return image

def convert_directory_to_pdf(input_dir, pdf_file, min_pages=40, dry_run=False):
    """Bundle every supported image found under ``input_dir`` into one PDF.

    Images are searched recursively and added in sorted path order. Nothing
    is written when fewer than ``min_pages`` images are found, when
    ``pdf_file`` already exists, or when the user declines the confirmation
    prompt shown for more than ``MAX_PAGES`` images.

    Args:
        input_dir: Directory searched (recursively) for images.
        pdf_file: Path of the PDF to create; an existing file is never
            overwritten.
        min_pages: Minimum number of images required to generate a PDF.
        dry_run: If true, only log what would be generated; no image is
            opened and no PDF is written.
    """
    # Local import: the module only imports the ``glob`` function itself.
    from glob import escape

    # escape() keeps glob metacharacters in the directory name (e.g. the
    # brackets of "[Group] Title") from being interpreted as a pattern.
    pattern = f"{escape(input_dir)}/**/*.*"
    image_paths = sorted(
        path
        for path in glob(pattern, recursive=True)
        # Case-insensitive extension match so 001.PNG / 002.JPG are kept;
        # isfile() rejects directories that merely look like images.
        if os.path.splitext(path)[1][1:].lower() in SUPPORTED_EXTENSIONS
        and os.path.isfile(path)
    )
    page_count = len(image_paths)

    # The "== 0" test covers min_pages <= 0 with no image at all, which
    # would otherwise fail on the first-page lookup below.
    if page_count < min_pages or page_count == 0:
        logger.debug(f"  Not enough images found in directory '{input_dir}' ({page_count}). Skipping.")
        return

    # Checked before prompting so the user is not asked about a PDF that
    # would be skipped anyway.
    if os.path.exists(pdf_file):
        logger.info(f"{pdf_file} already exists and overwrite is False. Skipping.")
        return

    if page_count > MAX_PAGES:
        if input(f"Large PDF detected: {page_count} pages. Do You Want To Continue? [y/n]") != "y":
            logger.debug("User didn't confirm. Skipping.")
            return

    if not dry_run:
        # Images are opened only now that every skip condition has passed.
        images = []
        try:
            for path in image_paths:
                images.append(load_image(path))
            images[0].save(
                pdf_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
            )
        finally:
            # Release file handles and pixel data before the next directory.
            for image in images:
                image.close()
    logger.info(f"Generated pdf '{pdf_file}': {page_count} pages.")

def find_dirs_at_level(start, level):
    """Return the directories located exactly ``level`` levels below ``start``.

    Level 0 is ``start`` itself, level 1 its direct subdirectories, and so
    on. Results follow the order in which ``listdir`` reports the entries.
    """
    if level == 0:
        return [start]

    children = list(filter(os.path.isdir, listdir(start)))
    if level == 1:
        return children

    # Descend one level into each child and gather what is found there.
    found = []
    for child in children:
        found.extend(find_dirs_at_level(child, level - 1))
    return found

def _pdf_name(directory, mode):
    """Return the PDF file name (without any directory part) for ``directory``.

    Args:
        directory: Path of the directory being converted.
        mode: ``"directory"`` to use only the last path component, anything
            else to join all path components with ``'_'``.
    """
    # abspath() normalizes the path, so a trailing slash or "." still
    # yields a real directory name.
    resolved = os.path.abspath(directory)
    if mode == "directory":
        stem = os.path.basename(resolved)
    else:
        # Split on every separator (os.path.split only separates the last
        # component) and drop drive, root, "." and ".." so the name can
        # never point outside of the output directory.
        relative = os.path.splitdrive(os.path.normpath(directory))[1]
        parts = [
            part
            for part in relative.split(os.sep)
            if part not in ("", os.curdir, os.pardir)
        ]
        stem = "_".join(parts) or os.path.basename(resolved)
    # Only the filesystem root has no name at all.
    return (stem or "root") + ".pdf"

if __name__ == "__main__":
    parser = ArgumentParser(description="Generate PDFs out of directories of images.")
    parser.add_argument("-i", "--input-dir", default=".", help="Base directory to start looking for mangas.")
    parser.add_argument("-o", "--output-dir", default='out', help="Where to save the generated pdfs.")
    parser.add_argument("-l", "--levels", type=int, default=0, help="Number of subdirectories (e.g. volumes subdirectory).")
    parser.add_argument("-n", "--name", choices=["directory", "path"], default="directory",
        help="Use the full path for PDF names (separated with '_'), or only the last directory.")
    parser.add_argument("--min-pages", type=int, default=40, help="minimum images to consider the directory a manga.")
    parser.add_argument("--dry-run", action="store_true", help="Only print what will happen, don't actually generate the PDFs.")
    parser.add_argument("--debug", action="store_true", help="Turn on debug mode.")
    args = parser.parse_args()
    if args.levels < 0:
        parser.error("--levels must be zero or a positive integer.")

    logging.basicConfig(format="%(levelname)-8s %(message)s", level=logging.INFO)
    if args.debug:
        logger.setLevel(logging.DEBUG)

    # A dry run must leave the filesystem untouched.
    if not args.dry_run:
        os.makedirs(args.output_dir, exist_ok=True)

    for directory in sorted(find_dirs_at_level(args.input_dir, args.levels)):
        logger.debug(f"Visiting directory {directory}")
        pdf_file = os.path.join(args.output_dir, _pdf_name(directory, args.name))
        convert_directory_to_pdf(directory, pdf_file, args.min_pages, args.dry_run)

## requirements.txt
Pillow>=9.2.0
	"""
	Generate PDFs out of directories with images.

	If the directories have a sub-structure, the --levels argument can be used. For example, given the following structure:

	├─ MyManga_volumes
	│ └── Vol1
	│ ├── 001.png
	│ ├── 002.jpeg
	│ ├── ...
	│ └── Vol2
	│ ├── ...
	├─ MyManga_chaps
	│ └── Vol1
	│ └── chap1
	│ ├── 001.png
	│ ├── 002.jpeg
	│ └── chap2
	│ ├── 001.jpg
	│ └── Vol2
	│ ├── ...

	All the following commands will generate PDFs per volume:

	python scripts.py -i MyManga_volumes -l 1
	python scripts.py -i . -l 2

	python scripts.py -i MyManga_volumes/Vol1 -l 0
	python scripts.py -i MyManga_chaps/Vol1 -l 0

	Supported image extensions are .jpe?g and .png.
	"""

	from PIL import Image
	from glob import glob
	from argparse import ArgumentParser
	import logging
	import os

	# Above this many images, convert_directory_to_pdf asks for confirmation first.
	MAX_PAGES = 600
	# File extensions (lowercase, without the leading dot) treated as images.
	SUPPORTED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
	# Logger used by the whole script; --debug switches it to DEBUG level.
	logger = logging.getLogger('convert')

	def listdir(dir):
	    """Return the entries of ``dir``, each one prefixed with ``dir`` itself.

	    Unlike ``os.listdir``, the returned strings are usable as paths
	    without joining them with the parent directory again.
	    """
	    return [os.path.join(dir, f) for f in os.listdir(dir)]

	def load_image(path):
	    """Open the image stored at ``path`` and return it in RGB mode.

	    Args:
	        path: Path of the image file, passed to ``Image.open``.

	    Returns:
	        The opened image if its mode is already ``'RGB'``, otherwise the
	        result of converting it with ``convert('RGB')``.
	    """
	    image = Image.open(path)
	    if image.mode != 'RGB':
	        logger.debug(f"Found image with non-RGB channel: '{path}'. Converting to RGB.")
	        return image.convert('RGB')
	    return image

	def convert_directory_to_pdf(input_dir, pdf_file, min_pages=40, dry_run=False):
	    """Bundle every supported image found under ``input_dir`` into one PDF.

	    Images are searched recursively and added in sorted path order.
	    Nothing is written when fewer than ``min_pages`` images are found,
	    when ``pdf_file`` already exists, or when the user declines the
	    confirmation prompt shown for more than ``MAX_PAGES`` images.

	    Args:
	        input_dir: Directory searched (recursively) for images.
	        pdf_file: Path of the PDF to create; an existing file is never
	            overwritten.
	        min_pages: Minimum number of images required to generate a PDF.
	        dry_run: If true, only log what would be generated; no image is
	            opened and no PDF is written.
	    """
	    # Local import: the module only imports the ``glob`` function itself.
	    from glob import escape

	    # "**" (with recursive=True) descends into every subdirectory and
	    # "*.*" keeps names with an extension. escape() keeps glob
	    # metacharacters in the directory name (e.g. "[Group] Title") from
	    # being interpreted as a pattern.
	    pattern = f"{escape(input_dir)}/**/*.*"
	    image_paths = sorted(
	        path
	        for path in glob(pattern, recursive=True)
	        # Case-insensitive extension match so 001.PNG / 002.JPG are kept;
	        # isfile() rejects directories that merely look like images.
	        if os.path.splitext(path)[1][1:].lower() in SUPPORTED_EXTENSIONS
	        and os.path.isfile(path)
	    )
	    page_count = len(image_paths)

	    # The "== 0" test covers min_pages <= 0 with no image at all, which
	    # would otherwise fail on the first-page lookup below.
	    if page_count < min_pages or page_count == 0:
	        logger.debug(f" Not enough images found in directory '{input_dir}' ({page_count}). Skipping.")
	        return

	    # Checked before prompting so the user is not asked about a PDF that
	    # would be skipped anyway.
	    if os.path.exists(pdf_file):
	        logger.info(f"{pdf_file} already exists and overwrite is False. Skipping.")
	        return

	    if page_count > MAX_PAGES:
	        if input(f"Large PDF detected: {page_count} pages. Do You Want To Continue? [y/n]") != "y":
	            logger.debug("User didn't confirm. Skipping.")
	            return

	    if not dry_run:
	        # Images are opened only now that every skip condition has passed.
	        images = []
	        try:
	            for path in image_paths:
	                images.append(load_image(path))
	            images[0].save(
	                pdf_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
	            )
	        finally:
	            # Release file handles and pixel data before the next directory.
	            for image in images:
	                image.close()
	    logger.info(f"Generated pdf '{pdf_file}': {page_count} pages.")

	def find_dirs_at_level(start, level):
	    """Return the directories located exactly ``level`` levels below ``start``.

	    Level 0 is ``start`` itself, level 1 its direct subdirectories, and
	    so on. Results follow the order in which ``listdir`` reports entries.
	    """
	    if level == 0:
	        return [start]
	    dirs = [d for d in listdir(start) if os.path.isdir(d)]
	    if level == 1:
	        return dirs
	    # Descend one level into each subdirectory and gather the results.
	    return [subdir for dir in dirs for subdir in find_dirs_at_level(dir, level - 1)]

	def _pdf_name(directory, mode):
	    """Return the PDF file name (without any directory part) for ``directory``.

	    Args:
	        directory: Path of the directory being converted.
	        mode: ``"directory"`` to use only the last path component,
	            anything else to join all path components with ``'_'``.
	    """
	    # abspath() normalizes the path, so a trailing slash or "." still
	    # yields a real directory name.
	    resolved = os.path.abspath(directory)
	    if mode == "directory":
	        stem = os.path.basename(resolved)
	    else:
	        # Split on every separator (os.path.split only separates the last
	        # component) and drop drive, root, "." and ".." so the name can
	        # never point outside of the output directory.
	        relative = os.path.splitdrive(os.path.normpath(directory))[1]
	        parts = [
	            part
	            for part in relative.split(os.sep)
	            if part not in ("", os.curdir, os.pardir)
	        ]
	        stem = "_".join(parts) or os.path.basename(resolved)
	    # Only the filesystem root has no name at all.
	    return (stem or "root") + ".pdf"

	if __name__ == "__main__":
	    parser = ArgumentParser(description="Generate PDFs out of directories of images.")
	    parser.add_argument("-i", "--input-dir", default=".", help="Base directory to start looking for mangas.")
	    parser.add_argument("-o", "--output-dir", default='out', help="Where to save the generated pdfs.")
	    parser.add_argument("-l", "--levels", type=int, default=0, help="Number of subdirectories (e.g. volumes subdirectory).")
	    parser.add_argument("-n", "--name", choices=["directory", "path"], default="directory",
	        help="Use the full path for PDF names (separated with '_'), or only the last directory.")
	    parser.add_argument("--min-pages", type=int, default=40, help="minimum images to consider the directory a manga.")
	    parser.add_argument("--dry-run", action="store_true", help="Only print what will happen, don't actually generate the PDFs.")
	    parser.add_argument("--debug", action="store_true", help="Turn on debug mode.")
	    args = parser.parse_args()
	    if args.levels < 0:
	        parser.error("--levels must be zero or a positive integer.")

	    logging.basicConfig(format="%(levelname)-8s %(message)s", level=logging.INFO)
	    if args.debug:
	        logger.setLevel(logging.DEBUG)

	    # A dry run must leave the filesystem untouched.
	    if not args.dry_run:
	        os.makedirs(args.output_dir, exist_ok=True)

	    for directory in sorted(find_dirs_at_level(args.input_dir, args.levels)):
	        logger.debug(f"Visiting directory {directory}")
	        pdf_file = os.path.join(args.output_dir, _pdf_name(directory, args.name))
	        convert_directory_to_pdf(directory, pdf_file, args.min_pages, args.dry_run)