Skip to content

Instantly share code, notes, and snippets.

@Wolfenswan
Last active April 25, 2019 15:22
Show Gist options
  • Save Wolfenswan/918e008ff8667af5287d10143f6e134d to your computer and use it in GitHub Desktop.
Save Wolfenswan/918e008ff8667af5287d10143f6e134d to your computer and use it in GitHub Desktop.
Bulk image processor + pdf creator using PIL & Reportlab
"""
Bulk image processor + pdf creator
I had to digitize, sort and process a large amount of archival material page by page. Unfortunately, all pages only existed as single .jpg files.
I wrote this script to automate the tedious process of renaming + resizing the images, as well as putting them in a single pdf.
It will:
- process all folders in its current directory
- create a backup of the original images
- rename all images according to the name of the directory containing them
- resize all images (atm. to dpi150 standards), maintaining ratio
- create a pdf containing all images
It is currently very much tailored to a specific job but, should the need arise, it can be made more dynamic and possibly cater to user input to some extent.
"""
import os
import shutil
from reportlab.pdfgen.canvas import Canvas
from PIL import Image, ExifTags
# Name of the folder (joined onto the root directory) that receives the
# untouched copies of every processed folder; folders whose name starts with
# this value are skipped while walking the tree.
BACKUP_DIR_NAME = '_originale'
# Value handed to PIL's Image.save(..., quality=...) when the scaled image is
# written back to disk.
JPG_COMPRESSION = 80
def process_folders(root_dir, backup_dir):
    """Walk ``root_dir`` top-down and process every folder below it.

    Each directory other than ``root_dir`` itself is handed to
    ``process_files`` together with the file names it contains. Directories
    whose name starts with ``BACKUP_DIR_NAME`` are pruned from the walk so
    that backups are never processed.

    Args:
        root_dir: Directory whose sub-folders are processed.
        backup_dir: Path of the backup directory. Accepted for the caller's
            convenience; it is not used inside this function.
    """
    for current_dir, sub_dirs, file_names in os.walk(root_dir, topdown=True):
        # In-place slice assignment is required: os.walk only honours pruning
        # when the very list object it yielded is modified.
        sub_dirs[:] = [
            name for name in sub_dirs if not name.startswith(BACKUP_DIR_NAME)
        ]
        if current_dir == root_dir:
            # Files lying directly in the root are left alone.
            continue
        process_files(current_dir, file_names)
def process_files(dir, fileList, backup_root=None):
    """Back up, rename and rescale the images of one folder and build its PDF.

    If the folder already contains a PDF it is considered done and skipped.
    Otherwise the folder is copied to the backup location (unless a backup
    already exists), every image is renamed to ``<folder>_<index><ext>``,
    scaled via ``scale_image`` and appended as one page to
    ``<folder>/<folder>.pdf``.

    Args:
        dir: Path of the folder to process. (The name shadows the builtin but
            is kept so existing callers keep working.)
        fileList: Names of the files inside ``dir``, as yielded by ``os.walk``.
        backup_root: Directory that holds the per-folder backups. Defaults to
            ``<current working directory>/BACKUP_DIR_NAME``, which is what the
            script entry point uses. Having a default here means the function
            no longer relies on a module-level ``root_dir`` that only exists
            when the file is run as a script.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')

    dir_name = os.path.basename(dir)
    if backup_root is None:
        backup_root = os.path.join(os.getcwd(), BACKUP_DIR_NAME)
    backup_dir = os.path.join(backup_root, dir_name)
    print(f'Accessing {dir_name}, containing {len(fileList)} files.')

    # A PDF in the folder marks it as already processed.
    if any(name.lower().endswith('.pdf') for name in fileList):
        print(f'PDF found, ignoring {dir_name}...')
        return

    # Create a backup if necessary
    if not os.path.isdir(backup_dir):
        print(f'Creating backup of {dir_name}')
        shutil.copytree(dir, backup_dir)

    # Create basic pdf file
    pdf_path = os.path.join(dir, f'{dir_name}.pdf')
    pdf = Canvas(pdf_path)

    # os.walk yields file names in arbitrary order; sort them so that page
    # order and numbering are reproducible. The sort is plain string order
    # ("_1", "_10", "_2", ...); switch to a natural sort key if numeric page
    # order is ever required.
    for i, fname in enumerate(sorted(fileList)):
        # splitext copes with suffixes of any length (e.g. ".jpeg").
        file_type = os.path.splitext(fname)[1]
        file_path = os.path.join(dir, fname)
        new_file = os.path.join(dir, f'{dir_name}_{i}{file_type}')
        if os.path.isfile(new_file):
            # Never overwrite an existing file; the page is skipped.
            print('ERROR: file exists')
        elif file_type.lower() in image_suffixes:
            print(f'Renaming {fname} to {dir_name}_{i}{file_type}')
            os.rename(file_path, new_file)
            image = scale_image(new_file)
            width, height = image.size
            # One PDF page per image, sized exactly to the scaled image.
            pdf.setPageSize((width, height))
            pdf.drawImage(new_file, 0, 0, width, height, preserveAspectRatio=True)
            pdf.showPage()

    print('Writing pdf (might take a while...)')
    pdf.save()
def scale_image(image_path):
    """Rotate an image upright, shrink it to the 150 dpi page box and save it.

    The image is rotated according to its EXIF ``Orientation`` tag (if any),
    scaled down in place so that it fits the landscape or portrait bounding
    box while keeping its aspect ratio, and written back to ``image_path``.

    Args:
        image_path: Path of the image file; the file is overwritten.

    Returns:
        The processed ``PIL.Image.Image``.
    """
    # Counter-clockwise rotation (degrees) that undoes each EXIF Orientation
    # value. Mirrored orientations (2, 4, 5, 7) are not handled; see
    # https://www.impulseadventure.com/photo/exif-orientation.html
    rotation_by_orientation = {3: 180, 6: 270, 8: 90}

    image = Image.open(image_path)

    # Not every image class exposes _getexif, and it returns None when the
    # file carries no EXIF block, so fall back to an empty mapping.
    read_exif = getattr(image, '_getexif', None)
    exif_data_raw = (read_exif() if read_exif is not None else None) or {}
    exif_data = {
        ExifTags.TAGS.get(tag, tag): value
        for tag, value in exif_data_raw.items()
    }

    angle = rotation_by_orientation.get(exif_data.get('Orientation'))
    if angle is not None:
        image = image.rotate(angle, expand=True)

    # Pick the bounding box AFTER rotating: a 90 degree turn swaps width and
    # height, and choosing the box from the unrotated size would squeeze the
    # page into the wrong orientation.
    w, h = image.size
    size = (1754, 1240) if w > h else (1240, 1754)  # Dpi150
    # For Dpi72 use (842, 595) / (595, 842) instead.

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image.thumbnail(size, Image.LANCZOS)
    image.save(image_path, optimize=True, quality=JPG_COMPRESSION)
    return image
if __name__ == '__main__':
    # Script entry point: treat the current working directory as the root.
    # Both names stay module-level globals because other code in this file
    # may read them.
    root_dir = os.getcwd()
    backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME)
    # Create the main backup directory as required (no-op when it exists).
    os.makedirs(backup_dir, exist_ok=True)
    process_folders(root_dir, backup_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment