Skip to content

Instantly share code, notes, and snippets.

@bkaankuguoglu
Last active June 4, 2018 08:19
Show Gist options
  • Save bkaankuguoglu/eef6578b228cd583200f06e15cc6fdc3 to your computer and use it in GitHub Desktop.
Save bkaankuguoglu/eef6578b228cd583200f06e15cc6fdc3 to your computer and use it in GitHub Desktop.
This program contains some utility functions for pdf files, e.g. conversion from pdf to jpg, or vice versa.
#=======================================================================#
# pdf_utils.py #
#=======================================================================#
# usage: pdf_utils.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR] [-m METHOD]
#
# This program contains some utility functions for pdf files.
#
# optional arguments:
# -h, --help show this help message and exit
# -i INPUT_DIR, --input_dir INPUT_DIR
# Input directory for the files to be modified
# -o OUTPUT_DIR, --output_dir OUTPUT_DIR
# Output directory for the files to be modified
# -m METHOD, --method METHOD
# The method that is applied: p2j (pdf to jpg)
# or j2p (jpg to pdf).
#=======================================================================#
#=======================================================================#
# Sample usage: #
#=======================================================================#
# python pdf_utils.py --input_dir docs --output_dir output --method p2j
# python pdf_utils.py --input_dir output --output_dir output --method j2p
#=======================================================================#
from pdf2image import *
import os
import time
from fpdf import FPDF
import argparse
import glob
# Maps each supported operation to the short code that selects it
# through the --method command-line option.
arg_keywords = {
    "pdf2jpg": "p2j",
    "jpg2pdf": "j2p",
}
def convert_pdf(file, file_format, dpi, input_dir=None, output_dir=None):
    """Convert every page of one PDF into an image file.

    Pages are written to ``<output_dir>/<PDF name without extension>/`` as
    ``1<file_format>``, ``2<file_format>``, ... Files whose name does not end
    in ``.pdf`` (case-insensitive) are ignored.

    Args:
        file: Name of the PDF, relative to ``input_dir``.
        file_format: Extension of the generated images, including the dot
            (e.g. ``".jpg"``).
        dpi: Rendering resolution passed to ``convert_from_path``.
        input_dir: Directory containing ``file``. When omitted, the
            module-level ``input_dir`` set by the command-line entry point
            is used.
        output_dir: Directory under which the per-document folder is
            created. When omitted, the module-level ``output_dir`` is used.

    Raises:
        ValueError: If a directory is neither passed nor set at module level.
    """
    # Imported here because `from pdf2image import *` is not guaranteed to
    # re-export the standard-library `tempfile` module.
    import tempfile

    if not file.lower().endswith(".pdf"):
        return

    # Fall back to the globals assigned by the script entry point so that
    # existing three-argument calls keep working.
    if input_dir is None:
        input_dir = globals().get("input_dir")
    if output_dir is None:
        output_dir = globals().get("output_dir")
    if input_dir is None or output_dir is None:
        raise ValueError("input_dir and output_dir must be provided")

    # splitext keeps dots inside the name ("a.v2.pdf" -> "a.v2"), so
    # documents that share a first name segment do not share a folder.
    file_name = os.path.splitext(os.path.basename(file))[0]
    output_file_dir = os.path.join(output_dir, file_name)
    file_path = os.path.join(input_dir, file)
    os.makedirs(output_file_dir, exist_ok=True)

    # The rendered pages are backed by files in the temporary folder, so
    # they are saved before the folder is removed.
    with tempfile.TemporaryDirectory() as path:
        images_from_path = convert_from_path(file_path, dpi=dpi, output_folder=path)
        size = len(images_from_path)
        for i, image in enumerate(images_from_path, start=1):
            prefix = "> " + file + " is now being converted to " + file_format + " format..." + \
                     " [" + str(i) + "/" + str(size) + "]"
            printProgressBar(i, size, prefix=prefix + ' Progress:', suffix='Complete', length=25)
            image.save(os.path.join(output_file_dir, str(i) + file_format))
def _page_sort_key(path):
    """Return a sort key that orders numeric file names numerically.

    Purely numeric names ("2.jpg", "10.jpg") come first in numeric order;
    any other name follows in plain string order.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.isdecimal():
        return (0, int(stem), path)
    return (1, 0, path)


def create_pdf(file, image_dir):
    """Merge the ``.jpg`` images of a folder into a single PDF.

    Each image becomes one page, in page-number order. If the folder holds
    no ``.jpg`` files, nothing is written and a notice is printed instead.

    Args:
        file: Path of the PDF to write.
        image_dir: Folder containing the page images.
    """
    image_list = glob.glob(os.path.join(image_dir, '*.jpg'))
    # A plain string sort would place "10.jpg" before "2.jpg" and scramble
    # the pages of any document longer than nine pages.
    image_list.sort(key=_page_sort_key)
    size = len(image_list)
    if not size:
        print("No .jpg images found in " + image_dir + "; nothing to merge.")
        return

    pdf = FPDF()
    for i, image in enumerate(image_list, start=1):
        prefix = "> " + os.path.basename(file) + " is now being generated... " + \
                 " [" + str(i) + "/" + str(size) + "]"
        printProgressBar(i, size, prefix=prefix + ' Progress:', suffix='Complete', length=25)
        pdf.add_page()
        # 210 x 297 is the A4 page size in millimetres: the image is
        # stretched over the whole page.
        pdf.image(image, 0, 0, 210, 297)
    pdf.output(file, "F")
    print("Congrats! " + os.path.basename(file) + " is created.")
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Draw a one-line text progress bar on standard output.

    The line starts and ends with a carriage return so that successive
    calls overwrite each other; a newline is emitted once ``iteration``
    equals ``total``.

    Args:
        iteration: Number of steps completed so far.
        total: Total number of steps. A total of 0 is drawn as complete.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimals shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
    """
    if total:
        fraction = iteration / float(total)
        filled_length = int(length * iteration // total)
    else:
        # Nothing to do: show a full bar instead of dividing by zero.
        fraction = 1.0
        filled_length = length
    percent = "{0:.{1}f}".format(100 * fraction, decimals)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    # Move to a fresh line once the work is complete.
    if iteration == total:
        print()
def show_license():
    """Print the program banner: title, copyright notice and usage summary."""
    banner_parts = (
        "\n# ======================================================================================= #\n",
        "# PDF Utils - Converts pdf files into jpg format, or vice versa. #\n",
        "# Copyright © 2018 - Berk Kaan Kuguoglu #\n",
        "# ======================================================================================= #\n",
        "# usage: pdf_utils.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR] [-m METHOD] #\n",
        "# ======================================================================================= #\n",
        "# optional arguments: #\n",
        "# -h, --help show this help message and exit #\n",
        "# -i INPUT_DIR, --input_dir INPUT_DIR #\n",
        "# Input directory for the files to be modified #\n",
        "# -o OUTPUT_DIR, --output_dir OUTPUT_DIR #\n",
        "# Output directory for the files to be modified #\n",
        "# -m METHOD, --method METHOD #\n",
        "# The method that is applied, e.g. p2j denotes 'pdf to jpg'. #\n",
        "# ======================================================================================= #\n",
    )
    print("".join(banner_parts))
if __name__ == '__main__':
    # Command-line entry point: print the banner, parse the options and run
    # the requested conversion over every entry of the input directory.
    show_license()
    parser = argparse.ArgumentParser(description="This program contains some utility functions for pdf files.")
    # All three options are needed by both methods; requiring them turns a
    # later TypeError on a missing value into a clear usage message.
    parser.add_argument("-i", "--input_dir", required=True,
                        help="Input directory for the files to be modified")
    parser.add_argument("-o", "--output_dir", required=True,
                        help="Output directory for the files to be modified")
    parser.add_argument("-m", "--method", required=True,
                        help="The method that is applied: p2j (pdf to jpg) or j2p (jpg to pdf).")
    args = parser.parse_args()

    # Kept as module-level names: convert_pdf reads input_dir and output_dir
    # as globals.
    input_dir = args.input_dir
    output_dir = args.output_dir
    method = args.method

    if arg_keywords["pdf2jpg"] in method:
        for file in sorted(os.listdir(input_dir)):
            # Only PDFs are converted; this also skips files such as .DS_Store
            # and avoids printing a timing line for files that were ignored.
            if not file.lower().endswith(".pdf"):
                continue
            start = time.perf_counter()
            convert_pdf(file=file, file_format=".jpg", dpi=300)
            finish = time.perf_counter()
            print("It took " + str(finish - start) + " seconds to convert " + file + " into .jpg format.")
    elif arg_keywords["jpg2pdf"] in method:
        for file in sorted(os.listdir(input_dir)):
            # Each sub-folder of the input directory holds the page images of
            # one document; plain files (e.g. .DS_Store) are skipped.
            image_dir = os.path.join(input_dir, file)
            if not os.path.isdir(image_dir):
                continue
            start = time.perf_counter()
            # The PDF goes to <output_dir>/<name>/<name>.pdf, which is the
            # image folder itself when input and output directories match.
            target_dir = os.path.join(output_dir, file)
            os.makedirs(target_dir, exist_ok=True)
            pdf_name = file + ".pdf"
            pdf_path = os.path.join(target_dir, pdf_name)
            create_pdf(pdf_path, image_dir)
            finish = time.perf_counter()
            print("It took " + str(finish - start) + " seconds to merge the document " + pdf_name + ".")
    else:
        parser.error("unknown method '" + method + "'; expected one of: "
                     + ", ".join(sorted(arg_keywords.values())))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment