Last active
June 4, 2018 08:19
-
-
Save bkaankuguoglu/eef6578b228cd583200f06e15cc6fdc3 to your computer and use it in GitHub Desktop.
This program contains some utility functions for pdf files, e.g. conversion from pdf to jpg, or vice versa.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#=======================================================================# | |
# pdf_utils.py # | |
#=======================================================================# | |
# usage: pdf_utils.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR] [-m METHOD] | |
# | |
# This program contains some utility functions for pdf files. | |
# | |
# optional arguments: | |
# -h, --help show this help message and exit | |
# -i INPUT_DIR, --input_dir INPUT_DIR | |
# Input directory for the files to be modified | |
# -o OUTPUT_DIR, --output_dir OUTPUT_DIR | |
# Output directory for the files to be modified | |
# -m METHOD, --method METHOD | |
# The method that is applied: p2j (pdf to jpg)
# or j2p (jpg to pdf).
#=======================================================================# | |
#=======================================================================# | |
# Sample usage: # | |
#=======================================================================# | |
# python pdf_utils.py --input_dir docs --output_dir output --method p2j | |
# python pdf_utils.py --input_dir output --output_dir output --method j2p
#=======================================================================# | |
import argparse
import glob
import os
import re
import tempfile
import time

from fpdf import FPDF
from pdf2image import *
# Short keywords accepted by --method, keyed by the conversion they select.
arg_keywords = dict(pdf2jpg="p2j", jpg2pdf="j2p")
def convert_pdf(file, file_format, dpi):
    """Render every page of one PDF to its own image file.

    The PDF is read from ``<input_dir>/<file>`` and page ``n`` (counting from
    1) is written to ``<output_dir>/<stem>/<n><file_format>``, where ``stem``
    is the file name without its ``.pdf`` extension. ``input_dir`` and
    ``output_dir`` are module-level globals assigned by the command-line
    entry point, so they must be set before this function is called.

    Args:
        file: Name of the PDF inside ``input_dir``. Names that do not end in
            ``.pdf`` are ignored and nothing is written.
        file_format: Extension appended to each page number, including the
            leading dot (e.g. ``".jpg"``).
        dpi: Resolution handed to ``convert_from_path``.
    """
    if not file.endswith(".pdf"):
        return

    # splitext keeps dots inside the stem ("report.v2.pdf" -> "report.v2"), so
    # two PDFs that only differ after the first dot no longer share a folder.
    stem = os.path.splitext(os.path.basename(file))[0]
    output_file_dir = os.path.join(output_dir, stem)
    file_path = os.path.join(input_dir, file)
    os.makedirs(output_file_dir, exist_ok=True)

    with tempfile.TemporaryDirectory() as scratch_dir:
        pages = convert_from_path(file_path, dpi=dpi, output_folder=scratch_dir)
        size = len(pages)
        # Save while the scratch folder still exists: with output_folder set,
        # the returned images may still be backed by files stored there.
        for number, page in enumerate(pages, start=1):
            prefix = "> " + file + " is now being converted to " + file_format + " format..." + \
                     " [" + str(number) + "/" + str(size) + "]"
            printProgressBar(number, size, prefix=prefix + ' Progress:', suffix='Complete', length=25)
            page.save(os.path.join(output_file_dir, str(number) + file_format))
def _natural_sort_key(path):
    """Return a sort key that orders digit runs in a file name by value."""
    tokens = re.split(r"(\d+)", os.path.basename(path))
    # Splitting on a capturing group alternates text, digits, text, ... so the
    # odd positions are exactly the digit runs and types never mix per index.
    parts = [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(tokens)]
    # The full path breaks ties between names that differ only by case.
    return parts, path


def create_pdf(file, image_dir):
    """Merge the ``.jpg`` images of a directory into a single PDF.

    Every ``*.jpg`` directly inside ``image_dir`` becomes one page, stretched
    to 210 x 297 (A4 in FPDF's default millimetre units). Pages follow the
    natural order of the file names, so ``2.jpg`` comes before ``10.jpg``.

    Args:
        file: Path of the PDF to write.
        image_dir: Directory that holds the page images.
    """
    # Escape the directory so characters such as "[" are matched literally.
    pattern = os.path.join(glob.escape(image_dir), '*.jpg')
    image_list = sorted(glob.glob(pattern), key=_natural_sort_key)
    size = len(image_list)
    pdf = FPDF()
    for number, image in enumerate(image_list, start=1):
        prefix = "> " + os.path.basename(file) + " is now being generated... " + \
                 " [" + str(number) + "/" + str(size) + "]"
        printProgressBar(number, size, prefix=prefix + ' Progress:', suffix='Complete', length=25)
        pdf.add_page()
        pdf.image(image, 0, 0, 210, 297)
    pdf.output(file, "F")
    print("Congrats! " + os.path.basename(file) + " is created.")
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Draw a single-line text progress bar on standard output.

    The line is written between carriage returns so that successive calls
    overwrite each other; a newline is emitted once ``iteration`` equals
    ``total``.

    Args:
        iteration: Number of steps completed so far.
        total: Total number of steps. ``0`` is treated as an already finished
            job instead of raising ``ZeroDivisionError``.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
    """
    if total:
        fraction = iteration / float(total)
        filled_length = int(length * iteration // total)
    else:
        # Nothing to do means nothing left to do: show a full bar.
        fraction = 1.0
        filled_length = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    # Move to a fresh line once the job is complete.
    if iteration == total:
        print()
def show_license():
    """Print the program banner: title, copyright notice and usage summary."""
    banner_parts = (
        "\n# ======================================================================================= #\n",
        "# PDF Utils - Converts pdf files into jpg format, or vice versa. #\n",
        "# Copyright © 2018 - Berk Kaan Kuguoglu #\n",
        "# ======================================================================================= #\n",
        "# usage: pdf_utils.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR] [-m METHOD] #\n",
        "# ======================================================================================= #\n",
        "# optional arguments: #\n",
        "# -h, --help show this help message and exit #\n",
        "# -i INPUT_DIR, --input_dir INPUT_DIR #\n",
        "# Input directory for the files to be modified #\n",
        "# -o OUTPUT_DIR, --output_dir OUTPUT_DIR #\n",
        "# Output directory for the files to be modified #\n",
        "# -m METHOD, --method METHOD #\n",
        "# The method that is applied, e.g. p2j denotes 'pdf to jpg'. #\n",
        "# ======================================================================================= #\n",
    )
    # Each part already ends with its own newline; print() adds the final one.
    print("".join(banner_parts))
if __name__ == '__main__':
    # Command-line entry point: print the banner, parse the arguments and run
    # the requested conversion over every entry of the input directory.
    show_license()

    parser = argparse.ArgumentParser(description="This program contains some utility functions for pdf files.")
    parser.add_argument("-i", "--input_dir", help="Input directory for the files to be modified")
    parser.add_argument("-o", "--output_dir", help="Output directory for the files to be modified")
    parser.add_argument("-m", "--method",
                        help="The method that is applied: p2j (pdf to jpg) or j2p (jpg to pdf).")
    args = parser.parse_args()

    # Fail with a usage message instead of a TypeError on `in None`, or of
    # os.listdir(None) silently listing the current directory.
    missing = [flag for flag, value in (("--input_dir", args.input_dir),
                                        ("--output_dir", args.output_dir),
                                        ("--method", args.method)) if not value]
    if missing:
        parser.error("missing required argument(s): " + ", ".join(missing))

    # convert_pdf() reads input_dir/output_dir as module-level globals, so
    # these names must keep being assigned at module scope.
    input_dir = args.input_dir
    output_dir = args.output_dir
    method = args.method

    if arg_keywords["pdf2jpg"] in method:
        for file in sorted(os.listdir(input_dir)):
            # convert_pdf() ignores anything that is not a .pdf; skipping it
            # here as well avoids reporting a timing for work never done.
            if not file.endswith(".pdf"):
                continue
            start = time.time()
            convert_pdf(file=file, file_format=".jpg", dpi=300)
            finish = time.time()
            print("It took " + str(finish - start) + " seconds to convert " + file + " into .jpg format.")
    elif arg_keywords["jpg2pdf"] in method:
        for file in sorted(os.listdir(input_dir)):
            # Each sub-directory of input_dir holds the pages of one document;
            # stray files such as .DS_Store are not directories and are skipped.
            image_dir = os.path.join(input_dir, file)
            if not os.path.isdir(image_dir):
                continue
            start = time.time()
            # The PDF goes to <output_dir>/<name>/<name>.pdf, which is the same
            # location as before whenever input_dir and output_dir coincide.
            pdf_dir = os.path.join(output_dir, file)
            os.makedirs(pdf_dir, exist_ok=True)
            pdf_name = file + ".pdf"
            pdf_path = os.path.join(pdf_dir, pdf_name)
            create_pdf(pdf_path, image_dir)
            finish = time.time()
            print("It took " + str(finish - start) + " seconds to merge the document " + pdf_name + ".")
    else:
        parser.error("unknown method '" + method + "'; expected one of: "
                     + ", ".join(arg_keywords.values()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment