Skip to content

Instantly share code, notes, and snippets.

@Schwusch
Last active September 4, 2017 08:38
Show Gist options
  • Save Schwusch/a0cfb8ec68bf4c9253bc3e814c4b85fe to your computer and use it in GitHub Desktop.
Save Schwusch/a0cfb8ec68bf4c9253bc3e814c4b85fe to your computer and use it in GitHub Desktop.
Compare different versions of PDFs in bulk
# Dependencies: Python 3+, ImageMagick
import argparse
import errno
import subprocess
from multiprocessing import Pool
from os import listdir, makedirs, system
from os.path import isfile, join, splitext
def make_sure_path_exists(path):
    """Create the directory *path* (and any missing parents) if needed.

    Args:
        path: Directory to create.

    Raises:
        OSError: If the directory cannot be created, including the case
            where *path* already exists but is not a directory.
    """
    # exist_ok=True tolerates an existing directory but still fails when a
    # regular file occupies the path, so a successful return really does
    # mean the directory is there.
    makedirs(path, exist_ok=True)
def convert(file):
    """Render one document to PNG images with ImageMagick's ``convert``.

    Args:
        file: A ``[filename, folder]`` pair. The input is read from
            ``./<folder>/<filename>`` and the output pattern is
            ``./<folder>_out/<stem>-%d.png`` (the ``%d`` placeholder is
            passed through untouched for ImageMagick to fill in).

    Returns:
        None. Failures are reported on stdout instead of being raised so a
        single bad file does not abort a whole batch.
    """
    filename, folder = file[0], file[1]
    source = "./{0}/{1}".format(folder, filename)
    target = "./{0}_out/{1}-%d.png".format(folder, splitext(filename)[0])
    # An argv list (no shell) keeps file names containing spaces or shell
    # metacharacters intact and rules out command injection.
    command = ["convert", "-density", "150", source, "-quality", "100", target]
    try:
        result = subprocess.run(command)
    except OSError as error:
        print("Could not run ImageMagick convert on {}: {}".format(source, error))
        return
    if result.returncode != 0:
        print("convert failed for {} (exit status {})".format(source, result.returncode))
def compare(file):
    """Diff one PNG between the new and old output folders.

    Runs ImageMagick's ``compare`` on ``./<new>_out/<file>`` and
    ``./<old>_out/<file>`` and writes the difference image to
    ``./compare_out/<file>``. The folder names come from the module-level
    ``args``.

    Args:
        file: PNG file name present in both output folders.

    Returns:
        None. Tool errors are reported on stdout instead of being raised.
    """
    command = [
        "compare",
        "./{0}_out/{1}".format(args.new, file),
        "./{0}_out/{1}".format(args.old, file),
        "./compare_out/{0}".format(file),
    ]
    try:
        # An argv list (no shell) keeps unusual file names intact.
        result = subprocess.run(command)
    except OSError as error:
        print("Could not run ImageMagick compare on {}: {}".format(file, error))
        return
    # ImageMagick documents exit status 1 as "images differ", which is an
    # expected outcome here; anything else non-zero is a real failure.
    if result.returncode not in (0, 1):
        print("compare failed for {} (exit status {})".format(file, result.returncode))
def convert_all_pdfs_to_png(processes=8):
    """Convert every PDF in the new and old folders to PNG, in parallel.

    The input folders are taken from the module-level ``args``. Output
    folders ``./<new>_out``, ``./<old>_out`` and ``./compare_out`` are
    created when missing.

    Args:
        processes: Number of worker processes used for the conversion.
    """
    all_files = []
    for folder in (args.new, args.old):
        # Only hand actual PDFs to ImageMagick; stray files in the folder
        # (editor backups, .DS_Store, ...) are skipped.
        all_files.extend(
            [str(name), folder]
            for name in listdir(folder)
            if isfile(join(folder, name)) and name.lower().endswith(".pdf")
        )

    for path in ["./{}_out".format(args.new), "./{}_out".format(args.old), "./compare_out"]:
        make_sure_path_exists(path)

    if not all_files:
        # Nothing to convert; avoid starting worker processes for no work.
        return

    with Pool(processes) as pool:
        pool.map(convert, all_files)
def compare_old_and_new_pngs():
    """Compare every PNG that exists in both output folders.

    PNGs present in both ``./<new>_out`` and ``./<old>_out`` are passed to
    ``compare``. Files that exist on only one side are listed on stdout
    afterwards. Folder names come from the module-level ``args``.
    """
    new_dir = "./{}_out".format(args.new)
    old_dir = "./{}_out".format(args.old)
    new_pngs = {str(f) for f in listdir(new_dir) if isfile(join(new_dir, f))}
    old_pngs = {str(f) for f in listdir(old_dir) if isfile(join(old_dir, f))}

    # Sorted so runs are reproducible regardless of directory listing order.
    for png in sorted(new_pngs & old_pngs):
        compare(png)

    unmatched = (
        (args.new, new_pngs - old_pngs),
        (args.old, old_pngs - new_pngs),
    )
    for folder, leftovers in unmatched:
        if leftovers:
            print("---------------------------")
            print("Images not processed in \"./{}\":".format(folder))
            for png in sorted(leftovers):
                print(png)
def run():
    """Run the whole pipeline: convert the PDFs to PNG, then diff the PNGs."""
    print("Converting PDF files to PNG...")
    convert_all_pdfs_to_png()
    # Two status lines emitted in one call; the output is unchanged.
    print("Done converting.", "Comparing outputs from new and old...", sep="\n")
    compare_old_and_new_pngs()
    print("Done.")
# Command-line interface. ``args`` stays at module level because the
# functions in this file read the folder names from it.
parser = argparse.ArgumentParser(description='Compare different versions of pdfs and see the difference.')
# ``const`` mirrors the default so a bare ``--old`` / ``--new`` (allowed by
# nargs='?') falls back to the default folder instead of becoming None.
parser.add_argument('--old', nargs='?', help='old pdfs folder', default="old", const="old")
parser.add_argument('--new', nargs='?', help='new pdfs folder', default="new", const="new")
args = parser.parse_args()

# Guard the entry point: with the "spawn" start method, multiprocessing
# workers re-import this module, and an unguarded run() would start the
# whole pipeline again inside every worker.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment