Last active
August 25, 2022 04:41
-
-
Save johnbumgarner/265f3137592a3cf5ba670e84ae404e65 to your computer and use it in GitHub Desktop.
This function is designed to generate comparison scores between two images using aHash from ImageHash.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################################### | |
# The concurrent.futures module is part of the standard Python library which provides | |
# a high level API for launching asynchronous tasks. | |
###################################################################################### | |
import concurrent.futures | |
###################################################################################### | |
# The Python module Pillow is the fork of PIL, the Python Imaging Library
# reference: https://pillow.readthedocs.io/en/3.0.x/index.html | |
###################################################################################### | |
# This module is used to load images | |
from PIL import Image | |
####################################################################################################################### | |
# This Python module was developed by Johannes Buchner
# source: https://github.com/JohannesBuchner/imagehash | |
# | |
# The module has 4 hashing methods: | |
# | |
# 1. aHash - average hash, for each of the pixels output 1 if the pixel is bigger or equal to the average and | |
# 0 otherwise. | |
# | |
# 2. pHash - perceptual hash, does the same as aHash, but first it does a Discrete Cosine Transformation
# | |
# 3. dHash - gradient hash, calculates the difference for each of the pixels and compares the difference with the
# average difference.
# | |
# 4. wavelet - wavelet hashing, works in the frequency domain as pHash but it uses Discrete Wavelet Transformation | |
# (DWT) instead of DCT | |
# | |
# aHash, pHash and dHash all use the same approach: | |
# 1. Scale an image into a grayscale 8x8 image | |
# 2. Performs some calculations for each of these 64 pixels and assigns a binary 1 or 0 value. These 64 bits form | |
# the output of algorithm | |
# | |
####################################################################################################################### | |
import imagehash | |
############################################################################################# | |
# The os module provides functions for interacting with the operating system.
#
# os.path provides various functions to handle pathnames.
# os.walk() generates the file names in a directory tree by walking the tree.
############################################################################################# | |
from os import path, walk | |
def compute_average_hash_computational_score_for_base_image(base_image):
    """
    Calculates the average hash (aHash) for the base image.

    The aHash (also called Mean Hash) algorithm crunches an image into a grayscale 8x8 image
    and sets the 64 bits in the hash based on whether each pixel's value is greater than the
    average color for the image.

    :param base_image: path (or file object) of the image to calculate the average hash for
    :return: primary image hash, as returned by imagehash.average_hash()
    """
    # Open the image in a context manager so the underlying file handle is released
    # as soon as the hash has been computed, instead of waiting for garbage collection.
    with Image.open(base_image) as loaded_image:
        base_hash = imagehash.average_hash(loaded_image)
    return base_hash
def compute_average_hash_computational_score_for_comparison_image(comparison_image):
    """
    Calculates the average hash (aHash) for a comparison image.

    The aHash (also called Mean Hash) algorithm crunches an image into a grayscale 8x8 image
    and sets the 64 bits in the hash based on whether each pixel's value is greater than the
    average color for the image.

    :param comparison_image: path (or file object) of the image to calculate the average hash for
    :return: comparison image hash, as returned by imagehash.average_hash()
    """
    # Open the image in a context manager so the file handle is closed right after hashing.
    # This function is called once per file in the tree, so leaked handles would add up.
    with Image.open(comparison_image) as loaded_image:
        comparison_hash = imagehash.average_hash(loaded_image)
    return comparison_hash
def determine_image_similarities(base_hash, comparison_hash):
    """
    Score how far apart two image hashes are.

    The score is simply ``base_hash - comparison_hash``. For imagehash.ImageHash operands the
    library defines subtraction as the number of differing bits (Hamming distance), so a
    score of 0 means the two hashes are identical.

    :param base_hash: hash of the base image
    :param comparison_hash: hash of the image being compared against the base image
    :return: computational score (difference between the two hashes)
    """
    return base_hash - comparison_hash
# Directory tree that is searched (recursively) for images to compare against the base image.
image_folder = 'image_directory'

# Lower-case file extensions that are treated as images. Defined once instead of being
# rebuilt for every file visited by the directory walk.
# NOTE(review): Pillow does not appear to ship an SVG reader, so '.svg' files will most
# likely raise when opened -- confirm whether this extension should stay in the list.
accepted_extensions = ('.bmp', '.gif', '.jpg', '.jpeg', '.png', '.svg', '.tiff')

# Hash of the reference image that every other image is scored against.
primary_hash = compute_average_hash_computational_score_for_base_image('base_image_name.jpeg')

# Gather every candidate image path first so all of them can be handed to the pool at once.
image_paths = []
for directory_path, _directory_names, file_names in walk(image_folder):
    for file_name in file_names:
        # Compare case-insensitively so files such as 'PHOTO.JPG' are not silently skipped.
        if file_name.lower().endswith(accepted_extensions):
            image_paths.append(path.join(directory_path, file_name))

with concurrent.futures.ThreadPoolExecutor(max_workers=10) as thread_pool_executor:
    # Executor.map() schedules all images up front, so the workers really run concurrently,
    # and yields the hashes in input order, so scores are printed in directory-walk order.
    # Submitting one future and waiting on it immediately would process one image at a time.
    hashes_to_compare = thread_pool_executor.map(
        compute_average_hash_computational_score_for_comparison_image, image_paths)
    for hash_to_compare in hashes_to_compare:
        comparison_score = determine_image_similarities(primary_hash, hash_to_compare)
        print(comparison_score)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment