Created
May 1, 2021 18:36
-
-
Save imneonizer/11ec411605f72c0f3086f1de4c6c41ed to your computer and use it in GitHub Desktop.
Image hashing techniques
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import cv2 | |
import hashlib | |
import numpy as np | |
import scipy.fftpack | |
class HashEngine:
    """Compute compact image hashes and compare them.

    Supports three perceptual-style hashes ("ahash", "dhash", "phash") and
    falls back to any algorithm name accepted by ``hashlib.new`` (e.g.
    "sha1"). Every hash is computed on the small grayscale image produced
    by ``load_image``, not on the original file bytes.
    """

    def __init__(self, htype="dhash", hash_size=8):
        """Store the default hash type and the side length used for resizing."""
        self.hash_size = hash_size
        self.htype = htype

    def load_image(self, image):
        """Return a small grayscale version of ``image``.

        Args:
            image: a path (``str``) read with ``cv2.imread`` in grayscale
                mode, or an already-loaded array; arrays with three or more
                dimensions are converted with ``cv2.COLOR_BGR2GRAY``.

        Returns:
            The image resized with ``cv2.resize`` to a target size of
            ``(hash_size + 1, hash_size)``. OpenCV takes the size as
            (width, height), so the result has one more column than rows,
            which is what ``dhash`` needs for its neighbour comparison.

        Raises:
            OSError: if ``image`` is a path that OpenCV could not read.
        """
        if isinstance(image, str):
            path = image
            image = cv2.imread(path, 0)  # flag 0 -> load as grayscale
            if image is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with a message that names the path.
                raise OSError("could not read image: {!r}".format(path))
        if len(image.shape) >= 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return cv2.resize(image, (self.hash_size + 1, self.hash_size))

    @staticmethod
    def arr2hex(arr):
        """Pack the truthy/falsy elements of ``arr`` into a hex string.

        The array is flattened and consumed eight elements at a time; within
        each group the first element is the least-significant bit. Each
        group becomes two lowercase hex characters. A trailing group with
        fewer than eight elements is dropped.
        """
        byte, chunks = 0, []
        for index, bit in enumerate(arr.flatten()):
            position = index % 8
            if bit:
                byte += 2 ** position
            if position == 7:
                chunks.append(format(byte, "02x"))
                byte = 0
        return "".join(chunks)

    @staticmethod
    def hex2arr(hexstr):
        """Inverse of ``arr2hex`` for 64-bit hashes.

        Args:
            hexstr: exactly 16 hex characters.

        Returns:
            An 8x8 boolean ``numpy`` array; row ``r`` holds the bits of the
            ``r``-th byte, least-significant bit first.

        Raises:
            ValueError: if ``hexstr`` is not 16 characters long (or contains
                non-hex characters).
        """
        if len(hexstr) != 16:
            raise ValueError('The hex string has the wrong length')
        rows = []
        for byte_index in range(8):
            value = int(hexstr[byte_index * 2:byte_index * 2 + 2], 16)
            rows.append([(value & 2 ** bit) > 0 for bit in range(8)])
        return np.array(rows)

    @staticmethod
    def hex2int(hexstr):
        """Interpret ``hexstr`` as a base-16 integer."""
        return int(hexstr, 16)

    @staticmethod
    def distance(a, b):
        """Return the Hamming distance (number of differing bits) of two hashes.

        Each argument may be a hex-digest string or an integer. Strings are
        always parsed as base 16: a hex digest can consist solely of decimal
        digits (e.g. "10"), so trying a base-10 parse first would silently
        produce a wrong distance. The two arguments may be of different
        kinds (one string, one integer).

        Raises:
            ValueError: if a string argument is not valid hexadecimal.
        """
        def as_int(value):
            if isinstance(value, str):
                return int(value, 16)
            return int(value)

        return bin(as_int(a) ^ as_int(b)).count("1")

    def ahash(self, image):
        """Average hash: one bit per pixel, set where the pixel exceeds the mean.

        The bits come from the full resized image (which has an extra
        column), and the result is cut to the first 16 hex characters.
        """
        image = self.load_image(image)
        above_mean = image > image.mean()
        return self.arr2hex(above_mean)[:16]

    def dhash(self, image):
        """Difference hash: a bit is set where a pixel is brighter than its
        left-hand neighbour in the resized image."""
        image = self.load_image(image)
        brighter_than_left = image[:, 1:] > image[:, :-1]
        return self.arr2hex(brighter_than_left)

    def phash(self, image):
        """Perceptual hash based on the DCT of the resized image.

        Applies ``scipy.fftpack.dct`` along axis 0 and then axis 1, keeps the
        top-left ``hash_size`` x ``hash_size`` coefficients and sets a bit
        wherever a coefficient is above their median.
        """
        image = self.load_image(image)
        dct = scipy.fftpack.dct(scipy.fftpack.dct(image, axis=0), axis=1)
        low_freq = dct[:self.hash_size, :self.hash_size]
        return self.arr2hex(low_freq > np.median(low_freq))

    def hexdigest(self, image, htype=None):
        """Return the hex digest of ``image`` for the requested hash type.

        Args:
            image: path or array, as accepted by ``load_image``.
            htype: "ahash", "dhash", "phash" or a ``hashlib`` algorithm
                name; defaults to the type given to the constructor.

        Raises:
            ValueError: from ``hashlib.new`` when ``htype`` is not a known
                algorithm.
        """
        htype = htype or self.htype
        image_hashes = {
            "ahash": self.ahash,
            "dhash": self.dhash,
            "phash": self.phash,
        }
        if htype in image_hashes:
            return image_hashes[htype](image)
        # Any other name is handed to hashlib; the digest covers the raw
        # buffer of the resized grayscale image returned by load_image.
        algorithm = hashlib.new(htype)
        algorithm.update(self.load_image(image))
        return algorithm.hexdigest()
# Demo: digest the same image twice with SHA-1, then compare the two results
# both as hex strings and as integers.
H = HashEngine("sha1")

# Compute everything up front ...
hexd1 = H.hexdigest("sample3.jpg")
hexd2 = H.hexdigest("sample3.jpg")
intd1, intd2 = H.hex2int(hexd1), H.hex2int(hexd2)
hexdistance = H.distance(hexd1, hexd2)
intdistance = H.distance(intd1, intd2)

# ... then report, in the same order as the sample output below.
print("hexdigest1:", hexd1)
print("intdigest1:", intd1)
print("-"*10)

print("hexdigest2:", hexd2)
print("intdigest2:", intd2)
print("-"*10)

print("hexdistance:", hexdistance)
print("intdistance:", intdistance)

# Sample output:
# hexdigest1: 576f176179c8e3fcb88e86b7f97d82e9b9e2b24f
# intdigest1: 499159616530556630584974124528933127117353431631
# ----------
# hexdigest2: 576f176179c8e3fcb88e86b7f97d82e9b9e2b24f
# intdigest2: 499159616530556630584974124528933127117353431631
# ----------
# hexdistance: 0
# intdistance: 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment