-
-
Save owlwang/b8f528ee171ac7a70dd3b61b41bf51c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import onnx | |
from onnx_tf.backend import prepare | |
import tensorflow as tf | |
import onnxruntime | |
from PIL import Image | |
import numpy as np | |
import sys | |
# You need to have model.onnx and neuralhash_128x96_seed1.dat in your working directory.
# See the previous collisionLSH gist for a simpler and more general case.
# What's new: L-BFGS-B, using the real specific network, an alternative dual loss function.
# This is a neural hash preimage; some images take longer to converge, others converge
# in 30 L-BFGS steps.
# Using only CPU, the amortized time for a collision is between 10 seconds and 10 minutes.
# To improve it you can add some additional losses to make the generated image more natural,
# or you can use a trained conditional image generator and search for the generator input
# that minimizes the distance between the hashes.

# These are the Apple shape constants.
hashLength = 96
featLength = 128

# These are some parameters you can fiddle with to speed up the convergence process.
featureScaling = 100.0  # features are divided by this factor
# How far we want to be from the hyperplane. If this is not enough we may fail by a few
# bits due to the postprocessing (rounding and truncating float to uint8, PNG compression).
gap = 1.0

# Only used when useScipyOpt is False.
# Increasing it makes the code take bigger steps, meaning faster initial convergence but
# slower final convergence. A learning rate that is too high is easy to spot: the loss
# no longer decreases at every step.
learning_rate = 1e-1

# Selects the alternative (dual) loss function.
useDualLoss = False
# Selects scipy's L-BFGS-B optimizer instead of plain gradient descent.
useScipyOpt = True
if useScipyOpt:
    import scipy.optimize
def distanceBetweenHashes(input, model, seed1, flip, gap):
    """Return a scalar loss that penalizes hash bits on the wrong side of their hyperplane.

    Args:
        input: image tensor/array handed to the network as ``model(image=input)``.
            (The name shadows the builtin but is kept for caller compatibility.)
        model: callable returning a mapping that contains the key ``'leaf/logits'``.
        seed1: projection matrix; it is broadcast against the features reshaped to
            ``(1, featLength)`` and summed over axis 1, giving one value per row.
        flip: per-bit signs multiplied into the projections. ``demo`` passes +1.0
            where the target bit is "0" and -1.0 otherwise, so a positive product
            means the bit is currently wrong.
        gap: margin added to each signed projection.

    Returns:
        A scalar TensorFlow tensor. With the default loss this is
        ``tf.nn.l2_loss(relu(signed + gap))``; with ``useDualLoss`` it is a
        softmax-weighted average of the signed projections plus ``gap``.
    """
    features = tf.reshape(model(image=input)['leaf/logits'], (1, featLength))
    lshfeat = tf.reduce_sum(features * seed1, axis=1)

    # Manual preconditioning: dividing by a positive constant rescales the
    # projections without changing their sign. The original code had two
    # identical branches on useScipyOpt here; they are merged.
    flatfeat = tf.reshape(lshfeat, (-1,)) / featureScaling
    signed = flatfeat * flip

    if useDualLoss:
        # The hard dual loss would be tf.reduce_max(signed) + gap with stopping
        # criterion loss < 0, but that is not smooth enough, so the max is
        # replaced by a softmax-weighted average with a smoothing length.
        # The loss < 0 stopping criterion is only valid if smoothing_length and
        # gap are both high enough.
        smoothing_length = 5.0
        loss = tf.reduce_sum(tf.nn.softmax(smoothing_length * signed) * signed) + gap
    else:
        loss = tf.nn.l2_loss(tf.nn.relu(signed + gap))

    # You can add additional loss terms here to make the produced image more natural.
    return loss
def gradient(x, model, seed1, flip, gap):
    """Return the gradient of ``distanceBetweenHashes`` with respect to ``x``.

    ``x`` is converted to a float32 tensor, the loss is recorded on a gradient
    tape, and the resulting gradient is handed back as a NumPy array.
    """
    image_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as tape:
        # A plain tensor is not tracked automatically, so watch it explicitly.
        tape.watch(image_tensor)
        loss_value = distanceBetweenHashes(image_tensor, model, seed1, flip, gap)
    image_gradient = tape.gradient(loss_value, image_tensor)
    return image_gradient.numpy()
# Last point at which the stopping criterion was met; written by
# npdistanceBetweenHashes just before it aborts the optimizer.
goalvalue = None


# This is the wrapper to provide for scipy, which needs flat inputs of double precision.
def npdistanceBetweenHashes(flatinput, model, seed1, flip, gap):
    """Evaluate the loss on a flat vector and abort the optimizer once the goal is met.

    Args:
        flatinput: flat array that is reshaped to ``(1, 3, 360, 360)`` and cast
            to float32 before being passed to ``distanceBetweenHashes``.
        model, seed1, flip, gap: forwarded unchanged to ``distanceBetweenHashes``.

    Returns:
        The loss as returned by ``.numpy()`` on the loss tensor.

    Raises:
        ValueError: with message "Goal Reached" when the stopping criterion is
            satisfied. The point is stored in the module-level ``goalvalue``
            first, so the caller can recover it after catching the exception.
    """
    global goalvalue
    input = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    loss = distanceBetweenHashes(input, model, seed1, flip, gap).numpy()
    print("loss : ")
    print(loss)

    if useDualLoss:
        threshold = 0
    else:
        # tf.nn.l2_loss is sum(t ** 2) / 2, so loss < gap**2 / 2 forces every
        # relu(signed + gap) below gap, i.e. every signed projection below 0.
        # The previous threshold gap**2 allowed a bit to remain on the wrong
        # side by up to (sqrt(2) - 1) * gap.
        threshold = 0.5 * gap * gap

    if loss < threshold:
        # Keep an independent copy so the saved point cannot alias a buffer
        # owned by the caller.
        goalvalue = np.array(flatinput, copy=True)
        raise ValueError("Goal Reached")
    return loss
# scipy works on flat float64 vectors; this adapts `gradient` to that convention.
def npgradient(flatinput, model, seed1, flip, gap):
    """Return the loss gradient as a flat float64 array for a flat input vector."""
    image = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    image_grad = gradient(image, model, seed1, flip, gap)
    return np.reshape(image_grad, (-1,)).astype(np.float64)
def getArrFromImageName(imgname):
    """Load an image file and return it as a (1, 3, 360, 360) float32 array in [-1, 1].

    The image is converted to RGB, resized to 360x360, scaled from 0..255 to
    -1..1 and moved from height-width-channel to channel-first order.
    """
    rgb = Image.open(imgname).convert('RGB').resize([360, 360])
    pixels = np.array(rgb).astype(np.float32) / 255.0
    pixels = pixels * 2.0 - 1.0
    channel_first = pixels.transpose(2, 0, 1)
    return channel_first.reshape([1, 3, 360, 360])
def computeHashInteractiveSession(imgname, seed1):
    """Hash an image file with onnxruntime running "model.onnx".

    Returns a ``(bits, hex)`` pair: ``bits`` has one '1'/'0' character per row
    of ``seed1`` (a '1' where the projection is >= 0), and ``hex`` is the same
    value in zero-padded hexadecimal.
    """
    pixels = getArrFromImageName(imgname)
    runtime = onnxruntime.InferenceSession("model.onnx")
    first_input = runtime.get_inputs()[0].name
    logits = runtime.run(None, {first_input: pixels})[0]
    projections = seed1.dot(logits.flatten())
    bits = ''.join('1' if value >= 0 else '0' for value in projections)
    # Four bits per hex digit.
    width = len(bits) // 4
    return bits, format(int(bits, 2), '0{}x'.format(width))
def computeHashTF(imgname, compute_features, seed1):
    """Hash an image file using the TensorFlow callable ``compute_features``.

    ``compute_features`` is invoked as ``compute_features(image=...)`` and its
    ``'leaf/logits'`` output is projected with ``seed1``. Returns a
    ``(bits, hex)`` pair built the same way as in
    ``computeHashInteractiveSession``.
    """
    pixels = getArrFromImageName(imgname)
    logits = compute_features(image=pixels)['leaf/logits'].numpy()
    projections = seed1.dot(logits.flatten())
    bits = ''.join('1' if value >= 0 else '0' for value in projections)
    # Four bits per hex digit.
    width = len(bits) // 4
    return bits, format(int(bits, 2), '0{}x'.format(width))
def demo(imageTargetHash, startingImage, outputimage):
    """Optimize ``startingImage`` until it hashes like ``imageTargetHash`` and save it.

    Steps:
      1. Convert "model.onnx" to a TensorFlow saved model in "mytfmodel".
      2. Read the projection matrix from "neuralhash_128x96_seed1.dat".
      3. Compute the target hash with both onnxruntime and TensorFlow and abort
         if they disagree.
      4. Minimize ``distanceBetweenHashes`` starting from ``startingImage``,
         using L-BFGS-B when ``useScipyOpt`` is set and clipped gradient
         descent otherwise.
      5. Write the result to ``outputimage``, re-hash the written file and
         report whether it matches the target.

    Args:
        imageTargetHash: path of the image whose hash should be reproduced.
        startingImage: path of the image used as optimization starting point.
        outputimage: path the resulting image is written to.

    Raises:
        SystemExit: with status 1 when the exported TensorFlow model and the
            ONNX model disagree on the target hash.
    """
    global goalvalue

    model = onnx.load('model.onnx')  # Load the ONNX file
    tf_rep = prepare(model)  # Import the ONNX model to TensorFlow
    tf_rep.export_graph("mytfmodel")  # Export it to disk
    print(tf_rep.inputs)  # Input nodes to the model
    print(tf_rep.outputs)  # Output nodes from the model

    # The first 128 bytes of the file are skipped; the remainder is read as
    # float32 values. The handle is closed as soon as the bytes are read.
    with open("neuralhash_128x96_seed1.dat", 'rb') as seedfile:
        seed1 = seedfile.read()[128:]
    seed1 = np.frombuffer(seed1, dtype=np.float32)
    seed1 = seed1.reshape([hashLength, featLength])

    hash_bits, hash_hex = computeHashInteractiveSession(imageTargetHash, seed1)

    # Load the converted model back from disk.
    mytfmodel = tf.saved_model.load("mytfmodel")
    compute_features = mytfmodel.signatures["serving_default"]
    hash_bits_tf, hash_hex_tf = computeHashTF(imageTargetHash, compute_features, seed1)
    if hash_hex != hash_hex_tf:
        print("something went wrong in the tf model export as hash computed with tensorflow isn't the same as hash computed by onnx")
        # sys.exit instead of the site-provided exit(), with a failure status.
        sys.exit(1)
    print("Target hash : ")
    print(hash_hex_tf)

    # When the target bit is 1 the optimizer pushes the feature into [gap, +inf);
    # when it is 0, into (-inf, -gap]. Anything else is penalized.
    flip = [1.0 if x == "0" else -1.0 for x in hash_bits_tf]

    # Same preprocessing as for hashing, so reuse the helper.
    arr = getArrFromImageName(startingImage)

    loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
    print("initial loss : ")
    print(loss)

    if useScipyOpt:
        flatarr = np.reshape(arr, (-1,)).astype(np.float64)
        print("before scipy.optimize.fmin_l_bfgs_b")
        # Keep every pixel inside the valid image domain.
        bounds = [(-1.00, 1.00)] * flatarr.size
        # Forget any point left over from an earlier run so that a stale value
        # can never be mistaken for this run's result.
        goalvalue = None
        try:
            optresult = scipy.optimize.minimize(
                npdistanceBetweenHashes, flatarr, jac=npgradient, bounds=bounds,
                args=(compute_features, seed1, flip, gap),
                method='L-BFGS-B',
                options={'disp': True, 'maxcor': 8},
            )
        except ValueError as e:
            # npdistanceBetweenHashes aborts the optimizer with a ValueError
            # after storing the successful point in goalvalue.
            print(str(e))
            if goalvalue is None:
                # Not the goal-reached signal: a real failure, so propagate it
                # instead of trying to reshape None.
                raise
            arr = np.reshape(goalvalue, [1, 3, 360, 360])
        else:
            arr = np.reshape(optresult.x, [1, 3, 360, 360])
        print("after scipy.optimize.fmin_l_bfgs_b")
    else:
        if useDualLoss:
            threshold = 0
        else:
            # tf.nn.l2_loss is sum(t ** 2) / 2, so only loss < gap**2 / 2
            # guarantees that every bit is on the right side of its hyperplane.
            threshold = 0.5 * gap * gap
        print("will finish when loss < " + str(threshold))
        while True:
            grad = gradient(arr, compute_features, seed1, flip, gap)
            # Gradient step, then constrain the image to its domain.
            arr = np.clip(arr - learning_rate * grad, -1.0, 1.0)
            loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
            print("loss : ")
            print(loss)
            if loss < threshold:
                break

    # arr now contains the result. At this point the neural hash should match,
    # but it still needs to survive quantization and compression. The greater
    # the gap parameter, the rarer that failure will be.
    # Convert [-1.0, 1.0] -> [0, 255], rounding to the nearest level (rather
    # than truncating) to keep the saved pixels as close as possible to the
    # optimized ones.
    reshaped = arr.reshape([3, 360, 360]).transpose(1, 2, 0)
    reshaped = np.clip(np.rint((reshaped + 1) / 2 * 255.0), 0, 255).astype(np.uint8)
    j = Image.fromarray(reshaped)
    j.save(outputimage)

    print("Checking the hash of the output image")
    outhash_bits_tf, outhash_hex_tf = computeHashTF(outputimage, compute_features, seed1)
    print("output hash : ")
    print(outhash_hex_tf)
    print("target hash : ")
    print(hash_hex_tf)
    if outhash_hex_tf == hash_hex_tf:
        print("Collision Found !")
    else:
        print("Failed to find collision")
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("usage python3 collisionNeuralHash.py imageTarget.png startingImage.png outputImage.png")
        # Wrong invocation is an error: exit non-zero, and use sys.exit rather
        # than the interactive-shell helper exit().
        sys.exit(1)
    # We are trying to generate an image that has the same hash as imageTarget.png.
    # Some starting images are easier than others; if the optimization gets
    # stuck in a local minimum, try another startingImage.
    demo(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment