Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Proof of Concept : generating collisions on a neural perceptual hash
import tensorflow as tf #We need tensorflow 2.x
import numpy as np
# Length of the perceptual hash in bits — one sign bit per unit of the
# network's final Dense layer.
hashLength = 256
def buildModel(input_dim=1000, hidden_units=300):
    """Build the feature network whose last layer holds the LSH hyperplanes.

    The weights are random but fixed for the life of the process; uncomment
    the seed line to simulate a publicly known, unchanging network across
    runs.

    Args:
        input_dim: size of the flattened input vector. Defaults to 1000 for
            backward compatibility with the original demo.
        hidden_units: width of each of the three hidden layers.

    Returns:
        An uncompiled ``tf.keras.Sequential`` mapping a ``(batch, input_dim)``
        tensor to ``(batch, hashLength)`` raw projections (pre-sign features).
    """
    # tf.random.set_seed(42)  # uncomment to make the "known network" reproducible
    model = tf.keras.Sequential()
    # NOTE: shape must be a tuple; the original `shape=(1000)` was a bare int
    # (missing comma), relying on Keras leniency.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(tf.keras.layers.Dense(hidden_units, activation=tf.nn.selu))
    model.add(tf.keras.layers.Dense(hidden_units, activation=tf.nn.selu))
    model.add(tf.keras.layers.Dense(hidden_units, activation=tf.nn.selu))
    # The last layer contains the LSH random hyperplanes, one per hash bit.
    model.add(tf.keras.layers.Dense(hashLength))
    return model
# Random-projection LSH (aka "hyperplane LSH") over the features produced by
# the model: the sign of each output coordinate becomes one bit of the hash.
def computeNeuralHash(m, img):
    """Return the binary hash string of `img` under model `m`.

    Args:
        m: callable model; ``m(img)`` must return an object whose ``.numpy()``
            yields a ``(1, hashLength)`` array of raw projections.
        img: batched input, shape ``(1, input_dim)``.

    Returns:
        A string of '0'/'1' characters: '1' where the projection is strictly
        positive, '0' otherwise.
    """
    # Renamed from `hash` to avoid shadowing the builtin of the same name.
    features = m(img).numpy()[0]
    return "".join("1" if x > 0 else "0" for x in features)
def demo():
    """End-to-end collision demo.

    Picks a random target input, records its neural hash, then gradient-
    descends a second random input until its hash matches the target's.
    Prints intermediate losses and the final comparison.
    """
    model = buildModel()
    # np.random.seed(340)  # uncomment for a reproducible target
    targetimg = np.expand_dims(np.random.randn(1000), 0)
    print(targetimg.shape)
    targetstringhash = computeNeuralHash(model, targetimg)
    print("targetstringhash : ")
    print(targetstringhash)
    # Per-bit sign multiplier fed to the loss: +1.0 drives a feature negative
    # (bit '0'), -1.0 drives it positive (bit '1').
    flip = [1.0 if bit == "0" else -1.0 for bit in targetstringhash]
    print("flip : ")
    print(flip)
    img = np.expand_dims(np.random.randn(1000), 0)
    # A margin makes the recovered hash stable: for bit k the loss pushes the
    # feature into [gap, +inf) when the target bit is 1, and into
    # (-inf, -gap] when it is 0; anything inside the band is penalized.
    gap = 0.1
    loss = 1.0  # start above the threshold so the loop runs at least once
    # Plain gradient descent. An L-BFGS-B optimizer with bound constraints,
    # an extra similarity loss toward a provided image, or a GAN loss to make
    # the result look "natural" would all be straightforward upgrades.
    learning_rate = 1e-2
    while loss > gap * gap:
        loss = distanceBetweenHashes(model, img, flip, gap).numpy()
        print("loss : ")
        print(loss)
        img -= learning_rate * gradient(model, img, flip, gap)
    imgstringhash = computeNeuralHash(model, img)
    print("img : ")
    print(img)
    # The difference is far from zero: a genuinely different image collides.
    print("targetimg - img : ")
    print(targetimg - img)
    print("targetstringhash : ")
    print(targetstringhash)
    print("imgstringhash : ")
    print(imgstringhash)
    # True here means a collision was successfully produced.
    print("targetstringhash == imgstringhash : ")
    print(targetstringhash == imgstringhash)
def distanceBetweenHashes(model, input, flip, gap):
    """Differentiable penalty for `input` not hashing to the target bits.

    Args:
        model: the feature network; ``model(input)`` returns raw projections.
        input: batched input tensor/array, shape ``(1, input_dim)``.
        flip: per-bit sign multipliers (+1.0 for target bit 0, -1.0 for 1).
        gap: margin each feature must clear on the correct side of zero.

    Returns:
        A scalar tensor: L2 loss of the margin violations (zero only when
        every feature is at least `gap` deep on its target side).
    """
    projections = model(input)
    # A feature on the wrong side of its hyperplane (or inside the margin)
    # leaves a positive residue after relu; correct features contribute 0.
    violations = tf.nn.relu(projections * flip + gap)
    return tf.nn.l2_loss(violations)
def gradient(model, x, flip, gap):
    """Gradient of distanceBetweenHashes with respect to the input image.

    Args:
        model: the feature network.
        x: batched input (numpy array), shape ``(1, input_dim)``.
        flip: per-bit sign multipliers passed through to the loss.
        gap: margin passed through to the loss.

    Returns:
        A numpy array of the same shape as `x` with d(loss)/d(x).
    """
    img_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as tape:
        # x arrives as a plain numpy array, so the tape must watch it
        # explicitly — it is not a trainable variable.
        tape.watch(img_tensor)
        loss = distanceBetweenHashes(model, img_tensor, flip, gap)
    return tape.gradient(loss, img_tensor).numpy()
# Script entry point: run the collision demo when executed directly.
if __name__ == "__main__":
    demo()
@cazeip
Copy link

cazeip commented Aug 9, 2021

Impressive, I wonder how sensitive Apple’s recognition algorithm will be.

Seeing what they claim (one in a trillion chances of a false positive), I think it’s just going to select hashes that are very close.

@mlajtos
Copy link

mlajtos commented Aug 18, 2021

@KuromeSan
Copy link

KuromeSan commented Aug 18, 2021

ok, so can you do this in reverse? say have two identical looking images, produce completely different hashes? just curious ;)

@danuker
Copy link

danuker commented Aug 18, 2021

ok, so can you do this in reverse? say have two identical looking images, produce completely different hashes? just curious ;)

My guess is yes, you could start by replacing loss with -loss in distanceBetweenHashes.

@dgutson
Copy link

dgutson commented Aug 20, 2021

To the author of the script, would you be interested in giving an academic tech talk about this?

@unrealwill
Copy link
Author

unrealwill commented Aug 20, 2021

@dgutson no thanks

@simplymathematics
Copy link

simplymathematics commented May 20, 2022

To the author of the script, would you be interested in giving an academic tech talk about this?

The technique outlined is fairly generic and well known. See this link, which is practically identical to this.

I'm a PhD student doing research on subverting Computer Vision models and would happily give a talk on this. I already have slides prepared for 20-60 minute lectures. Send me an email at hello@simplymathematics.xyz if you want to discuss more.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment