Last active
May 24, 2022 13:22
-
-
Save emuccino/675371c393953f306c191d0d68f49075 to your computer and use it in GitHub Desktop.
generate_adversaries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#custom activation: clamps every element of a tensor into the closed range [0.0, 1.0]
def clip(x):
    """Return ``x`` with each value limited to the range 0.0 .. 1.0 via ``K.clip``."""
    lower_bound, upper_bound = 0.0, 1.0
    return K.clip(x, lower_bound, upper_bound)
#custom loss function for non-targeted misclassification
def negative_categorical_crossentropy(yTrue,yPred):
    """Return the categorical crossentropy of ``yPred`` against ``yTrue`` with its sign flipped.

    Minimizing this value maximizes the ordinary crossentropy, i.e. it pushes
    the prediction away from the class encoded in ``yTrue``.
    """
    crossentropy = K.categorical_crossentropy(yTrue, yPred)
    return 0.0 - crossentropy
#register the custom activation and loss under the string names they are referred to by
get_custom_objects().update({
    'clip': Activation(clip),
    'negative_categorical_crossentropy': negative_categorical_crossentropy,
})
#function for generating an adversarial example given a base image, adversarial class target, classifier, and regularization type
def generate_adversary(img,target,model,regularization,loss_function,
                       epochs=10000,weights_path='./adversarial_weights.h5'):
    """Learn additive noise that changes how ``model`` classifies ``img``.

    A wrapper network is built around the classifier: a bias-free Dense layer
    fed with a constant 1 holds one trainable weight per pixel (the noise),
    the noise is added to the image, the sum is clipped to [0, 1] and passed
    to ``model``. Only the noise weights are trained.

    Args:
        img: base image as an array exposing ``reshape``; it must hold
            28*28 values (it is reshaped to (1, 28, 28, 1)).
        target: class index (0-9) set to 1 in the one-hot label used for
            training. With a loss that is minimized toward the label this is
            the class to reach; with 'negative_categorical_crossentropy' it
            is the class to move away from.
        model: classifier called on the (28, 28, 1) adversarial image.
            NOTE: it is frozen (``trainable = False``) and stays frozen
            after this function returns.
        regularization: kernel regularizer applied to the noise weights.
        loss_function: loss passed to ``compile`` (name or callable).
        epochs: number of training epochs for the noise.
        weights_path: file the best (lowest-loss) weights are saved to and
            restored from.

    Returns:
        The (28, 28) adversarial image: base image plus noise rounded to
        multiples of 1/255, clipped to [0, 1].

    Side effects:
        Writes ``weights_path``, shows the image with matplotlib and prints
        the classifier's prediction for it.
    """
    #batch of one image in the layout expected by the 'image' input
    base_image = img.reshape((1,28,28,1))

    #input for base image
    image = Input(shape=(28,28,1),name='image')
    #unit input for adversarial noise
    one = Input(shape=(1,),name='unity')

    #layer for learning adversarial noise to apply to image
    noise = Dense(28*28,activation=None,use_bias=False,kernel_initializer='random_normal',
                  kernel_regularizer=regularization,name='adversarial_noise')(one)
    #reshape noise in shape of image
    noise = Reshape((28,28,1),name='reshape')(noise)

    #add noise to image
    net = Add(name='add')([noise,image])
    #clip values to be within 0.0 and 1.0
    net = Activation('clip',name='clip_values')(net)

    #freeze the trained classifier so that only the noise layer is updated;
    #done on the object itself rather than by position in the layer list
    model.trainable = False

    #feed adversarial image to the trained classifier
    outputs = model(net)
    adversarial_model = Model(inputs=[image,one],outputs=outputs)
    adversarial_model.compile(optimizer='nadam',loss=loss_function,metrics=[categorical_accuracy])

    #one-hot label for the requested class
    target_vector = np.zeros(10)
    target_vector[target] = 1.

    #callback for saving weights with smallest loss (checked every epoch, the default)
    checkpoint = ModelCheckpoint(weights_path,monitor='loss',verbose=0,save_best_only=True,
                                 save_weights_only=True,mode='auto')

    #train adversarial image; dict keys must match the Input layer names ('image', 'unity')
    adversarial_model.fit(x={'image':base_image,'unity':np.ones(shape=(1,1))},
                          y=target_vector.reshape(1,-1),epochs=epochs,verbose=0,
                          callbacks=[checkpoint])

    #restore best weights
    adversarial_model.load_weights(weights_path)

    #fetch the learned noise by layer name instead of relying on weight ordering
    noise_weights = adversarial_model.get_layer('adversarial_noise').get_weights()[0]
    #quantize adversarial noise to 8-bit pixel steps
    quantized_weights = np.round(noise_weights.reshape((28,28)) * 255.) / 255.

    #add trained weights to original image and clip values to produce adversarial image
    adversarial_img = np.clip(base_image.reshape((28,28)) + quantized_weights,0.,1.)

    #display adversarial image
    plt.imshow(adversarial_img,vmin=0.,vmax=1.)
    plt.show()

    #classify adversarial image with the classifier that was passed in (not a global)
    adversarial_prediction = model.predict(adversarial_img.reshape((1,28,28,1)))
    print(adversarial_prediction)

    return adversarial_img
#run every (target, loss) experiment once per regularization type
#factories are used so that each call receives a freshly built regularizer
regularizer_factories = (
    lambda: l1(0.01),
    lambda: l2(0.01),
    lambda: l1_l2(l1=0.01,l2=0.01),
)
experiments = (
    (5,'negative_categorical_crossentropy'),
    (9,'categorical_crossentropy'),
)
for target_class,loss_name in experiments:
    for make_regularizer in regularizer_factories:
        generate_adversary(img,target_class,mnist_model,make_regularizer(),loss_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Typo on line 5 (funciton -> function) :) Thanks for the example!