Skip to content

Instantly share code, notes, and snippets.

Last active January 24, 2021 07:27
Show Gist options
  • Save mmmikael/0a3d4fae965bdbec1f9d to your computer and use it in GitHub Desktop.
Save mmmikael/0a3d4fae965bdbec1f9d to your computer and use it in GitHub Desktop.
Keras example for siamese training on mnist
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
import random
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import *
from keras.optimizers import SGD, RMSprop
from keras import backend as K
def euclidean_distance(inputs):
    """Return the row-wise Euclidean distance between two backend tensors.

    inputs: sequence holding exactly two tensors ``(u, v)``.
    Returns ``sqrt(sum((u - v) ** 2))`` reduced along axis 1 with
    ``keepdims=True``.

    Raises AssertionError when ``inputs`` does not hold exactly two items.
    """
    assert len(inputs) == 2, \
        'Euclidean distance needs 2 inputs, %d given' % len(inputs)
    u, v = inputs
    # Go through the backend reduction rather than the tensor's own `.sum`
    # method so the function does not depend on one backend's tensor class.
    squared_dist = K.sum(K.square(u - v), axis=1, keepdims=True)
    # The derivative of sqrt is unbounded at 0, so identical inputs would
    # yield inf/NaN gradients; clamp to a tiny positive value first.
    return K.sqrt(K.maximum(squared_dist, K.epsilon()))
def contrastive_loss(y, d):
    """Contrastive loss from Hadsell-et-al.'06.

    y: pair labels; the squared distance is weighted by ``y`` and the margin
       term by ``1 - y`` (in this file positive pairs are labelled 1 and
       negative pairs 0).
    d: distances predicted for the pairs.

    Returns the mean over all elements of
    ``y * d**2 + (1 - y) * max(margin - d, 0)**2`` with ``margin = 1``.
    """
    margin = 1
    similar_term = y * K.square(d)
    # Dissimilar pairs are only penalised while closer than the margin.
    dissimilar_term = (1 - y) * K.square(K.maximum(margin - d, 0))
    return K.mean(similar_term + dissimilar_term)
def create_pairs(x, digit_indices):
    """Positive and negative pair creation.

    Alternates between positive and negative pairs.

    x: indexable collection of samples.
    digit_indices: one entry per class, each a sequence of indices into
        ``x`` for the samples of that class. The number of classes is
        taken from ``len(digit_indices)``.

    Returns ``(pairs, labels)`` as numpy arrays; ``labels`` alternates
    1 (same class) and 0 (different classes).

    Raises ValueError when fewer than two classes are given, because no
    negative pair can be formed.
    """
    num_classes = len(digit_indices)
    if num_classes < 2:
        raise ValueError(
            'create_pairs needs at least 2 classes, %d given' % num_classes)
    pairs = []
    labels = []
    # Every class contributes the same number of pairs, bounded by the
    # smallest class; the "- 1" keeps the i + 1 lookup below in range.
    n = min(len(digit_indices[d]) for d in range(num_classes)) - 1
    for d in range(num_classes):
        for i in range(n):
            anchor = digit_indices[d][i]
            # positive pair: two consecutive samples of the same class
            pairs.append([x[anchor], x[digit_indices[d][i + 1]]])
            # negative pair: a non-zero offset modulo the class count
            # always lands on a different class
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            pairs.append([x[anchor], x[digit_indices[dn][i]]])
            labels += [1, 0]
    return np.array(pairs), np.array(labels)
def create_base_network(in_dim):
    """Base network to be shared (eq. to feature extraction).

    in_dim: size of the input vectors, used as the first layer's
        ``input_shape``.

    Returns a ``Sequential`` model made of three 128-unit ``Dense`` layers
    with ReLU activations.
    """
    seq = Sequential()
    # Only the first layer needs to declare the input shape.
    seq.add(Dense(128, input_shape=(in_dim,), activation='relu'))
    seq.add(Dense(128, activation='relu'))
    seq.add(Dense(128, activation='relu'))
    return seq
def compute_accuracy(predictions, labels):
    """Compute classification accuracy with a fixed threshold on distances.

    predictions: array of predicted distances (any shape; it is flattened).
    labels: array of pair labels, 1 for similar and 0 for dissimilar.

    A pair is predicted "similar" when its distance is below 0.5. Returns
    the fraction of pairs whose predicted class matches the label.
    """
    predicted_similar = np.asarray(predictions).ravel() < 0.5
    # Compare against every label: averaging only the labels of the pairs
    # predicted similar would measure precision and give NaN when no
    # distance falls under the threshold.
    return np.mean(predicted_similar == np.asarray(labels).ravel())
# the data, shuffled and split between train and test sets;
# each set is reshaped to one 784-value row per sample, cast to float32
# and divided by 255
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784).astype('float32')
X_test = X_test.reshape(10000, 784).astype('float32')
X_train /= 255
X_test /= 255
in_dim, nb_epoch = 784, 20
# build the positive/negative pairs, first for the training set and then
# for the test set; digit_indices[i] holds the positions of label i
digit_indices = [np.flatnonzero(y_train == i) for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.flatnonzero(y_test == i) for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
# network definition
# create a Sequential for each element of the pairs
input1 = Sequential()
input2 = Sequential()
# share base network with both inputs
# G_w(input1), G_w(input2) in article
base_network = create_base_network(in_dim)
add_shared_layer(base_network, [input1, input2])
# merge outputs of the base network and compute euclidean distance
# D_w(input1, input2) in article
lambda_merge = LambdaMerge([input1, input2], euclidean_distance)
# create main network
model = Sequential()
# the merge layer must be attached to the main network; otherwise `model`
# stays empty and `lambda_merge` is never used
model.add(lambda_merge)
# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
# each sample is a pair, so the two branches are fed as a two-element list
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128, nb_epoch=nb_epoch,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y))
# final accuracy: predict the distance for every pair of the training set,
# then of the test set, and score each against its labels
tr_acc = compute_accuracy(
    model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]), tr_y)
pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(pred, te_y)
print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
Copy link

vbalnt commented Mar 15, 2016

Hi @mmmikael, any tips on how to extract the individual trained network from the example above for autonomous usage?
E.g something like base_network.predict(input)


Copy link

@vbalnt For that you can use the graph based API instead of using model.add() to add layers. Graph based API will let you define pathways from input to the output, and you compile models from different pathways for your application.

Copy link

In your example, create_base_network() creates a sequential network. If the network is not sequential (for example, ResNet), how can I implement this using Model()? The problem is that Model() needs an input, which can't be given because we need to share the model.

Copy link

Have you observed any considerable improvement in performance using a siamese neural network (on datasets other than MNIST, since a simple CNN also gives good accuracy there)?

Copy link

FYI ... This code needs some fixing since Keras doesn't have add_shared_layer anymore ... see the same example fixed here in Keras repo:

Copy link

Thank you very much, it helps me a lot for coding and understanding...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment