Last active
November 14, 2016 07:29
-
-
Save yusuke0519/0a408aaa98f6e899632d0fe24a24312b to your computer and use it in GitHub Desktop.
温度付きsoftmaxの勾配の大きさの確認 (Checking the gradient magnitudes of a temperature-scaled softmax)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras.backend as K | |
import numpy as np | |
from keras.layers import Input, Dense, Lambda, Activation | |
from keras.models import Model | |
def temped_softmax(x, T=1):
    """Compute the temperature-scaled softmax of ``x``.

    Dividing the logits by the temperature ``T`` flattens (T > 1) or
    sharpens (T < 1) the resulting probability distribution.

    Args:
        x: array of logits; normalization is over axis 0, so a 1-D
           vector of scores is the expected input.
        T: temperature; ``T=1`` gives the ordinary softmax.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1
        along axis 0.
    """
    z = np.asarray(x, dtype=float) / T
    # Subtract the max before exponentiating: softmax is invariant to a
    # constant shift, and this prevents overflow in np.exp for large logits
    # (the original np.exp(x/T) / sum(...) form produces inf/nan there).
    z = z - np.max(z, axis=0)
    e = np.exp(z)
    return e / np.sum(e, axis=0)
def get_model(T):
    """Build a small MLP whose logits are scaled by 1/T before softmax.

    Architecture: 2-d input -> Dense(100, tanh) -> Dense(10) ->
    divide-by-T (Lambda) -> softmax.  The temperature layer lets the
    caller compare gradient magnitudes across different T values.

    Args:
        T: softmax temperature (scalar, captured by the Lambda layer).

    Returns:
        A compiled Keras model (Keras 1.x ``input=``/``output=`` API).
    """
    # NOTE(review): 'binary_crossentropy' over a 10-way softmax is unusual;
    # presumably intentional for this gradient-magnitude experiment, but
    # 'categorical_crossentropy' would be the standard choice -- confirm.
    inputs = Input(shape=[2])  # renamed from `input` to avoid shadowing the builtin
    probs = Dense(100, activation='tanh')(inputs)
    probs = Dense(10)(probs)
    probs = Lambda(lambda x: x / T)(probs)
    probs = Activation('softmax')(probs)
    model = Model(input=inputs, output=probs)
    model.compile(optimizer='sgd', loss='binary_crossentropy')
    return model
def print_gradients(model, T):
    """Print the mean gradient of each trainable weight tensor.

    Feeds a fixed 2-d input and a temperature-T soft target through the
    model's loss and reports ``np.mean(grad)`` per weight tensor, so that
    gradient magnitudes can be compared across temperatures.

    Args:
        model: compiled Keras model as produced by ``get_model``.
        T: temperature used to build the soft target distribution
           (should match the model's own temperature).
    """
    X = [[1, 2]]
    # Fixed logits turned into a soft target distribution at temperature T.
    z = np.array([0.1, 1.0, 0.5, 0.01, -0.2, 0.7, 2.0, -0.3, -0.1, 0.01])
    s = [temped_softmax(z, T)]
    # Filter down to the weight tensors whose owning layer is trainable
    # (weight.name[:-2] strips the ':0' tensor suffix to get the layer name).
    weights = [w for w in model.trainable_weights
               if model.get_layer(w.name[:-2]).trainable]
    gradients = model.optimizer.get_gradients(model.total_loss, weights)
    input_tensors = [model.inputs[0],          # input data
                     model.sample_weights[0],  # per-sample loss weights
                     model.targets[0],         # labels
                     K.learning_phase()]       # train/test flag
    get_gradients = K.function(inputs=input_tensors, outputs=gradients)
    inputs = [X,    # X
              [1],  # sample weights
              s,    # y
              0]    # learning phase 0 = TEST mode
    print("Result (T={})".format(T))
    # FIX: the original used a Python-2 print statement here (a syntax
    # error under Python 3, and inconsistent with the print() call above)
    # and evaluated the gradient function twice; evaluate exactly once.
    for grad in get_gradients(inputs):
        print(np.mean(grad))
# Build two models that differ only in softmax temperature, copy the
# weights so they start from the identical point, then compare the
# gradient magnitudes produced at T=1 versus T=10.
model1 = get_model(T=1)
model2 = get_model(T=10)
model2.set_weights(model1.get_weights())  # identical starting weights

print_gradients(model1, T=1)
print_gradients(model2, T=10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment