Last active
November 14, 2016 07:29
-
-
Save yusuke0519/0a408aaa98f6e899632d0fe24a24312b to your computer and use it in GitHub Desktop.
温度付きsoftmaxの勾配の大きさの確認 (Checking the gradient magnitudes of a temperature-scaled softmax)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras.backend as K | |
import numpy as np | |
from keras.layers import Input, Dense, Lambda, Activation | |
from keras.models import Model | |
def temped_softmax(x, T=1):
    """Compute the temperature-scaled softmax of ``x``.

    Dividing the logits by the temperature ``T`` flattens (T > 1) or
    sharpens (T < 1) the resulting probability distribution.

    Args:
        x: array of logits; normalization is over axis 0, so a 1-D
           vector of scores is the expected input.
        T: temperature; ``T=1`` gives the ordinary softmax.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1
        along axis 0.
    """
    z = np.asarray(x, dtype=float) / T
    # Subtract the max before exponentiating: softmax is invariant to a
    # constant shift, and this prevents overflow in np.exp for large logits
    # (the original np.exp(x/T) / sum(...) form produces inf/nan there).
    z = z - np.max(z, axis=0)
    e = np.exp(z)
    return e / np.sum(e, axis=0)
def get_model(T):
    """Build a small MLP whose logits are scaled by 1/T before softmax.

    Architecture: 2-d input -> Dense(100, tanh) -> Dense(10) ->
    divide-by-T (Lambda) -> softmax.  The temperature layer lets the
    caller compare gradient magnitudes across different T values.

    Args:
        T: softmax temperature (scalar, captured by the Lambda layer).

    Returns:
        A compiled Keras model (Keras 1.x ``input=``/``output=`` API).
    """
    # NOTE(review): 'binary_crossentropy' over a 10-way softmax is unusual;
    # presumably intentional for this gradient-magnitude experiment, but
    # 'categorical_crossentropy' would be the standard choice -- confirm.
    inputs = Input(shape=[2])  # renamed from `input` to avoid shadowing the builtin
    probs = Dense(100, activation='tanh')(inputs)
    probs = Dense(10)(probs)
    probs = Lambda(lambda x: x / T)(probs)
    probs = Activation('softmax')(probs)
    model = Model(input=inputs, output=probs)
    model.compile(optimizer='sgd', loss='binary_crossentropy')
    return model
def print_gradients(model, T):
    """Print the mean gradient of each trainable weight tensor.

    Feeds a fixed 2-d input and a temperature-T soft target through the
    model's loss and reports ``np.mean(grad)`` per weight tensor, so that
    gradient magnitudes can be compared across temperatures.

    Args:
        model: compiled Keras model as produced by ``get_model``.
        T: temperature used to build the soft target distribution
           (should match the model's own temperature).
    """
    X = [[1, 2]]
    # Fixed logits turned into a soft target distribution at temperature T.
    z = np.array([0.1, 1.0, 0.5, 0.01, -0.2, 0.7, 2.0, -0.3, -0.1, 0.01])
    s = [temped_softmax(z, T)]
    # Filter down to the weight tensors whose owning layer is trainable
    # (weight.name[:-2] strips the ':0' tensor suffix to get the layer name).
    weights = [w for w in model.trainable_weights
               if model.get_layer(w.name[:-2]).trainable]
    gradients = model.optimizer.get_gradients(model.total_loss, weights)
    input_tensors = [model.inputs[0],          # input data
                     model.sample_weights[0],  # per-sample loss weights
                     model.targets[0],         # labels
                     K.learning_phase()]       # train/test flag
    get_gradients = K.function(inputs=input_tensors, outputs=gradients)
    inputs = [X,    # X
              [1],  # sample weights
              s,    # y
              0]    # learning phase 0 = TEST mode
    print("Result (T={})".format(T))
    # FIX: the original used a Python-2 print statement here (a syntax
    # error under Python 3, and inconsistent with the print() call above)
    # and evaluated the gradient function twice; evaluate exactly once.
    for grad in get_gradients(inputs):
        print(np.mean(grad))
# Build two models that differ only in softmax temperature, copy the
# weights so they start from the identical point, then compare the
# gradient magnitudes produced at T=1 versus T=10.
model1 = get_model(T=1)
model2 = get_model(T=10)
model2.set_weights(model1.get_weights())  # identical starting weights

print_gradients(model1, T=1)
print_gradients(model2, T=10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment