I hereby claim:
- I am dhruvp on github.
- I am dhruvp (https://keybase.io/dhruvp) on keybase.
- I have a public key ASC0fur_0LU-eVzu1bB6HenqIogLLpB2_eUYrs-MqoO3AAo
To claim this, I am signing this object:
def update_weights(weights, expectation_g_squared, g_dict, decay_rate, learning_rate):
    """Apply one RMSProp step to every layer, in place.

    See: http://sebastianruder.com/optimizing-gradient-descent/index.html#rmsprop

    Mutates all three dicts: `expectation_g_squared` gets the new running
    average of squared gradients, `weights` is nudged along the accumulated
    gradient, and `g_dict` (the per-batch gradient buffer) is zeroed.
    Returns None.
    """
    epsilon = 1e-5  # guards against division by zero in the denominator
    for name in weights:
        grad = g_dict[name]
        # Exponentially-decayed running average of the squared gradient.
        new_avg = decay_rate * expectation_g_squared[name] + (1 - decay_rate) * grad ** 2
        expectation_g_squared[name] = new_avg
        # Gradient ASCENT step (rewards are maximized), scaled per-element.
        weights[name] += learning_rate * grad / np.sqrt(new_avg + epsilon)
        # Reset the batch gradient buffer for the next batch of episodes.
        g_dict[name] = np.zeros_like(weights[name])
# NOTE(review): script-level fragment — `episode_number`, `batch_size`,
# `weights`, `expectation_g_squared`, `g_dict`, `decay_rate`, `learning_rate`
# all come from the surrounding training loop, which is not visible in this
# excerpt. The trailing "| |" tokens are table-extraction artifacts, not Python.
# Every `batch_size` episodes, apply the accumulated gradients via RMSProp.
if episode_number % batch_size == 0: | |
        update_weights(weights, expectation_g_squared, g_dict, decay_rate, learning_rate) |
def compute_gradient(gradient_log_p, hidden_layer_values, observation_values, weights):
    """Backpropagate through the two-layer policy network.

    See: http://neuralnetworksanddeeplearning.com/chap2.html

    Args:
        gradient_log_p: per-step gradient of the log-probabilities
            (already discounted by the caller).
        hidden_layer_values: stacked hidden-layer activations for the episode.
        observation_values: stacked observations for the episode.
        weights: dict with keys '1' and '2' holding the layer weight arrays.

    Returns:
        dict keyed '1' and '2' with the weight gradients, matching the
        layout of `weights`.
    """
    # Output-layer error term: the (discounted) log-probability gradient.
    delta_L = gradient_log_p
    # Layer-2 weight gradient: hidden activations^T dot the output error.
    dC_dw2 = np.dot(hidden_layer_values.T, delta_L).ravel()
    # Propagate the error back through the layer-2 weights.
    delta_l2 = np.outer(delta_L, weights['2'])
    # NOTE(review): applying relu() to the backpropagated delta zeroes the
    # negative deltas; the tutorial uses this in place of masking by the sign
    # of the forward pre-activations — kept as-is to preserve behavior.
    delta_l2 = relu(delta_l2)
    # Layer-1 weight gradient.
    dC_dw1 = np.dot(delta_l2.T, observation_values)
    return {
        '1': dC_dw1,
        '2': dC_dw2,
    }
# Tweak the gradient of the log_ps based on the discounted rewards | |
# NOTE(review): script-level fragment — `discount_with_rewards`, `gamma`, and
# the `episode_*` arrays are defined elsewhere in the file. The trailing "| |"
# tokens are table-extraction artifacts, not Python.
episode_gradient_log_ps_discounted = discount_with_rewards(episode_gradient_log_ps, episode_rewards, gamma) | |
# Backprop the discounted log-probability gradients through the network.
gradient = compute_gradient( | |
    episode_gradient_log_ps_discounted, | |
    episode_hidden_layer_values, | |
    episode_observations, | |
    weights | |
) |
# Combine the following values for the episode | |
# NOTE(review): script-level fragment — stacks the per-step lists accumulated
# during the episode into 2-D arrays (one row per timestep) so the gradient
# computation can work on whole-episode matrices. The trailing "| |" tokens
# are table-extraction artifacts, not Python.
episode_hidden_layer_values = np.vstack(episode_hidden_layer_values) | |
episode_observations = np.vstack(episode_observations) | |
episode_gradient_log_ps = np.vstack(episode_gradient_log_ps) | |
episode_rewards = np.vstack(episode_rewards) |
# see here: http://cs231n.github.io/neural-networks-2/#losses | |
# NOTE(review): script-level fragment — `action` and `up_probability` come from
# the sampling step earlier in the loop (not visible here). Treats the sampled
# action as the "correct" label (presumably action 2 == move up — confirm
# against the env's action space) so the sigmoid-output gradient is
# label - p(up). The trailing "| |" tokens are table-extraction artifacts.
fake_label = 1 if action == 2 else 0 | |
loss_function_gradient = fake_label - up_probability | |
episode_gradient_log_ps.append(loss_function_gradient) |