Dhruv Parthasarathy (dhruvp)

Keybase proof

I hereby claim:

  • I am dhruvp on github.
  • I am dhruvp (https://keybase.io/dhruvp) on keybase.
  • I have a public key ASC0fur_0LU-eVzu1bB6HenqIogLLpB2_eUYrs-MqoO3AAo

To claim this, I am signing this object:

import numpy as np

def update_weights(weights, expectation_g_squared, g_dict, decay_rate, learning_rate):
    """ See here: http://sebastianruder.com/optimizing-gradient-descent/index.html#rmsprop"""
    epsilon = 1e-5
    for layer_name in weights.keys():
        g = g_dict[layer_name]
        # RMSProp: keep a decaying average of the squared gradients per layer
        expectation_g_squared[layer_name] = decay_rate * expectation_g_squared[layer_name] + (1 - decay_rate) * g**2
        # scale the step by the root of that average so each weight gets an adapted learning rate
        weights[layer_name] += (learning_rate * g) / (np.sqrt(expectation_g_squared[layer_name] + epsilon))
        g_dict[layer_name] = np.zeros_like(weights[layer_name])  # reset batch gradient buffer

# inside the training loop: apply the accumulated gradients once every batch_size episodes
if episode_number % batch_size == 0:
    update_weights(weights, expectation_g_squared, g_dict, decay_rate, learning_rate)
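
The dictionaries that update_weights expects are not initialized anywhere in this excerpt. A minimal sketch of how they could be set up for a two-layer Pong network, assuming an 80x80 preprocessed frame, 200 hidden units, and hyperparameter values that are illustrative rather than taken from the original:

num_hidden_layer_neurons = 200   # assumed hidden layer size
input_dimensions = 80 * 80       # assumed preprocessed frame size
decay_rate = 0.99                # assumed RMSProp decay
learning_rate = 1e-4             # assumed step size
batch_size = 10                  # assumed episodes per weight update
gamma = 0.99                     # assumed reward discount factor

weights = {
    '1': np.random.randn(num_hidden_layer_neurons, input_dimensions) / np.sqrt(input_dimensions),
    '2': np.random.randn(num_hidden_layer_neurons) / np.sqrt(num_hidden_layer_neurons)
}
# RMSProp cache and per-batch gradient buffer, one entry per layer
expectation_g_squared = {name: np.zeros_like(w) for name, w in weights.items()}
g_dict = {name: np.zeros_like(w) for name, w in weights.items()}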
def compute_gradient(gradient_log_p, hidden_layer_values, observation_values, weights):
    """ See here: http://neuralnetworksanddeeplearning.com/chap2.html"""
    delta_L = gradient_log_p  # error signal at the output layer, one entry per timestep
    dC_dw2 = np.dot(hidden_layer_values.T, delta_L).ravel()
    delta_l2 = np.outer(delta_L, weights['2'])
    delta_l2 = relu(delta_l2)  # pass the error back through a ReLU: zero out negative entries
    dC_dw1 = np.dot(delta_l2.T, observation_values)
    return {
        '1': dC_dw1,
        '2': dC_dw2
    }
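
relu isn't defined in this fragment; a minimal sketch of the helper the code above appears to assume (it simply zeroes out negative entries in place):

def relu(vector):
    # clamp negative values to zero, in place
    vector[vector < 0] = 0
    return vector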
# Tweak the gradient of the log_ps based on the discounted rewards
episode_gradient_log_ps_discounted = discount_with_rewards(episode_gradient_log_ps, episode_rewards, gamma)
gradient = compute_gradient(
    episode_gradient_log_ps_discounted,
    episode_hidden_layer_values,
    episode_observations,
    weights
)
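
discount_with_rewards is called above but not defined in this excerpt. A minimal sketch of the standard policy-gradient version, assuming the usual Pong convention that a nonzero reward marks the end of a point; both helper names and the normalization step are assumptions here:

def discount_rewards(rewards, gamma):
    # walk backwards through the episode, accumulating an exponentially
    # discounted return; reset whenever a point was scored (reward != 0)
    discounted_rewards = np.zeros_like(rewards, dtype=np.float64)
    running_add = 0
    for t in reversed(range(rewards.size)):
        if rewards[t] != 0:
            running_add = 0
        running_add = running_add * gamma + rewards[t]
        discounted_rewards[t] = running_add
    return discounted_rewards

def discount_with_rewards(gradient_log_p, episode_rewards, gamma):
    # weight each log-probability gradient by the normalized discounted
    # return that followed it, so actions preceding good outcomes are reinforced
    discounted_episode_rewards = discount_rewards(episode_rewards, gamma)
    discounted_episode_rewards -= np.mean(discounted_episode_rewards)
    discounted_episode_rewards /= np.std(discounted_episode_rewards)
    return gradient_log_p * discounted_episode_rewards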
# Stack the values collected at each timestep of the episode into arrays
episode_hidden_layer_values = np.vstack(episode_hidden_layer_values)
episode_observations = np.vstack(episode_observations)
episode_gradient_log_ps = np.vstack(episode_gradient_log_ps)
episode_rewards = np.vstack(episode_rewards)
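
The step that feeds this episode's gradient into the g_dict buffer consumed by update_weights isn't shown in these fragments. A sketch of the usual glue code, assuming the same layer keys and that the episode buffers are plain Python lists reset after each episode:

# accumulate this episode's gradient; update_weights applies and clears it
# once every batch_size episodes
for layer_name in gradient:
    g_dict[layer_name] += gradient[layer_name]

# start fresh buffers for the next episode
episode_hidden_layer_values, episode_observations = [], []
episode_gradient_log_ps, episode_rewards = [], []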
# see here: http://cs231n.github.io/neural-networks-2/#losses
fake_label = 1 if action == 2 else 0
loss_function_gradient = fake_label - up_probability
episode_gradient_log_ps.append(loss_function_gradient)
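
The forward pass that produces up_probability and the sampled action isn't part of this excerpt. A minimal sketch of the usual setup (ReLU hidden layer, sigmoid output giving the probability of moving up, and Gym's Pong convention that action 2 is UP and 3 is DOWN); the name apply_neural_nets, the variable processed_observations, and the sampling line are assumptions. With a sigmoid output, fake_label - up_probability above is exactly the gradient of the log-probability of the sampled action with respect to the pre-sigmoid output:

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def apply_neural_nets(observation_matrix, weights):
    # hypothetical forward pass: ReLU hidden layer, sigmoid output
    hidden_layer_values = relu(np.dot(weights['1'], observation_matrix))
    up_probability = sigmoid(np.dot(hidden_layer_values, weights['2']))
    return hidden_layer_values, up_probability

hidden_layer_values, up_probability = apply_neural_nets(processed_observations, weights)
# sample the action: UP (2) with probability up_probability, otherwise DOWN (3)
action = 2 if np.random.uniform() < up_probability else 3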