Pylearn2 RMSProp
# Excerpt from Pylearn2's RMSProp learning rule. This is a method of the
# RMSProp class, so `self.decay`, `self.epsilon`, and
# `self.mean_square_grads` are attributes set by the enclosing class.
# The imports below are the ones this excerpt relies on.
from collections import OrderedDict
import warnings

import theano.tensor as T
from pylearn2.utils import sharedX


def get_updates(self, learning_rate, grads, lr_scalers=None):
    """
    Provides the symbolic (theano) description of the updates needed to
    perform this learning rule. See Notes for side effects.

    Parameters
    ----------
    learning_rate : float
        Learning rate coefficient.
    grads : dict
        A dictionary mapping from the model's parameters to their
        gradients.
    lr_scalers : dict
        A dictionary mapping from the model's parameters to a learning
        rate multiplier.

    Returns
    -------
    updates : OrderedDict
        A dictionary mapping from the old model parameters to their new
        values after a single iteration of the learning rule.

    Notes
    -----
    This method has the side effect of storing the moving average
    of the square gradient in `self.mean_square_grads`. This is
    necessary in order for the monitoring channels to be able
    to track the value of these moving averages.
    Therefore, this method should only get called once for each
    instance of RMSProp.
    """
    updates = OrderedDict()
    for param in grads:
        # mean_square_grad := E[g^2]_{t-1}
        mean_square_grad = sharedX(param.get_value() * 0.)

        if param.name is None:
            raise ValueError("Model parameters must be named.")
        mean_square_grad.name = 'mean_square_grad_' + param.name

        if param.name in self.mean_square_grads:
            warnings.warn("Calling get_updates more than once on the "
                          "gradients of `%s` may make monitored values "
                          "incorrect." % param.name)

        # Store variable in self.mean_square_grads for monitoring.
        self.mean_square_grads[param.name] = mean_square_grad

        # Accumulate the moving average of the squared gradient:
        # E[g^2]_t = decay * E[g^2]_{t-1} + (1 - decay) * g_t^2
        new_mean_squared_grad = (self.decay * mean_square_grad +
                                 (1 - self.decay) * T.sqr(grads[param]))

        # Compute the update: scale the gradient by the root mean square,
        # clipped below by epsilon for numerical stability
        scaled_lr = lr_scalers.get(param, 1.) * learning_rate
        rms_grad_t = T.sqrt(new_mean_squared_grad)
        rms_grad_t = T.maximum(rms_grad_t, self.epsilon)
        delta_x_t = - scaled_lr * grads[param] / rms_grad_t

        # Apply update
        updates[mean_square_grad] = new_mean_squared_grad
        updates[param] = param + delta_x_t

    return updates
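To make the update rule above concrete outside Theano, here is a minimal NumPy sketch of the same RMSProp step. It is not Pylearn2 code: the function name `rmsprop_step` and the default values for `lr`, `decay`, and `epsilon` are illustrative assumptions standing in for `learning_rate`, `self.decay`, and `self.epsilon` above.

# Minimal NumPy sketch of one RMSProp step (illustrative, not Pylearn2 code).
import numpy as np

def rmsprop_step(param, grad, mean_square_grad, lr=0.001, decay=0.9,
                 epsilon=1e-7):
    """Return the updated parameter and the new running average E[g^2]."""
    # Accumulate the exponential moving average of the squared gradient:
    # E[g^2]_t = decay * E[g^2]_{t-1} + (1 - decay) * g_t^2
    new_msg = decay * mean_square_grad + (1.0 - decay) * grad ** 2
    # Scale the gradient by the root mean square, clipped below by epsilon
    rms = np.maximum(np.sqrt(new_msg), epsilon)
    new_param = param - lr * grad / rms
    return new_param, new_msg

# Toy usage: drive a single scalar parameter of f(p) = p^2 toward zero.
p, msg = np.array(5.0), np.zeros(())
for _ in range(100):
    g = 2.0 * p  # gradient of f(p) = p^2
    p, msg = rmsprop_step(p, g, msg, lr=0.1)

Clipping the root mean square at `epsilon` plays the same role as `T.maximum(rms_grad_t, self.epsilon)` in the Theano code: it keeps the effective step size bounded when the accumulated squared gradient is close to zero.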