Skip to content

Instantly share code, notes, and snippets.

Created January 29, 2015 12:29
Show Gist options
  • Save xiaohan2012/c5ee8e36a2767f6fa4b1 to your computer and use it in GitHub Desktop.
Save xiaohan2012/c5ee8e36a2767f6fa4b1 to your computer and use it in GitHub Desktop.
Comparing adagrad, adadelta in gradient descent in Theano
Comparing AdaGrad, AdaDelta and a constant learning rate in gradient descent (on the saddle-point function x^2 - y^2)
1. Comparison of several learning-rate update schemes:
2. Saddle point,
import numpy as np
import theano
import theano.tensor as T
# Hyper-parameters of the three update rules compared in this script.
rho = 0.95         # decay factor of the AdaDelta running averages
epsilon = 0.00001  # small constant used in the AdaDelta / AdaGrad denominators
gamma = 0.1        # base step size reported for AdaGrad
const_lr = 0.01    # step size of the constant-learning-rate baseline

# The point being optimised: one shared 2-vector.
init_x = [0.1, 0.1]
x = theano.shared(
    np.array(init_x, dtype=theano.config.floatX),
    borrow=True,
    name="x",
)

# Only referenced by the commented-out early-stop check in ``simulate``.
tolorate = 0.01

params = [x]
param_shapes = [(2,)]

# Alternative objective kept from the original for experimentation:
# cost = 0.5 * (x[0]-2) ** 2 + (x[1]-2) ** 2
cost = x[0] ** 2 - x[1] ** 2
param_grads = [T.grad(cost, param) for param in params]
def make_func(x, cost, updates, init_x):
    """Compile a zero-argument step function and rewind ``x`` to ``init_x``.

    Parameters
    ----------
    x : Theano shared variable
        The point being optimised; it is also the first output of the
        compiled function.
    cost : Theano expression
        Objective reported as the second output.
    updates : list of (shared variable, expression) pairs
        Passed unchanged to ``theano.function``.
    init_x : sequence of numbers
        Value written into ``x`` before compiling.

    Returns
    -------
    The compiled function; each call returns ``[x, cost]`` and applies
    ``updates``.
    """
    # Callers reuse the same shared variable for several optimisers; without
    # this reset each run would continue from where the previous one stopped.
    x.set_value(np.asarray(init_x, dtype=x.dtype))
    return theano.function(
        inputs=[],
        outputs=[x, cost],
        updates=updates,
    )
def simulate(f, n_epoch_max=100):
    """Call ``f`` repeatedly and record the first value of every result.

    Parameters
    ----------
    f : callable
        Takes no arguments and returns a pair ``(x_val, cost_val)``.
    n_epoch_max : number, optional
        Upper bound on the number of calls to ``f``.

    Returns
    -------
    (xs, used_epochs)
        ``xs`` is the list of ``x_val`` results in call order and
        ``used_epochs`` is how many times ``f`` was called.
    """
    xs = []
    used_epochs = 0
    print("##################")
    while used_epochs < n_epoch_max:
        x_val, cost_val = f()
        # Early stopping is left disabled, as in the original:
        # if abs(cost_val) < tolorate:
        #     break
        xs.append(x_val)  # the original never stored the trajectory
        used_epochs += 1
    return xs, used_epochs
print("Using AdaDelta with rho = %f and epsilon = %f" % (rho, epsilon))

# One zero-initialised accumulator per parameter for the running average of
# squared gradients.
# NOTE(review): the expression after ``"Eg:" +`` is cut off in the source;
# ``param.name`` is assumed -- confirm.
egs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Eg:" + param.name,
    )
    for param_shape, param in zip(param_shapes, params)
]

# Same layout, for the running average of squared updates.
# NOTE(review): name suffix assumed to be ``param.name`` as above.
exs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Ex:" + param.name,
    )
    for param_shape, param in zip(param_shapes, params)
]

new_egs = [
    rho * eg + (1 - rho) * g ** 2
    for eg, g in zip(egs, param_grads)
]

# The step uses the *updated* gradient average but the *previous* update
# average.
delta_x = [
    -(T.sqrt(ex + epsilon) / T.sqrt(new_eg + epsilon)) * g
    for new_eg, ex, g in zip(new_egs, exs, param_grads)
]

new_exs = [
    rho * ex + (1 - rho) * (dx ** 2)
    for ex, dx in zip(exs, delta_x)
]

# list(...) keeps the concatenation below valid whether zip returns a list
# or an iterator.
egs_updates = list(zip(egs, new_egs))
exs_updates = list(zip(exs, new_exs))
param_updates = [
    (p, p + dx)
    for dx, p in zip(delta_x, params)
]

updates = egs_updates + exs_updates + param_updates

f = make_func(x, cost, updates, init_x)
adadelta_xs, adadelta_epochs = simulate(f)
print("Using AdaGrad with gamma = %f and epsilon = %f" % (gamma, epsilon))

# One zero-initialised accumulator of summed squared gradients per parameter.
# NOTE(review): the expression after ``"grad_hist:" +`` is cut off in the
# source; ``param.name`` is assumed -- confirm.
grad_hists = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="grad_hist:" + param.name,
    )
    for param_shape, param in zip(param_shapes, params)
]

new_grad_hists = [
    g_hist + g ** 2
    for g_hist, g in zip(grad_hists, param_grads)
]

# Step size is gamma / (sqrt(accumulated squared gradients) + epsilon).
# The original read ``g_hist`` through a leaked comprehension variable (a
# NameError on Python 3), used the accumulator *before* adding the current
# gradient, and multiplied the numerator by ``epsilon``, which shrinks the
# step to roughly gamma * epsilon after the first iteration.
# The Print op is kept so the per-step "lr" debug output is still emitted.
param_updates = [
    (
        param,
        param
        - theano.printing.Print("lr")(gamma / (T.sqrt(new_g_hist) + epsilon))
        * param_grad,
    )
    for param, param_grad, new_g_hist in zip(params, param_grads, new_grad_hists)
]

# list(...) keeps the concatenation below valid whether zip returns a list
# or an iterator.
grad_hist_update = list(zip(grad_hists, new_grad_hists))
updates = grad_hist_update + param_updates

f = make_func(x, cost, updates, init_x)
adagrad_xs, adagrad_epochs = simulate(f)
# constant lr #
# Baseline: plain gradient descent with the fixed step size ``const_lr``.
print("Usin constant learning rate %f" % (const_lr))

updates = [
    (param, param - const_lr * param_grad)
    for param, param_grad in zip(params, param_grads)
]

f = make_func(x, cost, updates, init_x)
const_lr_xs, const_lr_epochs = simulate(f)
from matplotlib import pyplot as plt
def myplot(data, style, title, plot_number, total):
    """Draw one trajectory of 2-D points in its own subplot.

    Parameters
    ----------
    data : sequence of 2-element points
        Points in visiting order; an empty sequence yields an empty panel.
    style : str
        matplotlib format string passed to ``plt.plot``.
    title : str
        Title of the panel.
    plot_number : int
        1-based index of this panel.
    total : int
        Total number of panels in the figure.
    """
    # NOTE(review): the original subplot/title calls are not visible in the
    # source; a single row of ``total`` panels is assumed -- confirm.
    plt.subplot(1, total, plot_number)
    plt.title(title)
    if len(data) > 0:  # zip(*[]) cannot be unpacked into x, y
        x, y = zip(*data)
        plt.plot(x, y, style)
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])


# NOTE(review): the first two arguments of each call are cut off in the
# source; the trajectories are matched by name and 'ro-' is the style the
# original hard-coded inside ``myplot``.
# NOTE(review): no ``plt.show()`` / ``savefig`` call is visible in this chunk.
myplot(adadelta_xs, 'ro-',
       "AdaDelta(%d epochs)" % (adadelta_epochs),
       1, 3)
myplot(adagrad_xs, 'ro-',
       "AdaGrad(%d epochs)" % (adagrad_epochs),
       2, 3)
myplot(const_lr_xs, 'ro-',
       "ConstLR(%d epochs)" % (const_lr_epochs),
       3, 3)
Copy link

davidsj commented Feb 2, 2015

Any results worth sharing? Thanks. :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment