@xiaohan2012
Created January 29, 2015 12:29
Comparing AdaGrad and AdaDelta in gradient descent in Theano
"""
Comparing adagrad, adadelta and constant learning in gradient descent(the seddle point function y^2 - x^2)
Reference:
1. comparison on several learning rate update scheme: http://ml.memect.com/archive/2014-12-12/short.html#3786866375172817
2. Saddle point, http://en.wikipedia.org/wiki/Saddle_point
"""
import numpy as np
import theano
import theano.tensor as T
rho = 0.95          # AdaDelta decay rate
epsilon = 0.00001   # small constant for numerical stability
gamma = 0.1         # AdaGrad base learning rate
const_lr = 0.01     # constant learning rate baseline
init_x = [0.1, 0.1] # starting point
x = theano.shared(
    np.array(init_x, dtype=theano.config.floatX),
    borrow=True,
    name="x"
)
tolerate = 0.01  # tolerance for the (commented-out) early-stopping check
params = [x]
param_shapes = [(2,)]
# cost = 0.5 * (x[0]-2) ** 2 + (x[1]-2) ** 2
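# Saddle-point cost: the gradient is (2*x[0], -2*x[1]), so plain gradient
# descent pulls x[0] toward 0 but pushes x[1] away from the saddle at the origin.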
cost = x[0] ** 2 - x[1] ** 2
param_grads = [T.grad(cost, param) for param in params]
def make_func(x, cost, updates, init_x):
    # reset x to the starting point and compile one optimization step
    x.set_value(init_x)
    f = theano.function(
        inputs=[],
        outputs=[x, cost],
        updates=updates
    )
    return f
def simulate(f, n_epoch_max=100):
    # repeatedly apply the compiled update step, recording the trajectory of x
    epoch = 0
    xs = []
    print "##################"
    while epoch < n_epoch_max:
        x_val, cost_val = f()
        xs.append(x_val)
        # if abs(cost_val) < tolerate:
        #     break
        epoch += 1
    return xs, epoch
###############
# ADADELTA #
###############
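# A minimal plain-NumPy sketch of a single AdaDelta step on one parameter
# vector, included only as a readable reference for the Theano graph built
# below (this helper is not called anywhere; names are illustrative):
def adadelta_step_reference(x_val, grad, eg, ex, rho=0.95, epsilon=1e-5):
    eg = rho * eg + (1 - rho) * grad ** 2                  # running average of g^2
    dx = -np.sqrt(ex + epsilon) / np.sqrt(eg + epsilon) * grad
    ex = rho * ex + (1 - rho) * dx ** 2                    # running average of dx^2
    return x_val + dx, eg, ex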
print "Using AdaDelta with rho = %f and epsilon = %f" %(rho, epsilon)
# running averages of squared gradients (egs) and squared updates (exs)
egs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Eg:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]
exs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Ex:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]
# AdaDelta recurrences, as in the reference sketch above
new_egs = [
    rho * eg + (1 - rho) * g ** 2
    for eg, g in zip(egs, param_grads)
]
delta_x = [
    -(T.sqrt(ex + epsilon) / T.sqrt(new_eg + epsilon)) * g
    for new_eg, ex, g in zip(new_egs, exs, param_grads)
]
new_exs = [
    rho * ex + (1 - rho) * (dx ** 2)
    for ex, dx in zip(exs, delta_x)
]
egs_updates = zip(egs, new_egs)
exs_updates = zip(exs, new_exs)
param_updates = [
    (p, p + dx)
    for dx, p in zip(delta_x, params)
]
updates = egs_updates + exs_updates + param_updates
f = make_func(x, cost, updates, init_x)
adadelta_xs, adadelta_epochs = simulate(f)
##############
# ADAGRAD #
##############
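# Likewise, a minimal NumPy sketch of a single AdaGrad step (reference only,
# not called by the code below; names are illustrative):
def adagrad_step_reference(x_val, grad, grad_hist, gamma=0.1, epsilon=1e-5):
    grad_hist = grad_hist + grad ** 2                      # accumulate squared gradients
    lr = gamma / (np.sqrt(grad_hist) + epsilon)            # per-coordinate step size
    return x_val - lr * grad, grad_hist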
print "Using AdaGrad with gamma = %f and epsilon = %f" %(gamma, epsilon)
grad_hists = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="grad_hist:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]
new_grad_hists = [
g_hist + g ** 2
for g_hist, g in zip(grad_hists, param_grads)
]
# AdaGrad step: per-coordinate learning rate gamma / (sqrt(accumulated g^2) + epsilon)
param_updates = [
    (param,
     param - theano.printing.Print("lr")(gamma / (T.sqrt(g_hist) + epsilon)) * param_grad)
    for param, g_hist, param_grad in zip(params, new_grad_hists, param_grads)
]
grad_hist_update = zip(grad_hists, new_grad_hists)
updates = grad_hist_update + param_updates
f = make_func(x, cost, updates, init_x)
adagrad_xs, adagrad_epochs = simulate(f)
###############
# constant lr #
###############
print "Usin constant learning rate %f" %(const_lr)
updates = [
(param, param - const_lr * param_grad)
for param, param_grad in zip(params, param_grads)
]
f = make_func(x, cost, updates, init_x)
const_lr_xs, const_lr_epochs = simulate(f)
from matplotlib import pyplot as plt
def myplot(data, style, title, plot_number, total):
    # plot the trajectory of x in one subplot of a 1 x total grid
    plt.subplot(1, total, plot_number)
    x, y = zip(*data)
    plt.plot(x, y, style)
    plt.title(title)
    plt.xlim([-10, 10]); plt.ylim([-10, 10])
myplot(adadelta_xs,
'ro-',
"AdaDelta(%d epochs)" %(adadelta_epochs),
1, 3)
myplot(adagrad_xs,
'ro-',
"AdaGrad(%d epochs)" %(adagrad_epochs),
2, 3)
myplot(const_lr_xs,
'ro-',
"ConstLR(%d epochs)" %(const_lr_epochs),
3, 3)
plt.show()
davidsj commented Feb 2, 2015

Any results worth sharing? Thanks. :)