@yukoba
Last active September 10, 2016 06:12
AdaGrad + stochastic gradient descent using theano.scan()
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# AdaGrad + stochastic gradient descent using theano.scan()

# Toy data: y is (almost) equal to x, so the fitted line should end up
# close to slope 1, intercept 0.
train_x = np.random.rand(100)
train_y = train_x + np.random.rand(100) * 0.01


def fn(i, params, r, data_x, data_y, learning_rate):
    # One SGD step on a single example i, using the AdaGrad update rule.
    y = params[0] * data_x[i] + params[1]
    cost = (data_y[i] - y) ** 2
    g = T.grad(cost, params)
    r += g ** 2  # accumulate squared gradients per parameter
    return params - learning_rate / T.sqrt(r) * g, r


init_params = T.dvector()
init_r = T.dvector()
data_x = T.dvector()
data_y = T.dvector()

# Random order in which the training examples are visited.
indices = RandomStreams().permutation([100], data_y.shape[0])

result, updates = theano.scan(fn=fn,
                              sequences=indices.flatten(),
                              outputs_info=(init_params, init_r),
                              non_sequences=(data_x, data_y, 3))

f = theano.function([init_params],
                    result[0][-1],
                    givens={
                        init_r: np.array([1e-8, 1e-8]),
                        data_x: train_x,
                        data_y: train_y
                    },
                    updates=updates)

print(f(np.array([0.5, 0.5])))  # [about 1, about 0]
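
For comparison, here is a minimal pure-NumPy sketch of the same AdaGrad + SGD loop without Theano. It applies the gist's update rule (r += g**2; params -= learning_rate / sqrt(r) * g) over shuffled example indices; the explicit gradient formula, the epoch count of 100, and the learning rate of 3 are assumptions chosen to mirror the gist, not part of it.

import numpy as np

train_x = np.random.rand(100)
train_y = train_x + np.random.rand(100) * 0.01

params = np.array([0.5, 0.5])   # [slope, intercept]
r = np.array([1e-8, 1e-8])      # accumulated squared gradients
learning_rate = 3.0             # assumed to match the gist's value

for epoch in range(100):        # assumed number of passes over the data
    for i in np.random.permutation(100):
        y = params[0] * train_x[i] + params[1]
        err = train_y[i] - y
        # Gradient of (train_y[i] - y) ** 2 w.r.t. [slope, intercept]
        g = np.array([-2.0 * err * train_x[i], -2.0 * err])
        r += g ** 2
        params -= learning_rate / np.sqrt(r) * g

print(params)  # roughly [1, 0]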