Skip to content

Instantly share code, notes, and snippets.

@sisp
Created October 22, 2013 13:15
Show Gist options
  • Save sisp/7100561 to your computer and use it in GitHub Desktop.
ERROR (theano.gof.opt): Optimization failure due to: remove_constants_and_unused_inputs_scan ERROR (theano.gof.opt): TRACEBACK: ERROR (theano.gof.opt): Traceback (most recent call last): File "/home/sigurd/.local/lib/python2.7/site-packages/Theano-0.6.0rc3-py2.7.egg/theano/gof/opt.py", line 1216, in process_node replacements = lopt.transform(nod…
import numpy as np
import theano
import theano.tensor as T
floatX = theano.config.floatX
class GaussNewtonMatrix(object):
    """Gauss-Newton curvature-matrix/vector products for Hessian-Free optimization.

    Holds the *linear* network output (pre-activation) and, on call,
    computes G*v via the R-operator / L-operator trick.
    """

    def __init__(self, s):
        # `s` must be the linear network output, i.e. the output BEFORE
        # the final activation function is applied.
        self._s = s

    def __call__(self, v, cost, parameters, damp):
        """Return the Gauss-Newton matrix right-multiplied by `v`, with
        Tikhonov damping `damp` added (G*v + damp*v), as a list matching
        `parameters`.
        """
        # J*v: directional derivative of the linear output along `v`.
        jac_vec = T.Rop(self._s, parameters, v)
        # H*(J*v): Hessian of the cost w.r.t. the linear output, applied
        # to J*v. `consider_constant` stops gradients flowing into J*v.
        hess_jac_vec = T.grad(
            T.sum(T.grad(cost, self._s) * jac_vec),
            self._s,
            consider_constant=[jac_vec],
        )
        # J^T*(H*J*v): pull the product back into parameter space.
        gauss_newton_vec = T.grad(
            T.sum(hess_jac_vec * self._s),
            parameters,
            consider_constant=[hess_jac_vec, jac_vec],
        )
        # Tikhonov damping: add damp * v component-wise.
        return [gv + damp * vi for gv, vi in zip(gauss_newton_vec, v)]
def run(num_features, num_timesteps, batch_size=1):
    """Build a tiny RNN in Theano, compute a cloned cost and Gauss-Newton
    matrix-vector products, compile and execute the resulting function.

    Parameters
    ----------
    num_features : int
        Dimensionality of each input frame.
    num_timesteps : int
        Sequence length consumed by `theano.scan`.
    batch_size : int, optional
        When 1, inputs/targets are matrices; otherwise tensor3s with a
        batch axis in the middle (time, batch, features).
    """
    # Determine shapes of inputs and targets depending on the batch size.
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)
    # Make inputs and targets shared variables (random data is fine here;
    # this script only reproduces a graph-optimization issue).
    inputs = theano.shared(np.random.uniform(size=inputs_size).astype(floatX), borrow=True)
    targets = theano.shared(np.random.uniform(size=targets_size).astype(floatX), borrow=True)
    # Create symbolic inputs and targets variables.
    x = T.matrix('inputs') if batch_size == 1 else T.tensor3('inputs')
    # BUG FIX: the tensor3 branch was mistakenly named 'inputs'
    # (copy-paste from the line above); the targets variable is now
    # consistently named 'targets' in both branches.
    t = T.matrix('targets') if batch_size == 1 else T.tensor3('targets')
    # Create a set of parameters for a simple RNN (10 hidden units).
    W_xh = theano.shared(0.01 * np.random.uniform(size=(num_features, 10)).astype(floatX), borrow=True)
    W_hh = theano.shared(0.01 * np.random.uniform(size=(10, 10)).astype(floatX), borrow=True)
    W_hy = theano.shared(0.01 * np.random.uniform(size=(10, 1)).astype(floatX), borrow=True)
    b_h = theano.shared(np.zeros(10).astype(floatX), borrow=True)
    b_y = theano.shared(np.zeros(1).astype(floatX), borrow=True)
    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # Recurrent transition: plain tanh RNN step.
    def step(x_t, h_tm1):
        h = T.tanh(T.dot(h_tm1, W_hh) + T.dot(x_t, W_xh) + b_h)
        return h

    # Build recurrent graph; the initial hidden state matches the batching.
    h_0 = T.alloc(0.0, 10) if batch_size == 1 else T.alloc(0.0, batch_size, 10)
    h, updates = theano.scan(step,
                             sequences=[x],
                             outputs_info=[h_0])
    # Network output (linear readout).
    y = T.dot(h, W_hy) + b_y
    # Create Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)
    # Compute MSE.
    cost = ((t - y)**2).sum(axis=1).mean()
    # Compute the cost at some other point in the parameter space. Not really
    # of any use here, but this is how I do it during certain iterations of CG
    # in the HF algorithm. There, it's in fact `pi + current update proposal`.
    # For simplicity, I just multiply by 2 here.
    # ! NOTE: If you comment out the next line and drop `cost_` from the
    # compiled function's outputs below, it works for both cases. !
    cost_ = theano.clone(cost, replace=dict([(pi, 2*pi) for pi in params]))
    # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
    # but for simplicity, I just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=T.constant(1.0))
    # Compile Theano function, substituting the shared data for the
    # symbolic inputs/targets via `givens`.
    f = theano.function([], [cost_] + Gv, givens={x: inputs, t: targets})
    # Execute.
    f()
if __name__ == '__main__':
    # batch_size > 1 represents the data as tensor3 objects — this case
    # runs fine.
    run(100, 10, batch_size=5)
    # batch_size == 1 represents the data as matrix objects — this case
    # triggers:
    # ERROR (theano.gof.opt): Optimization failure due to:
    #     remove_constants_and_unused_inputs_scan
    run(100, 10, batch_size=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment