Created November 20, 2012 16:49
Early snippet of chart-parsing recursive autoencoder
import theano
import theano.tensor as T
import numpy as np
import cPickle as pickle
#theano.config.compute_test_value = 'warn'
class Meta(object):
    """Prototype chart-parsing recursive autoencoder built on Theano.

    Every span ``(left, length)`` of a sentence gets one "combinator" row and
    every split of that span gets one "autoencoder" row in a shared
    activation table (``self.Winternal``, allocated by :meth:`run`).
    :meth:`getTreeDict` hands out the row indices, :meth:`sharedForwardProp`
    fills the table bottom-up and :meth:`backPropAndCost` walks it top-down
    accumulating the reconstruction error and the weight gradients.

    NOTE(review): this class was recovered from a copy that had lost its
    indentation, several comment markers and a few statements.  Every
    reconstructed statement is marked with ``NOTE(review)`` below and should
    be confirmed against the author's original.
    """

    def __init__(self):
        """Create the embedding, encoder/decoder weights and zeroed gradients."""
        self.activation = T.nnet.sigmoid
        self.rng = np.random.RandomState(1234)
        vocab_size = 25000
        self.n_in = 100
        self.n_hidden = 100

        # Initialising weights for variables
        r = np.sqrt(6. / (self.n_in + self.n_hidden))
        if self.activation == T.nnet.sigmoid:
            r *= 4

        # The draw order (We, W12, W34) is kept so the seeded generator
        # produces the same initial values as before.
        self.We = self._uniform_shared(r, (vocab_size, self.n_in), 'We')
        self.W12 = self._uniform_shared(
            r, (2 * self.n_in, self.n_hidden), 'W12')
        self.W34 = self._uniform_shared(
            r, (self.n_hidden, 2 * self.n_in), 'W34')
        self.b1 = self._zeros_shared((self.n_hidden,), 'b1')
        self.b23 = self._zeros_shared((2 * self.n_in,), 'b23')

        self.resetGradients()

    def _uniform_shared(self, bound, shape, name):
        """Return a shared variable drawn uniformly from [-bound, bound]."""
        values = self.rng.uniform(low=-bound, high=bound, size=shape)
        return theano.shared(
            value=np.asarray(values, dtype=theano.config.floatX), name=name)

    @staticmethod
    def _zeros_shared(shape, name):
        """Return a zero-filled shared variable of the given shape."""
        return theano.shared(
            value=np.zeros(shape, dtype=theano.config.floatX), name=name)

    @staticmethod
    def _as_value(expr):
        """Evaluate a symbolic expression; pass plain numbers through."""
        return expr.eval() if hasattr(expr, 'eval') else expr

    def resetGradients(self):
        """Rebind the four gradient accumulators to fresh zero-filled variables.

        New variables are created (rather than zeroing the old ones) because
        :meth:`backPropAndCost` reassigns these attributes with ``+=``.
        """
        self.W12grad = self._zeros_shared(
            (2 * self.n_in, self.n_hidden), 'W12grad')
        self.W34grad = self._zeros_shared(
            (self.n_hidden, 2 * self.n_in), 'W34grad')
        self.b1grad = self._zeros_shared((self.n_hidden,), 'b1grad')
        self.b23grad = self._zeros_shared((2 * self.n_in,), 'b23grad')

    def getTreeDict(self, sent_length):
        """Assign a table row to every span and every split of a sentence.

        Args:
            sent_length: number of words in the sentence.

        Returns:
            A tuple ``(counter, ae, cb)`` where ``counter`` is the number of
            rows handed out, ``ae`` maps ``(left, length, split)`` to the row
            of that split's autoencoder and ``cb`` maps ``(left, length)`` to
            the row of that span's combinator.  Single words are the spans of
            length 1 and receive the first ``sent_length`` rows.
        """
        ae = {}
        cb = {}
        counter = 0
        for i in range(sent_length):
            cb[(i, 1)] = counter
            counter += 1
        for p_length in range(2, sent_length + 1):
            for p_left in range(sent_length - p_length + 1):
                for p_split in range(p_left + 1, p_left + p_length):
                    ae[(p_left, p_length, p_split)] = counter
                    counter += 1
                # One combinator per span, numbered after all of its splits.
                cb[(p_left, p_length)] = counter
                counter += 1
        return counter, ae, cb

    def sharedForwardProp(self, data, sent_length, ae, cb):
        """Fill the activation table bottom-up for one sentence.

        Args:
            data: word ids of the sentence, indexable by word position.
            sent_length: number of words to use from ``data``.
            ae: split-to-row map from :meth:`getTreeDict`.
            cb: span-to-row map from :meth:`getTreeDict`.

        Requires the compiled functions created in :meth:`run`.
        """
        # Put sentence data into the bottom cb layer
        for i in range(sent_length):
            # NOTE(review): the body of this loop was missing from the
            # recovered source; an embedding lookup through initAE is the
            # only leaf initialiser defined in this class -- confirm.
            self.initAE(cb[(i, 1)], int(data[i]))

        # propagate everything up
        for p_length in range(2, sent_length + 1):
            for p_left in range(sent_length - p_length + 1):
                comb_children = []
                for p_split in range(p_left + 1, p_left + p_length):
                    # Forward Propagate autoencoders
                    y = self.get_shr_enc(
                        ae[(p_left, p_length, p_split)],
                        cb[(p_left, p_split - p_left)],
                        cb[(p_split, p_length + p_left - p_split)])
                    # NOTE(review): restored; without it np.vstack below
                    # would always receive an empty list.
                    comb_children.append(y)
                # Forward Propagate combinator
                self.set_combined(
                    cb[(p_left, p_length)], np.vstack(comb_children))

    def backPropAndCost(self, sent_length, ae, cb):
        """Walk the chart top-down, accumulating error, deltas and gradients.

        Adds the squared reconstruction error of every split to
        ``self.error``, extends the symbolic gradient expressions
        (``W12grad``, ``W34grad``, ``b1grad``, ``b23grad``) and adds each
        split's delta to the ``self.deltas`` rows of its two children.

        Args:
            sent_length: number of words in the sentence.
            ae: split-to-row map from :meth:`getTreeDict`.
            cb: span-to-row map from :meth:`getTreeDict`.
        """
        if sent_length < 1:
            # An empty sentence has no root span to start from.
            return

        # CrossEntropy Error at the label level (tree root)
        self.deltas[cb[(0, sent_length)]] = 0

        for p_length in range(sent_length, 1, -1):
            for p_left in range(sent_length - p_length, -1, -1):
                # nothing to do here. my children can pick up my delta easily
                # \eta_k &= \sum_{p \in \text{Downstream}} w_{pk} \delta_{p} \\
                # child_delta = weight * (own_delta)
                # as weights are shared, we can already multiply them inside
                # the delta matrix
                for p_split in range(p_left + p_length - 1, p_left, -1):
                    enc_row = ae[(p_left, p_length, p_split)]
                    left_row = cb[(p_left, p_split - p_left)]
                    right_row = cb[(p_split, p_left + p_length - p_split)]
                    encoding = self.Winternal[enc_row]

                    # \delta_{ks} &= o_{ks} (1 - o_{ks}) \left(\eta_k + \delta_{\text{rec}} w_{\text{rec}}\right) \\
                    # Get eta from parent combinator
                    delta_tree = self.deltas[cb[(p_left, p_length)]]

                    # Add reconstruction error
                    # \delta_{\text{rec}} &= - (r_\text{rec} - o_\text{rec}) (1 - o_\text{rec}) o_\text{rec} * ?
                    # reconstruction_delta = (my_input - my_reconstruction) (-rec) (1-rec)
                    my_input = T.concatenate(
                        [self.Winternal[left_row, :],
                         self.Winternal[right_row, :]],
                        axis=0)
                    # The recovered line had lost its "T.dot(self.Winternal"
                    # prefix; the remaining parentheses and operand shapes
                    # leave this as the only consistent reading.
                    my_reconstruction = T.dot(
                        self.Winternal[enc_row, :], self.W34) + self.b23
                    reconstruction_error = my_input - my_reconstruction
                    self.error += T.sum(T.sqr(reconstruction_error))
                    reconstruction_delta = (reconstruction_error
                                            * (-my_reconstruction)
                                            * (1 - my_reconstruction))
                    self.W34grad += T.outer(encoding, reconstruction_delta)
                    self.b23grad += reconstruction_delta

                    backpropagated = T.sum(
                        self.W34 * reconstruction_delta, axis=1)
                    delta_combined = delta_tree + backpropagated

                    # Now, multiply with o_ks (1 - o_ks) [ which is my embedding ]
                    # Plain operators / T.sum are used here because the
                    # operands are symbolic Theano expressions.
                    delta_combined *= (1 - encoding) * encoding

                    self.W12grad += T.outer(my_input, delta_combined)
                    self.b1grad += delta_combined
                    delta_p = T.sum(delta_combined * self.W12, axis=1)

                    ## give delta to my specific children: [0,3,1] gives to [0,1] and [1,2].
                    self.deltas[left_row] += delta_p[:self.n_hidden].eval()
                    self.deltas[right_row] += delta_p[self.n_hidden:].eval()
                    # NOTE(review): the nesting level of this progress dot
                    # could not be recovered; placed next to the statements
                    # it followed in the source.
                    print(".")

    def run(self):
        """Load the pickled corpus, compile the Theano functions and train.

        Reads ``../../data/movies/data.pkl``, copies up to 50 word ids per
        sentence into a flat buffer, sizes the activation table for the
        largest of the first 45 sentences and then processes those 45
        sentences one by one.
        """
        # Loading Data
        print("loading data ...")
        data_shape = 100 * 50  # sentences x words/sentence
        data = theano.shared(value=np.zeros(data_shape, dtype=np.int32))
        data_np = np.zeros(data_shape, dtype=np.int32)
        len_np = np.zeros(100, dtype=np.int32)
        data_loc = "../../data/movies"
        # SECURITY: unpickling executes arbitrary code; only load trusted files.
        with open("%s/data.pkl" % data_loc, "rb") as f:
            data_list, classes = pickle.load(f), pickle.load(f)
        # NOTE(review): only 99 of the 100 allocated sentence slots are
        # filled; kept as in the source -- confirm whether 100 was meant.
        for sent in range(0, 99):
            n_words = min(50, len(data_list[sent]))
            for word in range(n_words):
                data_np[sent * 50 + word] = data_list[sent][word]
            len_np[sent] = n_words
        data.set_value(data_np)
        # NOTE(review): this symbolic view has no consumer in the recovered
        # source; kept in case the lost statements used it.
        data = data.reshape((100, 50))
        print("... done")

        m = 0
        for sentence in range(0, 45):
            counter, a, c = self.getTreeDict(len_np[sentence])
            m = max(m, counter)

        print("creating data ...")
        self.Winternal = theano.shared(
            value=np.zeros((m, self.n_hidden), dtype=theano.config.floatX),
            borrow=True)
        self.deltas = np.zeros((m, self.n_hidden), dtype=theano.config.floatX)
        print(".. done")

        self.A = T.vector()
        self.B = T.vector()
        self.b = T.lscalar()
        self.a = T.lscalar()
        self.y = T.scalar(dtype='int32')
        self.x = T.scalar(dtype='int32')
        self.C = T.matrix()

        self.cb_shared_combinator = T.mean(self.C, axis=0)
        #self.get_shr_comb = theano.function([self.C],self.cb_shared_combinator)
        self.set_combined = theano.function(
            [self.a, self.C], [],
            updates={self.Winternal: T.set_subtensor(
                self.Winternal[self.a], self.cb_shared_combinator)})

        # The recovered line had lost its "T.dot(T.concatenate(" prefix; the
        # remaining parentheses leave this as the only consistent reading.
        self.cb_shared_encoding = self.activation(
            T.dot(T.concatenate([self.A, self.B]), self.W12) + self.b1)
        self.get_shr_enc = theano.function(
            [self.a, self.x, self.y], self.cb_shared_encoding,
            on_unused_input='warn',
            givens={self.A: self.Winternal[self.x, :],
                    self.B: self.Winternal[self.y, :]},
            updates={self.Winternal: T.set_subtensor(
                self.Winternal[self.a, :], self.cb_shared_encoding)})

        self.initAE = theano.function(
            [self.a, self.b], [],
            updates={self.Winternal: T.set_subtensor(
                self.Winternal[self.a, :], self.We[self.b])})

        # NOTE(review): whether the update section below ran once per
        # sentence or once after the loop could not be recovered; it is kept
        # inside the loop, in source order.
        for sentence in range(0, 45):
            sent_length = len_np[sentence]
            print("Length %d" % sent_length)
            counter, a, c = self.getTreeDict(sent_length)
            print("Counter %d" % counter)

            print("forward prop: learning outputs and encoding %d" % sentence)
            # NOTE(review): call restored; the message above announces it but
            # the statement itself was missing from the recovered source.
            self.sharedForwardProp(
                data_np[sentence * 50:(sentence + 1) * 50],
                sent_length, a, c)

            print("backprop: accumulating deltas and error")
            self.error = 0
            # NOTE(review): call restored for the same reason as above.
            self.backPropAndCost(sent_length, a, c)
            # self.error is still the plain number 0 when the sentence had
            # no splits, so it is evaluated only when it is symbolic.
            print("Errors %s" % (self._as_value(self.error),))

            print("Updating W12")
            print(self.W12.eval().shape)
            print(self.W12grad.eval().shape)
            # NOTE(review): these assignments rebind the attributes to
            # symbolic expressions; the functions compiled above were built
            # from the original shared variables.  Presumably an in-place
            # update of the shared values was intended -- confirm.
            self.W12 = self.W12 - 0.1 * self.W12grad
            print(self.W12.eval().shape)
            print("done")
            self.W34 = self.W34 - 0.1 * self.W34grad
            self.b1 = self.b1 - 0.1 * self.b1grad
            self.b23 = self.b23 - 0.1 * self.b23grad

            self.error = 0
            # NOTE(review): a second evaluation pass presumably sat between
            # the reset and this print; it is not reconstructed here.
            print("Errors %s" % (self._as_value(self.error),))
# Build the model at import time; the constructor allocates all weight
# matrices, including the vocabulary-sized embedding table.
# NOTE(review): nothing visible in this copy calls meta.run() -- confirm
# whether that call was lost.
meta = Meta()
klb3713 commented Oct 15, 2013

Hi, I am researching this model. Could you share the training data file "data.pkl"? I don't know its correct format. Thanks.

Already have an account? Sign in to comment