@minhlab
Last active November 5, 2015 16:02
I compare the performance of Theano and Torch with two scripts that perform multi-class classification over categorical inputs. This kind of model is important for NLP, similar to Chen & Manning (2014). Surprisingly, Theano is much slower and less accurate than Torch.
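Both scripts implement the same model: each categorical input selects a row from its own embedding table, the selected rows are concatenated, and a linear layer followed by a softmax produces the distribution over classes. Below is a rough numpy sketch of that forward pass (my own illustration, not part of either benchmark script; the sizes mirror the scripts that follow).

import numpy as np

# Illustration only: 5 categorical inputs, vocabulary of 10000,
# 10-dimensional embeddings, 23 output classes, as in the scripts below.
vocab, emb_dims, num_inp, out_dims = 10000, 10, 5, 23
E = [np.random.uniform(-0.1, 0.1, (vocab, emb_dims)) for _ in range(num_inp)]
W = np.random.uniform(-0.1, 0.1, (out_dims, emb_dims * num_inp))

x = np.random.randint(vocab, size=(4, num_inp))    # a mini-batch of 4 examples
z = np.concatenate([E[i][x[:, i]] for i in range(num_inp)], axis=1)
scores = z.dot(W.T)                                # (4, 23) class scores
probs = np.exp(scores - scores.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)          # softmax over the 23 classes
print(probs.shape)                                 # (4, 23)

The recorded output of the two runs: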
Running Torch...
Last cost: 0.01029977761209
Time: 0
Using gpu device 0: GeForce GTX 980
Compiling function...
Running Theano...
Last cost: 3.135782
Time: 6.343242
require('nn')
require('cunn')

-- Build the classifier: the N x inp_num batch of indices is split
-- column-wise so that each categorical slot goes through its own
-- LookupTable (embedding), matching the column-wise indexing in the
-- Theano script; the embeddings are concatenated and fed to a linear
-- layer followed by a log-softmax.
function model(emb_dims, inp_num, out_dims)
  local mlp = nn.Sequential()
  mlp:add(nn.SplitTable(2))
  local para = nn.ParallelTable()
  for i = 1, inp_num do
    para:add(nn.LookupTable(10000, emb_dims))
  end
  mlp:add(para)
  mlp:add(nn.JoinTable(2))
  mlp:add(nn.Linear(emb_dims * inp_num, out_dims))
  mlp:add(nn.LogSoftMax())
  return mlp
end

-- One full-batch training step: forward pass, backward pass, SGD update.
function train(mlp, criterion, ds_x, ds_y)
  local cost = criterion:forward(mlp:forward(ds_x), ds_y)
  mlp:zeroGradParameters()
  mlp:backward(ds_x, criterion:backward(mlp.output, ds_y))
  mlp:updateParameters(0.01)
  return cost
end

local mlp = model(10, 5, 23)
local criterion = nn.ClassNLLCriterion()

-- 10000 examples, each made of 5 categorical inputs drawn from a
-- vocabulary of 10000, with a target class in 1..23.
local ds_x = torch.IntTensor(10000, 5)
local ds_y = torch.IntTensor(10000)
ds_x:random(10000)
ds_y:random(23)

-- Move model, criterion and data to the GPU.
mlp:cuda()
criterion:cuda()
ds_x = ds_x:cuda()
ds_y = ds_y:cuda()

print('Running Torch...')
local start = os.time()
local cost
for i = 1, 1000 do
  cost = train(mlp, criterion, ds_x, ds_y)
end
local stop = os.time()
print('Last cost: ' .. cost)
print('Time: ' .. (stop - start))
import theano
from theano import tensor as T
from theano import function
import numpy as np
import time


def model(x, emb_dims, num_inp, out_dims):
    # One embedding matrix per categorical input slot.
    E = []
    for i in range(num_inp):
        E_values = np.asarray(np.random.uniform(low=-0.1, high=0.1,
                                                size=(10000, emb_dims)),
                              dtype='float32')
        E.append(theano.shared(E_values, 'emb-%d' % i))
    # Output layer weights.
    W_values = np.asarray(np.random.uniform(low=-0.1, high=0.1,
                                            size=(out_dims, emb_dims*num_inp)),
                          dtype='float32')
    W = theano.shared(W_values, 'W')
    # Look up the embedding of each input column, concatenate, then softmax.
    z = T.concatenate([E[i][x[:, i]] for i in range(num_inp)], axis=1)
    a = T.nnet.softmax(T.dot(z, W.T))
    return a, E + [W]


if __name__ == '__main__':
    x = T.matrix('x', dtype='int64')
    y = T.vector('y', dtype='int64')
    prob, params = model(x, 10, 5, 23)
    # Mean negative log-likelihood of the correct classes.
    cost = T.mean(-T.log(prob[T.arange(y.shape[0]), y]))
    grads = T.grad(cost, params)
    print('Compiling function...')
    # One call performs a full-batch forward/backward pass and an SGD update.
    train = function([x, y], cost, updates=[(p, p - 0.01*g)
                                            for p, g in zip(params, grads)])
    ds_x = np.random.randint(10000, size=(10000, 5))
    ds_y = np.random.randint(23, size=(10000,))
    print('Running Theano...')
    start = time.time()
    for i in range(1000):
        cost = train(ds_x, ds_y)
    stop = time.time()
    print('Last cost: %f' % cost)
    print('Time: %f' % (stop - start))
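One way to dig into the Theano slowdown is to recompile the training function with profiling enabled and print the per-op time breakdown, which shows where the 6+ seconds go and whether the embedding indexing and the updates actually run on the GPU. This is a sketch of my own, not part of the original benchmark, and it assumes the model() function defined in the script above is available in the same file.

import numpy as np
import theano
from theano import tensor as T

# Sketch only: same graph as above, but compiled with profile=True so that
# train.profile.summary() reports how much time each op consumed.
x = T.matrix('x', dtype='int64')
y = T.vector('y', dtype='int64')
prob, params = model(x, 10, 5, 23)   # model() from the script above
cost = T.mean(-T.log(prob[T.arange(y.shape[0]), y]))
grads = T.grad(cost, params)
train = theano.function([x, y], cost,
                        updates=[(p, p - 0.01*g) for p, g in zip(params, grads)],
                        profile=True)

ds_x = np.random.randint(10000, size=(10000, 5))
ds_y = np.random.randint(23, size=(10000,))
for _ in range(100):
    train(ds_x, ds_y)
train.profile.summary()              # per-op timing breakdown, printed to stderr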