Quick & Dirty Theano benchmark
import numpy
import theano
import theano.tensor as T
from theano import config  # floatX is read from Theano's config (float32 here)
import time


def theano_softmax():
    x = T.fmatrix('x')
    _y = T.nnet.softmax(x)
    f = theano.function([x], _y)
    return f


def theano_p_y_given_x():
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.fvector('b')  # single-precision bias, matching floatX = float32
    input = T.dot(x, w) + b
    y = T.nnet.softmax(input)
    f = theano.function([x, w, b], y)
    return f


def theano_argmax():
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.fvector('b')
    input = T.dot(x, w) + b
    y = T.nnet.softmax(input)
    a = T.argmax(y, axis=1)
    f = theano.function([x, w, b], a)
    return f


def theano_neg_log_likelihood():
    y = T.ivector('y')
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.fvector('b')
    input = T.dot(x, w) + b
    p_y_given_x = T.nnet.softmax(input)
    # mean cross-entropy: pick out log P(y_i | x_i) for each sample
    neg_like = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    return theano.function([x, w, b, y], neg_like)


def theano_neg_log_likelihood_prime():
    y = T.ivector('y')
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.fvector('b')
    input = T.dot(x, w) + b
    p_y_given_x = T.nnet.softmax(input)
    neg_like = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    g_W = T.grad(cost=neg_like, wrt=w)  # symbolic gradient w.r.t. the weights
    return theano.function([x, w, b, y], g_W)


def softmax(w):
    w = numpy.array(w)
    # subtract the row max before exponentiating, for numerical stability
    maxes = numpy.amax(w, axis=1)
    maxes = maxes.reshape(maxes.shape[0], 1)
    e = numpy.exp(w - maxes)
    dist = e / numpy.sum(e, axis=1).reshape(maxes.shape[0], 1)
    return dist


def p_y_given_x(X, w, b):
    dt = numpy.dot(X, w) + b
    return softmax(dt)


def argmax(X, w, b):
    return numpy.argmax(p_y_given_x(X, w, b), axis=1)


def neg_log_likelihood(X, w, b, y):
    return -numpy.mean(
        numpy.log(p_y_given_x(X, w, b))[numpy.arange(y.shape[0]), y])


theano_time = 0
our_time = 0

theano_procs = {
    'softmax': theano_softmax(),
    'argmax': theano_argmax(),
    'p_y_given_x': theano_p_y_given_x(),
    'neg_log_likelihood': theano_neg_log_likelihood()
}

num_samples = 20000
num_features = 784
num_outputs = 10

for i in range(1):  # single pass; raise the range to average over more runs
    # floatX = float32 in config
    X = numpy.array(numpy.random.rand(num_samples, num_features), dtype=config.floatX)
    w = numpy.array(numpy.random.rand(num_features, num_outputs), dtype=config.floatX)
    b = numpy.array(numpy.random.rand(num_outputs), dtype=config.floatX)
    y = numpy.array(numpy.random.random_integers(0, num_outputs - 1, num_samples), dtype='int32')

    start_time = time.time()
    theirs = theano_procs['softmax'](X)
    theano_time += (time.time() - start_time)

    start_time = time.time()
    ours = softmax(X)
    our_time += (time.time() - start_time)

    # coarse agreement check: both results identical after rounding
    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    start_time = time.time()
    theirs = theano_procs['p_y_given_x'](X, w, b)
    theano_time += (time.time() - start_time)

    start_time = time.time()
    ours = p_y_given_x(X, w, b)
    our_time += (time.time() - start_time)

    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    start_time = time.time()
    theirs = theano_procs['argmax'](X, w, b)
    theano_time += (time.time() - start_time)

    start_time = time.time()
    ours = argmax(X, w, b)
    our_time += (time.time() - start_time)

    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    start_time = time.time()
    theirs = theano_procs['neg_log_likelihood'](X, w, b, y)
    theano_time += (time.time() - start_time)

    start_time = time.time()
    ours = neg_log_likelihood(X, w, b, y)
    our_time += (time.time() - start_time)

    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

print "Theano Time", theano_time
print "Our time", our_time
I updated the code per your remarks: switched off double-precision math and moved to large inputs rather than many small calls.
num_samples = 20k
Device = cpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Theano Time 0.359999895096
Our time 0.466942310333
Device = gpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Using gpu device 0: GeForce GT 650M
Theano Time 0.327509880066
Our time 0.481206655502
num_samples = 50k
Device = cpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Theano Time 0.964176416397
Our time 1.20006990433
Device = gpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Using gpu device 0: GeForce GT 650M
Theano Time 0.729407787323
Our time 1.18586134911
All is right in the world, thank you!
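
For reference, the transcripts above don't show how the device was switched between runs; Theano reads it from the THEANO_FLAGS environment variable or ~/.theanorc, so a quick sanity check before trusting the timings (an assumed setup step, not part of the original session) is:

import theano
print theano.config.device   # 'cpu' or 'gpu'
print theano.config.floatX   # 'float32' for the single-precision runs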