-
-
Save aelaguiz/4455549 to your computer and use it in GitHub Desktop.
import numpy | |
import theano | |
import theano.tensor as T | |
import config | |
import time | |
def theano_softmax():
    """Compile and return a Theano function computing row-wise softmax."""
    inp = T.fmatrix('x')
    return theano.function([inp], T.nnet.softmax(inp))
def theano_p_y_given_x():
    """Compile and return a Theano function for softmax(x.w + b)."""
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.dvector('b')
    probs = T.nnet.softmax(T.dot(x, w) + b)
    return theano.function([x, w, b], probs)
def theano_argmax():
    """Compile and return a Theano function giving the most probable
    class index per sample row (argmax of softmax(x.w + b))."""
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.dvector('b')
    probs = T.nnet.softmax(T.dot(x, w) + b)
    return theano.function([x, w, b], T.argmax(probs, axis=1))
def theano_neg_log_likelihood():
    """Compile and return a Theano function for the mean negative
    log-likelihood of integer labels y under softmax(x.w + b)."""
    y = T.ivector('y')
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.dvector('b')
    probs = T.nnet.softmax(T.dot(x, w) + b)
    # Pick log P(y[i] | x[i]) for every sample i, then average and negate.
    nll = -T.mean(T.log(probs)[T.arange(y.shape[0]), y])
    return theano.function([x, w, b, y], nll)
def theano_neg_log_likelihood_prime():
    """Compile and return a Theano function for the gradient of the mean
    negative log-likelihood with respect to the weight matrix w."""
    y = T.ivector('y')
    x = T.fmatrix('x')
    w = T.fmatrix('w')
    b = T.dvector('b')
    probs = T.nnet.softmax(T.dot(x, w) + b)
    # Same cost expression as theano_neg_log_likelihood.
    nll = -T.mean(T.log(probs)[T.arange(y.shape[0]), y])
    grad_w = T.grad(cost=nll, wrt=w)
    return theano.function([x, w, b, y], grad_w)
def softmax(w):
    """Row-wise softmax of a 2-D array-like.

    Subtracts each row's maximum before exponentiating for numerical
    stability (identical result, but avoids overflow on large inputs).

    :param w: 2-D array-like of scores, shape (n_samples, n_classes).
    :returns: ndarray of the same shape; each row sums to 1.
    """
    w = numpy.asarray(w)
    # keepdims=True keeps the reduced axis, so the row-wise max and sum
    # broadcast back against w without the manual reshape dance.
    e = numpy.exp(w - numpy.amax(w, axis=1, keepdims=True))
    return e / numpy.sum(e, axis=1, keepdims=True)
def p_y_given_x(X, w, b):
    """Class probabilities for each row of X: softmax(X.w + b)."""
    linear = numpy.dot(X, w) + b
    return softmax(linear)
def argmax(X, w, b):
    """Index of the most probable class for each sample row in X."""
    probs = p_y_given_x(X, w, b)
    return numpy.argmax(probs, axis=1)
def neg_log_likelihood(X, w, b, y):
    """Mean negative log-likelihood of the true labels.

    For each sample i, takes the model's predicted probability of the
    true class y[i] and averages -log of those probabilities.

    :param X: input matrix, shape (n_samples, n_features).
    :param w: weight matrix, shape (n_features, n_classes).
    :param b: bias vector, length n_classes.
    :param y: integer label vector, length n_samples.
    :returns: scalar mean negative log-likelihood.
    """
    # Original computed p_y_given_x twice (once into an unused local,
    # once inline) and left two dead variables; compute it once.
    log_probs = numpy.log(p_y_given_x(X, w, b))
    return -numpy.mean(log_probs[numpy.arange(y.shape[0]), y])
# ---------------------------------------------------------------------------
# Benchmark driver: times the compiled Theano graphs against the hand-rolled
# NumPy implementations above, and asserts the two agree (after rounding).
# ---------------------------------------------------------------------------
theano_time = 0  # total seconds spent in the Theano versions
our_time = 0     # total seconds spent in the NumPy versions
# Compile every Theano graph once, up front, so graph-compilation cost is
# not mixed into the timed sections below.
theano_procs = {
    'softmax': theano_softmax(),
    'argmax': theano_argmax(),
    'p_y_given_x': theano_p_y_given_x(),
    'neg_log_likelihood': theano_neg_log_likelihood()
}
num_samples = 20000
num_features = 784  # NOTE(review): 784 = 28*28, presumably MNIST-sized — confirm
num_outputs = 10
for i in range(1):
    # floatX = float32 in config
    X = numpy.array(numpy.random.rand(num_samples, num_features), dtype=config.floatX)
    w = numpy.array(numpy.random.rand(num_features, num_outputs), dtype=config.floatX)
    b = numpy.array(numpy.random.rand(num_outputs), dtype=config.floatX)
    # NOTE(review): numpy.random.random_integers is deprecated in modern
    # NumPy; numpy.random.randint(0, num_outputs, num_samples) is the
    # current equivalent.
    y = numpy.array(numpy.random.random_integers(0, num_outputs-1, num_samples), dtype='int32')

    # --- softmax ---
    start_time = time.time()
    theirs = theano_procs['softmax'](X)
    theano_time += (time.time() - start_time)
    start_time = time.time()
    ours = softmax(X)
    our_time += (time.time() - start_time)
    # around() (default decimals=0) is a deliberately coarse comparison,
    # tolerating float32 (Theano) vs. float64 (NumPy) drift.
    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    # --- p_y_given_x ---
    start_time = time.time()
    theirs = theano_procs['p_y_given_x'](X, w, b)
    theano_time += (time.time() - start_time)
    start_time = time.time()
    ours = p_y_given_x(X, w, b)
    our_time += (time.time() - start_time)
    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    # --- argmax ---
    start_time = time.time()
    theirs = theano_procs['argmax'](X, w, b)
    theano_time += (time.time() - start_time)
    start_time = time.time()
    ours = argmax(X, w, b)
    our_time += (time.time() - start_time)
    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

    # --- negative log-likelihood ---
    start_time = time.time()
    theirs = theano_procs['neg_log_likelihood'](X, w, b, y)
    theano_time += (time.time() - start_time)
    start_time = time.time()
    ours = neg_log_likelihood(X, w, b, y)
    our_time += (time.time() - start_time)
    assert numpy.array_equal(numpy.around(theirs), numpy.around(ours))

# Python 2 print statements (this script predates Python 3 syntax).
print "Theano Time", theano_time
print "Our time", our_time
Fantastic feedback, I'll update this and try again. Thanks!
I updated the code per your remarks, switched off of double precision math - using large inputs rather than many small calls.
num_samples = 20k
Device = cpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Theano Time 0.359999895096
Our time 0.466942310333
Device = gpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Using gpu device 0: GeForce GT 650M
Theano Time 0.327509880066
Our time 0.481206655502
with num_samples = 50k
Device = cpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Theano Time 0.964176416397
Our time 1.20006990433
Device = gpu
(venv)amir-laptop:trans_code aelaguiz$ python sgd_notes.py
Using gpu device 0: GeForce GT 650M
Theano Time 0.729407787323
Our time 1.18586134911
All is right in the world, thank you!
A couple things: