Skip to content

Instantly share code, notes, and snippets.

@JesseLivezey
Last active May 8, 2017 23:20
Show Gist options
  • Save JesseLivezey/42cabcf87aa0033410f7520933942127 to your computer and use it in GitHub Desktop.
Save JesseLivezey/42cabcf87aa0033410f7520933942127 to your computer and use it in GitHub Desktop.
import numpy as np
import theano
import theano.tensor as T
def numpy_version(prob, cases):
    """Sum ``prob * cases`` over axis 1 using a batched ``np.matmul``.

    For 3-D inputs with ``prob`` of shape ``(A, D, L)`` and ``cases`` of
    shape ``(N, D, L)`` the result has shape ``(N, A, L)`` and satisfies::

        out[n, a, l] = sum_d prob[a, d, l] * cases[n, d, l]

    Args:
        prob: 3-D array of shape ``(A, D, L)``.
        cases: 3-D array of shape ``(N, D, L)``; its ``D`` and ``L`` axes
            must match those of ``prob``.

    Returns:
        Array of shape ``(N, A, L)`` holding the contraction over ``D``.
    """
    # Bring the trailing axis to the front so matmul treats it as the batch
    # axis: (A, D, L) -> (L, A, D).
    prob_batched = prob.transpose(2, 0, 1)
    # Reversing all axes of cases gives (L, D, N), lining up the batch axis
    # and putting the contracted axis D in the matmul-compatible position.
    cases_batched = cases.T
    # Batched (A, D) @ (D, N) -> (L, A, N); reversing the axes again yields
    # (N, A, L).
    return np.matmul(prob_batched, cases_batched).T
# Problem sizes: `dot_dim` is the axis that gets summed over, `other_dim` is
# the leading axis of `cases`, and `last_dim` is the trailing axis shared by
# both arrays.
last_dim, dot_dim, other_dim = 50, 1000, 1000

# Random float32 inputs: prob is (1, dot_dim, last_dim) and cases is
# (other_dim, dot_dim, last_dim).
prob = np.random.random(size=(1, dot_dim, last_dim)).astype('float32')
cases = np.random.random(size=(other_dim, dot_dim, last_dim)).astype('float32')

# Setup theano functions.
# The length-1 leading axis of prob is declared broadcastable so that the
# elementwise product with cases_s below is allowed.
prob_s = theano.shared(prob, broadcastable=[True, False, False])
cases_s = theano.shared(cases)

# Variant 1: batched matrix product over the trailing axis.
# (last, 1, dot) x (last, dot, other) -> (last, 1, other), then the axes are
# permuted back to (other, 1, last).
result1 = T.batched_dot(
    prob_s.dimshuffle(2, 0, 1),
    cases_s.dimshuffle(2, 1, 0),
).dimshuffle(2, 1, 0)
f1 = theano.function(inputs=[], outputs=result1)

# Variant 2: broadcasted elementwise product followed by a sum over the
# contracted axis, keeping it as a length-1 axis -> (other, 1, last).
result2 = T.sum(prob_s * cases_s, axis=1, keepdims=True)
f2 = theano.function(inputs=[], outputs=result2)
# Timing on k40
# NOTE(review): "k40" presumably refers to an NVIDIA Tesla K40 GPU used by the
# theano functions -- confirm. `%timeit` is an IPython magic, so this section
# must be run in IPython/Jupyter; the comment under each call is the output
# recorded by the author.
# Reference implementation: batched np.matmul.
%timeit numpy_version(prob, cases)
# 10 loops, best of 3: 23.4 ms per loop
# Theano variant 1: T.batched_dot with dimshuffles.
%timeit f1()
# 10 loops, best of 3: 35.9 ms per loop
# Theano variant 2: broadcasted multiply followed by a sum over axis 1.
%timeit f2()
# 100 loops, best of 3: 7.95 ms per loop
# Sanity checks: both theano variants must match the numpy reference within
# np.allclose's default tolerances.
assert np.allclose(numpy_version(prob, cases), f1())
assert np.allclose(numpy_version(prob, cases), f2())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment