Skip to content

Instantly share code, notes, and snippets.

@gaebor
Last active February 16, 2018 10:05
Show Gist options
  • Save gaebor/3e55c85a318040acdbf6897264321a8d to your computer and use it in GitHub Desktop.
Save gaebor/3e55c85a318040acdbf6897264321a8d to your computer and use it in GitHub Desktop.
This example shows that a narrow matrix product, or batched dot product, is poorly parallelized.
from __future__ import print_function
import numpy
import theano
import sys
import time
# Default problem size: n is the batch count; the operands are built with
# shapes (n, i, j) and (n, j, k).
n, i, j, k = 100000, 10, 10, 10
def batch_dot(X, Y):
    """Return the batched matrix product of X and Y.

    X and Y are 3-D tensors shaped (batch, i, j) and (batch, j, k); the
    result has shape (batch, i, k).

    The implementation is selected by the first command-line argument:
    with no argument, or with "builtin", theano.tensor.batched_dot is
    used; with anything else the product is written out as a broadcast
    multiplication followed by a sum over the shared axis.

    discussed with Tim Cooijmans via email
    @see http://www.mila.umontreal.ca/Home/people
    @see https://github.com/Theano/Theano/pull/3508
    """
    if len(sys.argv) < 2 or sys.argv[1] == "builtin":
        return theano.tensor.batched_dot(X, Y)
    # Operate on the arguments rather than on module-level tensors, so the
    # function works for any caller-supplied operands.
    # X[:, :, :, None] is (batch, i, j, 1) and Y[:, None, :, :] is
    # (batch, 1, j, k); their product broadcasts to (batch, i, j, k) and
    # summing axis 2 contracts the shared j axis.
    return (X[:, :, :, None] * Y[:, None, :, :]).sum(axis=2)
# Optional command-line sizes "n i j k".  The first argument may instead be
# the implementation selector that batch_dot reads (e.g. "builtin"); skip it
# here so it is not passed to int() and sizes can be combined with a mode.
size_args = sys.argv[1:]
try:
    int(size_args[0])
except (IndexError, ValueError):
    size_args = size_args[1:]
if len(size_args) >= 4:
    n, i, j, k = map(int, size_args[:4])

# All-ones operands held as Theano shared variables, so the compiled
# function takes no inputs.
M = theano.shared(numpy.ones((n, i, j)).astype(theano.config.floatX))
N = theano.shared(numpy.ones((n, j, k)).astype(theano.config.floatX))

# Reduce the product to a scalar so only a single number is returned.
f = theano.function([], batch_dot(M, N).sum())

# Time ten evaluations, printing the latest and the running average time.
times = []
for _ in range(10):  # throwaway name: do not overwrite the dimension `i`
    t1 = time.time()
    f_ret = f()
    times.append(time.time() - t1)
    print("\rf=%g, time=%g, avg_t=%g\t" % (f_ret, times[-1], sum(times)/len(times)), end=" ")
    # No newline is written, so flush to make the progress line visible.
    sys.stdout.flush()
print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment