Skip to content

Instantly share code, notes, and snippets.

@syed-ahmed
Created May 8, 2019 23:09
Show Gist options
  • Save syed-ahmed/f8b7384d642f4bce484228b508b4bc68 to your computer and use it in GitHub Desktop.
Save syed-ahmed/f8b7384d642f4bce484228b508b4bc68 to your computer and use it in GitHub Desktop.
"""Micro-benchmark for the in-place ``Tensor.uniform_`` call on a CUDA device.

Starting from ``INITIAL_SIZE`` elements and doubling ``NUM_SIZES`` times, the
script times ``NREP`` back-to-back ``uniform_()`` calls on a float32 tensor and
prints the mean time per call together with the implied write bandwidth.
"""
import time

import torch

INITIAL_SIZE = 128 * 512  # number of elements in the first measurement
NUM_SIZES = 10            # how many sizes to measure (size doubles each time)
NREP = 100                # timed uniform_() calls per size


def bandwidth_gb_per_s(
    num_elements: int, seconds_per_call: float, bytes_per_element: int = 4
) -> float:
    """Return the bandwidth in GB/s (1 GB = 1e9 bytes) for one call.

    Only the bytes of the tensor itself are counted, i.e.
    ``num_elements * bytes_per_element`` per call.  The default of 4 bytes
    matches the float32 tensors allocated by :func:`time_uniform`.
    """
    return num_elements * bytes_per_element * 1e-9 / seconds_per_call


def time_uniform(num_elements: int, repetitions: int) -> float:
    """Return the mean seconds per ``uniform_()`` call on a CUDA tensor.

    Args:
        num_elements: Length of the 1-D float32 tensor to fill.
        repetitions: Number of timed calls; must be positive.

    Raises:
        ValueError: If ``repetitions`` is not positive.
    """
    if repetitions <= 0:
        raise ValueError("repetitions must be a positive integer")

    # Allocate directly on the GPU; float32 keeps the 4-bytes-per-element
    # assumption of the bandwidth computation valid.
    tensor = torch.empty(num_elements, dtype=torch.float32, device="cuda")

    # Warm-up call so one-time setup cost is not included in the timing.
    tensor.uniform_()
    # Wait for pending GPU work to finish before reading the clock.
    torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(repetitions):
        tensor.uniform_()
    # Wait for all timed calls to complete before stopping the clock.
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    return elapsed / repetitions


def main() -> None:
    """Run the benchmark over doubling tensor sizes and print one line each."""
    if not torch.cuda.is_available():
        raise SystemExit("This benchmark requires a CUDA-capable device.")

    num_elements = INITIAL_SIZE
    for _ in range(NUM_SIZES):
        seconds_per_call = time_uniform(num_elements, NREP)
        print(
            "uniform, size, elements", num_elements,
            "forward", seconds_per_call,
            "bandwidth (GB/s)", bandwidth_gb_per_s(num_elements, seconds_per_call),
        )
        num_elements *= 2


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment