@ailzhang
Created November 29, 2017 05:54
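# Benchmarks the nn.Linear forward pass on a Tesla P100 and compares the
# measured throughput (FLOP/s) against the CUDA SDK matrixMulCUBLAS sample
# as a cuBLAS baseline. Results are saved to p100_pt3.npy.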
import torch
import torch.nn as nn
import time
import subprocess
import numpy as np
from torch.autograd import Variable


def linearforward(batchsize, dim_in, dim_out):
    """Time a single nn.Linear forward pass on the GPU, in seconds."""
    data = np.random.random_sample([batchsize, dim_in])
    data_in = Variable(torch.FloatTensor(data).cuda())
    net = nn.Linear(dim_in, dim_out).cuda()
    torch.cuda.synchronize()  # make sure setup work has finished
    start = time.time()
    data_out = net(data_in)
    torch.cuda.synchronize()  # wait for the forward kernel to complete
    end = time.time()
    return end - start
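
# --- Added sketch, not part of the original gist. ---
# The helper above measures wall-clock time between two host-side
# torch.cuda.synchronize() calls. A hedged alternative is to time the same
# interval on the device with CUDA events, which avoids host timer noise.
# The function name below is illustrative only.
def linearforward_events(batchsize, dim_in, dim_out):
    """Time a single nn.Linear forward pass using CUDA events, in seconds."""
    data_in = Variable(torch.randn(batchsize, dim_in).cuda())
    net = nn.Linear(dim_in, dim_out).cuda()
    start_evt = torch.cuda.Event(enable_timing=True)
    end_evt = torch.cuda.Event(enable_timing=True)
    start_evt.record()
    net(data_in)
    end_evt.record()
    torch.cuda.synchronize()  # wait until both events have been recorded
    return start_evt.elapsed_time(end_evt) / 1000.0  # elapsed_time is in ms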
if __name__ == "__main__":
    shapes = [(22764, 2276), (2276, 30), (21740, 2174), (2174, 1024),
              (1024, 63), (21740, 2174), (2174, 1087), (1024, 1024)]
    sizes = []
    for i in (32, 64, 128, 256):
        for dim_in, dim_out in shapes:
            sizes.append((i, dim_in, dim_out))
            sizes.append((i, dim_out, dim_in))
            sizes.append((dim_in, i, dim_out))
    rep = 200
    warmup = 100
    result = {}
    for batchsize, dim_in, dim_out in sizes:
        time_acc = 0.0
        for i in range(rep):
            t = linearforward(batchsize, dim_in, dim_out)
            if i >= warmup:
                time_acc = time_acc + t
        avg = time_acc / (rep - warmup)
        # An (M, K) x (K, N) matmul costs 2 * M * K * N floating-point ops.
        flops = batchsize * dim_in * dim_out * 2 / avg
        # cuBLAS baseline: the matrixMulCUBLAS sample from the CUDA SDK.
        cu_out = subprocess.run(
            ['./matrixMulCUBLAS', '--hA=' + str(batchsize),
             '--wA=' + str(dim_in), '--wB=' + str(dim_out)],
            stdout=subprocess.PIPE)
        cu_out = cu_out.stdout.decode()
        # Parse "Performance= <value> GFlop/s" from the sample's output.
        anchor1 = cu_out.find("Performance=")
        anchor2 = cu_out.find("GFlop/s")
        cu_flops = float(cu_out[anchor1 + 13:anchor2 - 1])
        # Single-precision peak of the Tesla P100, in TFLOP/s.
        p100_peak = 9.3
        result[(batchsize, dim_in, dim_out)] = (
            flops,                           # PyTorch FLOP/s
            flops / (p100_peak * 10 ** 12),  # fraction of P100 peak
            cu_flops,                        # cuBLAS GFLOP/s
            cu_flops / 1000 / p100_peak)     # fraction of P100 peak
    print(result)
    np.save('p100_pt3.npy', result)
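    # --- Added sketch, not part of the original gist: reading results back.
    # np.save pickles the Python dict, so np.load returns a 0-d object array
    # and .item() recovers the dict (newer NumPy also requires
    # allow_pickle=True, which older versions accept as well).
    loaded = np.load('p100_pt3.npy', allow_pickle=True).item()
    for key in sorted(loaded):
        pt_flops, pt_frac, cu_gflops, cu_frac = loaded[key]
        print(key, 'pytorch GFLOP/s:', pt_flops / 1e9,
              'cublas GFLOP/s:', cu_gflops)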