Skip to content

Instantly share code, notes, and snippets.

@ngimel
Created December 20, 2019 01:08
Show Gist options
  • Save ngimel/cac41f2f92cd95e8bb6781457a2bbce8 to your computer and use it in GitHub Desktop.
import torch
import time

# Benchmark fixture: nlayers "layers", each contributing a bias-like
# vector and a weight-like square matrix, mimicking a model's parameter list.
nlayers = 10
size = 1024
params = []
for _ in range(nlayers):
    params.extend([
        torch.randn(size, device="cuda", requires_grad=True),
        torch.randn((size, size), device="cuda", requires_grad=True),
    ])
def bench(fn, nrep, a):
    """Return the average wall-clock time in seconds of one call ``fn(a)``.

    Performs one untimed warm-up call (triggers lazy CUDA init, allocator
    warm-up, etc.), then times ``nrep`` calls. CUDA is synchronized before
    and after the timed region — when available — so that queued GPU kernels
    are included in the measurement rather than just launch overhead.

    Args:
        fn: callable taking a single argument.
        nrep: number of timed repetitions (must be > 0).
        a: the argument passed to ``fn`` on every call.

    Returns:
        float: average seconds per call.
    """
    # dry run: result intentionally discarded, we only want the side effects
    fn(a)
    # Guard the sync so the benchmark also runs on CPU-only machines.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    # perf_counter is monotonic and higher-resolution than time.time()
    start = time.perf_counter()
    for _ in range(nrep):
        fn(a)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    end = time.perf_counter()
    return (end - start) / nrep
def norm_orig(parameters):
    """Total squared 2-norm, accumulated one parameter at a time.

    Mirrors the original clip_grad_norm_ pattern: each parameter's norm is
    computed as its own reduction, and the squares are summed serially.
    Returns 0.0 for an empty parameter list.
    """
    norm_type = 2
    return sum((p.data.norm(2) ** norm_type for p in parameters), 0.0)
def norm_1(parameters):
    """Total squared 2-norm via a single fused reduction.

    Stacks the per-parameter norms into one tensor and reduces once,
    instead of accumulating one small result per parameter.
    """
    norm_type = 2
    per_param = [torch.norm(p.data, norm_type) for p in parameters]
    stacked = torch.stack(per_param)
    return (stacked ** norm_type).sum()
# Compare the serial-accumulation norm against the fused-stack norm.
nrep = 100
t_loop = bench(norm_orig, nrep, params)
t_fused = bench(norm_1, nrep, params)
print(t_loop, t_fused)

# Sweep doubling vector sizes to measure torch.norm throughput.
size = 1024
nrep = 1000
for _ in range(20):
    vec = torch.randn(size, device="cuda")
    elapsed = bench(torch.norm, nrep, vec)
    # columns: size, avg seconds per call, achieved bandwidth in GB/s
    print(size, elapsed, size * vec.element_size() * 1e-9 / elapsed)
    size *= 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment