layernorm_vs_fused (@bryant1410), forked from ptrblck/layernorm_vs_fused, created September 22, 2019
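# Benchmark: forward-pass speed of torch.nn.LayerNorm vs. apex.normalization.FusedLayerNorm
# on a (64, 16, 224, 224) CUDA tensor, in float32 and float16.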
import time
import torch
import torch.nn as nn
from apex.normalization import FusedLayerNorm
torch.backends.cudnn.benchmark = True
nb_iters = 10000
# Create data
x = torch.randn(64, 16, 224, 224, device='cuda')
# upstream layernorm
norm = nn.LayerNorm(x.size()[1:]).cuda()
# apex fusedlayernorm
fused_norm = FusedLayerNorm(x.size()[1:]).cuda()
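# Time nb_iters forward passes through `module` after a short warmup; CUDA is
# synchronized before and after the timed loop so time.time() covers the full GPU work.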
def run(name, module, input_):
    # cudnn warmup
    for _ in range(50):
        _ = module(input_)
    torch.cuda.synchronize()

    t0 = time.time()
    for _ in range(nb_iters):
        _ = module(input_)
    torch.cuda.synchronize()
    t1 = time.time()
    print("{} layernorm {:.3f}".format(name, t1 - t0))
run('upstream', norm, x)
run('apex', fused_norm, x)
x = x.half()
run('upstream half', norm.half(), x)
run('apex half', fused_norm.half(), x)
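
# Alternative timing approach: a minimal sketch using torch.cuda.Event instead of
# time.time(). The helper name `run_with_events` is hypothetical and only for
# illustration; Event.elapsed_time() reports milliseconds, converted to seconds here.
def run_with_events(name, module, input_):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    # warmup
    for _ in range(50):
        _ = module(input_)
    torch.cuda.synchronize()

    start.record()
    for _ in range(nb_iters):
        _ = module(input_)
    end.record()
    torch.cuda.synchronize()
    print("{} layernorm {:.3f} s".format(name, start.elapsed_time(end) / 1000.0))

# Example usage (modules and x are already in half precision at this point):
# run_with_events('apex half (events)', fused_norm, x)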