Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save myleott/071da5c68c884b750ca4ec81dc414183 to your computer and use it in GitHub Desktop.
Save myleott/071da5c68c884b750ca4ec81dc414183 to your computer and use it in GitHub Desktop.
import time
import torch
import torch.nn as nn
from apex.normalization import FusedLayerNorm
# Turn on cuDNN benchmark mode before any module is built.
torch.backends.cudnn.benchmark = True

# Number of timed forward passes performed by each benchmark run.
nb_iters = 10000

# Random benchmark input, allocated directly on the CUDA device.
x = torch.randn(512, 16, 1024, device='cuda')

# Reference implementation: PyTorch's built-in LayerNorm over the last dim.
norm = nn.LayerNorm(x.size(-1)).to('cuda')

# Candidate implementation: apex's FusedLayerNorm over the same dim.
fused_norm = FusedLayerNorm(x.size(-1)).to('cuda')
def run(name, module, input_, *, iters=None, warmup=50):
    """Time repeated calls of ``module(input_)`` and print the elapsed seconds.

    Args:
        name: Label placed at the start of the printed result line.
        module: Callable under test; invoked as ``module(input_)``.
        input_: Argument passed to ``module`` on every call.
        iters: Number of timed calls. When ``None`` (the default) the
            module-level ``nb_iters`` value is used.
        warmup: Number of untimed calls made before the measurement starts.

    Prints one line of the form ``"<name> layernorm <seconds>"`` with the
    elapsed time formatted to three decimal places. Returns ``None``.
    """
    if iters is None:
        # Fall back to the script-wide iteration count.
        iters = nb_iters

    # Synchronize only when CUDA is present, so the helper can also be
    # exercised on a CPU-only machine without raising.
    sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)

    # Warmup: untimed calls issued before the clock starts.
    for _ in range(warmup):
        module(input_)

    # Wait for the queued warmup work to finish before starting the clock.
    sync()
    # perf_counter is monotonic, unlike time.time(), which can jump if the
    # system clock is adjusted mid-run.
    t0 = time.perf_counter()
    for _ in range(iters):
        module(input_)
    # Wait for every timed call to finish before stopping the clock.
    sync()
    t1 = time.perf_counter()

    print("{} layernorm {:.3f}".format(name, t1 - t0))
# Benchmarks on the input as originally created.
run(name='upstream', module=norm, input_=x)
run(name='apex', module=fused_norm, input_=x)

# Half-precision benchmarks: cast the input, then cast each module right
# before it is timed (Module.half() converts in place and returns the module).
x = x.to(torch.float16)
run(name='upstream half', module=norm.half(), input_=x)
run(name='apex half', module=fused_norm.half(), input_=x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment