Skip to content

Instantly share code, notes, and snippets.

@ptrblck
Created August 30, 2019 15:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save ptrblck/8b1c6a7efd97604a7dedbf2c3edd1019 to your computer and use it in GitHub Desktop.
Save ptrblck/8b1c6a7efd97604a7dedbf2c3edd1019 to your computer and use it in GitHub Desktop.
import torch
import torch.nn as nn
torch.backends.cudnn.benchmark = True
from apex.normalization import FusedLayerNorm
import time
def time_forward(module, x, nb_iters=1000, warmup=50):
    """Return the seconds taken by ``nb_iters`` forward passes of ``module``.

    ``warmup`` untimed passes are executed first so that one-time start-up
    work is kept out of the measurement.

    Args:
        module: Callable applied to ``x`` (e.g. a normalization layer).
        x: Input tensor handed unchanged to every call.
        nb_iters: Number of timed forward passes.
        warmup: Number of untimed passes run before the clock starts.

    Returns:
        Elapsed time in seconds as a float.
    """
    # Synchronization is only meaningful (and only possible) for CUDA inputs.
    on_gpu = x.is_cuda

    for _ in range(warmup):
        _ = module(x)

    # Wait for queued GPU work to finish so the clock brackets exactly the
    # timed loop and nothing left over from the warm-up.
    if on_gpu:
        torch.cuda.synchronize()
    # perf_counter is monotonic and high resolution, unlike time.time().
    start = time.perf_counter()
    for _ in range(nb_iters):
        _ = module(x)
    if on_gpu:
        torch.cuda.synchronize()
    return time.perf_counter() - start


def main():
    """Time upstream ``nn.LayerNorm`` against apex ``FusedLayerNorm``."""
    # Create data
    x = torch.randn(64, 16, 224, 224, device='cuda')

    # upstream layernorm, normalizing over every non-batch dimension
    norm = nn.LayerNorm(x.size()[1:]).cuda()
    print('upstream layernorm {:.3f}'.format(time_forward(norm, x)))

    # apex fusedlayernorm, same normalized shape
    fused_norm = FusedLayerNorm(x.size()[1:]).cuda()
    print('apex layernorm {:.3f}'.format(time_forward(fused_norm, x)))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment