Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import torch
import torch.nn as nn

# Let cudnn autotune kernel choices for the fixed input shape used below;
# this is why each module gets a warmup phase before timing.
torch.backends.cudnn.benchmark = True

from apex.normalization import FusedLayerNorm
import time


def _benchmark(module, x, label, warmup=50, nb_iters=1000):
    """Time `nb_iters` forward passes of `module` on `x` and print the total.

    Args:
        module: the layer to benchmark (called as `module(x)`).
        x: input tensor (expected on the same CUDA device as `module`).
        label: prefix for the printed timing line.
        warmup: untimed forward passes to trigger cudnn autotuning / lazy init.
        nb_iters: timed forward passes.

    Returns:
        Elapsed wall-clock seconds for the timed passes.
    """
    # Untimed warmup so autotuning and one-time allocations don't skew results.
    for _ in range(warmup):
        _ = module(x)
    # CUDA kernels launch asynchronously; synchronize so the timer brackets
    # actual GPU work, not just kernel launches.
    torch.cuda.synchronize()
    # perf_counter is the monotonic high-resolution clock meant for timing
    # (time.time can jump with wall-clock adjustments).
    t0 = time.perf_counter()
    for _ in range(nb_iters):
        _ = module(x)
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - t0
    print('{} {:.3f}'.format(label, elapsed))
    return elapsed


# Create data: one fixed batch, normalized over the trailing (16, 224, 224) dims.
x = torch.randn(64, 16, 224, 224, device='cuda')

# Upstream (eager) LayerNorm.
norm = nn.LayerNorm(x.size()[1:]).cuda()
_benchmark(norm, x, 'upstream layernorm')

# Apex fused LayerNorm over the same normalized shape.
fused_norm = FusedLayerNorm(x.size()[1:]).cuda()
_benchmark(fused_norm, x, 'apex layernorm')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.