
@cchan
Created December 28, 2022 03:52
A janky benchmark script for autocasted grouped convolution in PyTorch
import time
import torch

# Disable TF32 so the enabled=False case measures a true fp32 baseline.
# (cudnn.allow_tf32 is the flag that governs convolutions; the matmul flag
# alone does not affect conv2d.)
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False

GROUPS = 64
for k in [3, 5, 7]:
    x = torch.rand(16, 64, 1024, 1024, device="cuda", dtype=torch.float32)
    weight = torch.rand(64, 64 // GROUPS, k, k, device="cuda", dtype=torch.float32)
    for ctx in [
        torch.autocast("cuda", enabled=False),
        torch.autocast("cuda", dtype=torch.float16),
        torch.autocast("cuda", dtype=torch.bfloat16),
    ]:
        with ctx:
            # Warmup so cuDNN kernel selection isn't counted in the timing.
            for _ in range(10):
                torch.nn.functional.conv2d(x, weight, groups=GROUPS)
            torch.cuda.synchronize()

            # Time 100 convolutions and report the mean seconds per call.
            start = time.time()
            for _ in range(100):
                torch.nn.functional.conv2d(x, weight, groups=GROUPS)
            torch.cuda.synchronize()
            print((time.time() - start) / 100)
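Wall-clock timing with time.time() plus torch.cuda.synchronize() works, but CUDA events are a common alternative for per-call numbers. The sketch below is a hedged variant under the same shapes and GROUPS as the script above; the time_conv helper is hypothetical and not part of the original gist.

import torch

# Hypothetical helper (not in the original gist): time a grouped conv2d with
# CUDA events instead of time.time(). Returns mean milliseconds per call.
def time_conv(x, weight, groups, warmup=10, iters=100):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    for _ in range(warmup):
        torch.nn.functional.conv2d(x, weight, groups=groups)
    torch.cuda.synchronize()
    start.record()
    for _ in range(iters):
        torch.nn.functional.conv2d(x, weight, groups=groups)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters  # milliseconds per call

# Example usage mirroring one configuration from the benchmark above.
GROUPS = 64
x = torch.rand(16, 64, 1024, 1024, device="cuda")
weight = torch.rand(64, 64 // GROUPS, 3, 3, device="cuda")
with torch.autocast("cuda", dtype=torch.float16):
    print(time_conv(x, weight, GROUPS))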