Skip to content

Instantly share code, notes, and snippets.

@colesbury
Created March 27, 2019 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save colesbury/63b2b0ce4eda11ba70db9f3135641cc9 to your computer and use it in GitHub Desktop.
# Micro-benchmark: compare a dense 3x3 conv ("baseline") against a
# channel-factorized "distilled" replacement (1x1 -> 3x3 -> 1x1) on GPU.
import torch
import time
N = 1 # batch size
# Input is NCHW: 64 channels, 160x120 spatial. Requires a CUDA device.
x = torch.randn(N, 64, 160, 120).cuda()
baseline_layer = torch.nn.Conv2d(64, 64, kernel_size=(3, 3), bias=False).cuda()
# Distilled variant: bottleneck to 14/15 channels, then expand back to 64.
distilled_layer = torch.nn.Sequential(
torch.nn.Conv2d(64, 14, kernel_size=(1, 1), bias=False),
torch.nn.Conv2d(14, 15, kernel_size=(3, 3), bias=False),
torch.nn.Conv2d(15, 64, kernel_size=(1, 1), bias=False)).cuda()
# Alternative scenario (256-channel 1x1 convs); uncomment to benchmark it instead.
#x = torch.randn(N, 256, 160, 120).cuda()
#baseline_layer = torch.nn.Conv2d(256, 128, kernel_size=(1, 1), bias=False).cuda()
#distilled_layer = torch.nn.Sequential(
# torch.nn.Conv2d(256, 1, kernel_size=(1, 1), bias=False),
# torch.nn.Conv2d(1, 128, kernel_size=(1, 1), bias=False)).cuda()
# Forward-only (may reduce some bookkeeping overhead)
torch.set_grad_enabled(False)
# Warm-up calls: without these lines cuDNN sometimes chooses suboptimal algos
# (need to investigate further)
baseline_layer(x)
distilled_layer(x)
# With torch.backends.cudnn.benchmark the first call
# may be much slower due to benchmarking all cuDNN algos,
# so warm up once more after enabling it.
torch.backends.cudnn.benchmark = True
baseline_layer(x)
distilled_layer(x)
# time 1000 forward passes
def benchmark(layer, x):
    """Return wall-clock seconds for 1000 forward passes of `layer` on `x`.

    Synchronizes the CUDA device before starting and before stopping the
    timer so queued (asynchronous) kernels are fully accounted for.
    Requires a CUDA device.
    """
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(1000):
        layer(x)
    # Kernels launch asynchronously; wait for them before reading the clock.
    torch.cuda.synchronize()
    end = time.time()
    return end - start
# Repeat each measurement 4x so caching/autotuning effects settle and the
# run-to-run variance is visible in the printed numbers.
for _ in range(4):
    print('baseline', benchmark(baseline_layer, x))
for _ in range(4):
    print('distilled', benchmark(distilled_layer, x))
# Run with cuProfiler enabled
def loop_with_profiler(layer, x):
    """Run 1000 forward passes of `layer` on `x` inside a cuProfiler region.

    Brackets the loop with cuProfilerStart/cuProfilerStop so that
    `nvprof --profile-from-start off` captures only these iterations.
    NOTE(review): `torch.cuda._load_cudart` is a private PyTorch API and
    may change between releases. Requires a CUDA device.
    """
    cudart = torch.cuda._load_cudart()
    # Drain any pending work so the profiled region contains only this loop.
    torch.cuda.synchronize()
    cudart.cuProfilerStart()
    for _ in range(1000):
        layer(x)
    torch.cuda.synchronize()
    cudart.cuProfilerStop()
# uncomment a line below and run with `nvprof --profile-from-start off -- python script.py`
# only do one at a time
#loop_with_profiler(baseline_layer, x)
#loop_with_profiler(distilled_layer, x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment