import torch
import time

N = 1  # batch size
x = torch.randn(N, 64, 160, 120).cuda()
baseline_layer = torch.nn.Conv2d(64, 64, kernel_size=(3, 3), bias=False).cuda()
distilled_layer = torch.nn.Sequential(
    torch.nn.Conv2d(64, 14, kernel_size=(1, 1), bias=False),
    torch.nn.Conv2d(14, 15, kernel_size=(3, 3), bias=False),
    torch.nn.Conv2d(15, 64, kernel_size=(1, 1), bias=False)).cuda()
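# Added reference note (not part of the original timing logic): parameter counts.
#   baseline:  64*64*3*3                              = 36,864 weights
#   distilled: 64*14 + 14*15*3*3 + 15*64 = 896 + 1,890 + 960 = 3,746 weights
# So the factored stack has roughly 10x fewer parameters than the single 3x3 conv.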
#x = torch.randn(N, 256, 160, 120).cuda()
#baseline_layer = torch.nn.Conv2d(256, 128, kernel_size=(1, 1), bias=False).cuda()
#distilled_layer = torch.nn.Sequential(
#    torch.nn.Conv2d(256, 1, kernel_size=(1, 1), bias=False),
#    torch.nn.Conv2d(1, 128, kernel_size=(1, 1), bias=False)).cuda()

# Forward-only (may reduce some bookkeeping overhead)
torch.set_grad_enabled(False)

# Without these lines cuDNN sometimes chooses suboptimal algos
# (need to investigate further)
baseline_layer(x)
distilled_layer(x)

# With torch.backends.cudnn.benchmark the first call
# may be much slower due to benchmarking all cuDNN algos
torch.backends.cudnn.benchmark = True
baseline_layer(x)
distilled_layer(x)
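# Added note: benchmark mode caches the chosen algorithm per unique input
# shape, so keeping N/H/W fixed means every timed call below reuses the
# algorithm selected during these warm-up passes.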
# time 1000 forward passes
def benchmark(layer, x):
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(1000):
        layer(x)
    torch.cuda.synchronize()
    end = time.time()
    return end - start
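# A sketch of an alternative (not from the original gist): timing with CUDA
# events instead of wall-clock time. Events are recorded on the GPU stream,
# so this measures device time directly rather than the host-side
# synchronize/time.time() window.
def benchmark_events(layer, x):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    for _ in range(1000):
        layer(x)
    end.record()
    torch.cuda.synchronize()
    # elapsed_time returns milliseconds; divide to match benchmark()'s seconds
    return start.elapsed_time(end) / 1000.0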
for _ in range(4):
    print('baseline', benchmark(baseline_layer, x))
for _ in range(4):
    print('distilled', benchmark(distilled_layer, x))
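# Added note: each printed number is seconds for 1000 forward passes;
# conveniently, that value is numerically the average per-pass latency in ms.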
# Run with cuProfiler enabled
def loop_with_profiler(layer, x):
    cudart = torch.cuda._load_cudart()
    torch.cuda.synchronize()
    cudart.cuProfilerStart()
    for _ in range(1000):
        layer(x)
    torch.cuda.synchronize()
    cudart.cuProfilerStop()

# Uncomment a line below and run with `nvprof --profile-from-start off -- python script.py`
# (only do one at a time)
#loop_with_profiler(baseline_layer, x)
#loop_with_profiler(distilled_layer, x)