Created
June 12, 2019 18:35
-
-
Save lopuhin/255992a255810407e2c42a5513e20a13 to your computer and use it in GitHub Desktop.
Repro script for https://github.com/intel/mkl-dnn/issues/489
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ['OMP_NUM_THREADS'] = '1' | |
import argparse | |
import resource | |
import sys | |
import time | |
import numpy as np | |
import torch | |
from torch import nn | |
from torchvision.models import resnet34 | |
def _padded_height(height, min_height):
    """ Round ``height`` up to a multiple of a bin size derived from
    ``min_height``.

    The bin size grows with the height: a quarter, a half, one or two times
    ``min_height`` for heights below 3x, 6x, 12x ``min_height`` and above.
    Heights that already sit on a bin boundary are returned unchanged.
    """
    if height < min_height * 3:
        pad_to = min_height // 4
    elif height < min_height * 6:
        pad_to = min_height // 2
    elif height < min_height * 12:
        pad_to = min_height
    else:
        pad_to = min_height * 2
    # Integer division gives 0 for very small --min-height values; a bin of 1
    # keeps the modulo below well defined instead of raising ZeroDivisionError.
    pad_to = max(pad_to, 1)
    # ``-height % pad_to`` is the distance to the next boundary and is 0 when
    # the height is already aligned, so no extra full bin is added.
    padded = height + (-height % pad_to)
    assert padded % pad_to == 0
    return padded


def main():
    """ Time forward passes over inputs of random height and report peak
    RSS growth.

    Command-line flags:
      --mkldnn      convert the model and its inputs to the MKL-DNN layout
      --bin         round every height up to a coarse bin (fewer distinct
                    input shapes)
      --width       input width
      --min-height  lower clip for sampled heights (also sets the bin sizes)
      --max-height  upper clip for sampled heights
      --n           number of forward passes (must be at least 1)
      --seed        seed for the height sampler
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--mkldnn', action='store_true')
    arg('--bin', action='store_true')
    arg('--width', type=int, default=320)
    arg('--min-height', type=int, default=200)
    arg('--max-height', type=int, default=7680)
    arg('--n', type=int, default=1000)
    arg('--seed', type=int, default=42)
    args = parser.parse_args()
    if args.n < 1:
        # The percentile summaries below are undefined for an empty sample.
        parser.error('--n must be at least 1')
    print(f'Running with mkldnn={args.mkldnn} bin={args.bin}')

    # Log-normally distributed heights, clipped to the requested range.
    # Cast back to plain int so tensor sizes are not numpy scalars.
    rng = np.random.RandomState(args.seed)
    heights = [
        int(np.clip(int(100 * np.exp(rng.normal(1, 1.8))),
                    args.min_height, args.max_height))
        for _ in range(args.n)]
    hp50, hp95 = np.percentile(heights, [50, 95])
    print(f'heights: mean={np.mean(heights):.0f}, p50={hp50:.0f} '
          f'p95={hp95:.0f} max={np.max(heights):.0f}')

    model = Model()
    model.eval()
    if args.mkldnn:
        # Converts float32 tensors held by the model to the MKL-DNN layout and
        # leaves other dtypes alone.
        # NOTE(review): this goes through the private nn.Module._apply hook;
        # confirm it is still supported by the installed torch version.
        model._apply(lambda x: x.to_mkldnn() if x.dtype == torch.float32 else x)

    start_memory = get_ru_maxrss()
    times = []
    for i, height in enumerate(heights):
        if i and i % 100 == 0:
            print(f'n={i} memory growth (kb): '
                  f'{get_ru_maxrss() - start_memory:,}')
        if args.bin:
            height = _padded_height(height, args.min_height)
        # Input creation is deliberately kept outside the timed region.
        x = torch.randn((1, 3, height, args.width))
        t0 = time.perf_counter()
        with torch.no_grad():
            if args.mkldnn:
                x = x.to_mkldnn()
            y = model(x)
            if args.mkldnn:
                y = y.to_dense()
            # Pull a scalar back into Python to be extra sure the output is
            # evaluated; unlike an assert this is not stripped by ``-O``.
            y.mean().item()
        times.append(time.perf_counter() - t0)

    end_memory = get_ru_maxrss()
    tp50, tp95 = np.percentile(times, [50, 95])
    print(f'time: mean={np.mean(times):.3f} s, '
          f'p50={tp50:.3f} s, p95={tp95:.3f} s')
    print(f'memory (kb): {start_memory:,} initial, '
          f'{end_memory - start_memory:,} growth')
class Model(nn.Module):
    """ ResNet 34 backbone used as a feature extractor: the forward pass
    stops after ``layer4``, so no average pooling is applied.
    """

    def __init__(self):
        super().__init__()
        self.base = resnet34()

    def forward(self, x):
        """ Feed ``x`` through the stem and the four residual stages of the
        wrapped network, in order, and return the output of the last stage.
        """
        backbone = self.base
        stage_names = ('conv1', 'bn1', 'relu', 'maxpool',
                       'layer1', 'layer2', 'layer3', 'layer4')
        for stage_name in stage_names:
            x = getattr(backbone, stage_name)(x)
        return x
def get_ru_maxrss():
    """ Return the peak resident set size of this process, in kilobytes """
    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # Mac OS X reports ru_maxrss in bytes while Linux reports KB, so only the
    # former needs scaling down.
    return peak // 1024 if sys.platform == 'darwin' else peak
# Run the benchmark only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment