Created
November 8, 2018 17:27
-
-
Save mrdrozdov/37123eed34eeaa7d1c6640d7ad2c5278 to your computer and use it in GitHub Desktop.
spoiler-alert.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import argparse | |
from tqdm import tqdm | |
# Command-line interface: a single boolean switch, --flip, exposed as options.flip.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--flip',
    action='store_true',
)
options = parser.parse_args()

# Benchmark input: a 3 x 2 x 1000 x 1000 float tensor, filled in place with
# uniform random samples.
x = torch.FloatTensor(3, 2, 1000, 1000)
x.uniform_()

# Index tensor holding positions 0 and 1.
index = torch.LongTensor([0, 1])

# Running totals of profiled time, one accumulator per method.
t0 = 0
t1 = 0
def f0():
    """Profile method A: list-based indexing ``x[:, 0, [0, 1], :]``.

    Reads the module-level tensor ``x``.

    Returns:
        A 3-tuple ``(cpu_total, x0, prof0)``: the summed ``cpu_time_total`` of
        the profiler's per-op groups, the indexed tensor, and the profiler
        object itself.
    """
    with torch.autograd.profiler.profile(enabled=True) as prof0:
        x0 = x[:, 0, [0, 1], :]
    # key_averages() groups events by op; `cpu_time` on a group is the
    # per-call mean, so summing it under-counts ops that run several times
    # in one call. `cpu_time_total` keeps every occurrence.
    # NOTE(review): the total may still include time of ops nested inside
    # other profiled ops -- confirm whether exclusive (self) time is preferred.
    cpu_total = sum(avg.cpu_time_total for avg in prof0.key_averages())
    return cpu_total, x0, prof0
def f1():
    """Profile method B: ``x[:, 0].index_select(index=index, dim=1)``.

    Reads the module-level tensors ``x`` and ``index``.

    Returns:
        A 3-tuple ``(cpu_total, x1, prof1)``: the summed ``cpu_time_total`` of
        the profiler's per-op groups, the selected tensor, and the profiler
        object itself.
    """
    with torch.autograd.profiler.profile(enabled=True) as prof1:
        x1 = x[:, 0].index_select(index=index, dim=1)
    # Sum totals rather than `cpu_time` (a per-call mean per op group) so
    # that an op appearing more than once is counted for every occurrence.
    # NOTE(review): the total may still include time of ops nested inside
    # other profiled ops -- confirm whether exclusive (self) time is preferred.
    cpu_total = sum(avg.cpu_time_total for avg in prof1.key_averages())
    return cpu_total, x1, prof1
# Number of times each method is profiled.
rounds = 50

# With --flip, method A (f0) runs before method B (f1) in every round;
# otherwise B runs first. The flag never changes, so pick the order once.
call_order = (f0, f1) if options.flip else (f1, f0)

for _ in tqdm(range(rounds)):
    # Run both methods in the chosen order, keyed by the function that ran.
    outcome = {fn: fn() for fn in call_order}
    _t0, x0, prof0 = outcome[f0]
    _t1, x1, prof1 = outcome[f1]

    # Accumulate the per-round profiled time of each method.
    t0 += _t0
    t1 += _t1

    # Both methods must produce exactly the same tensor.
    assert torch.equal(x0, x1)
# Emit the whole report in one call: every item is followed by a newline,
# and the empty strings reproduce the blank separator lines.
print(
    'Profiler Output after 1 call',
    '----------------------------',
    '',
    '# Method A',
    prof0,  # profiler object left over from the last round of method A
    '',
    '# Method B',
    prof1,  # profiler object left over from the last round of method B
    '',
    'Total time over %s calls' % rounds,
    '-------------------------',
    '',
    'Method A (micro-seconds): ' + str(t0),
    'Method B (micro-seconds): ' + str(t1),
    '',
    sep='\n',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Profiler Output after 1 call | |
---------------------------- | |
# Method A | |
----------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------- --------------- --------------- --------------- --------------- --------------- | |
select 15.768us 0.000us 1 15.768us 0.000us | |
as_strided 10.561us 0.000us 1 10.561us 0.000us | |
_cast_int64_t 0.559us 0.000us 1 0.559us 0.000us | |
_cast_int64_t 0.094us 0.000us 1 0.094us 0.000us | |
index 314.540us 0.000us 1 314.540us 0.000us | |
mul 4.623us 0.000us 1 4.623us 0.000us | |
arange 5.677us 0.000us 1 5.677us 0.000us | |
mul 4.575us 0.000us 1 4.575us 0.000us | |
view 5.270us 0.000us 1 5.270us 0.000us | |
view 3.863us 0.000us 1 3.863us 0.000us | |
arange 57.651us 0.000us 1 57.651us 0.000us | |
view 4.721us 0.000us 1 4.721us 0.000us | |
view 4.660us 0.000us 1 4.660us 0.000us | |
view 1.745us 0.000us 1 1.745us 0.000us | |
expand 2.213us 0.000us 1 2.213us 0.000us | |
expand 2.632us 0.000us 1 2.632us 0.000us | |
add 5.489us 0.000us 1 5.489us 0.000us | |
expand 1.296us 0.000us 1 1.296us 0.000us | |
expand 1.069us 0.000us 1 1.069us 0.000us | |
add 24.704us 0.000us 1 24.704us 0.000us | |
take 165.936us 0.000us 1 165.936us 0.000us | |
# Method B | |
---------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
---------------- --------------- --------------- --------------- --------------- --------------- | |
select 12.835us 0.000us 1 12.835us 0.000us | |
as_strided 8.331us 0.000us 1 8.331us 0.000us | |
index_select 20.428us 0.000us 1 20.428us 0.000us | |
Total time over 50 calls | |
------------------------- | |
Method A (micro-seconds): 23263.213399999993 | |
Method B (micro-seconds): 2429.4809999999998 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment