Skip to content

Instantly share code, notes, and snippets.

@mrdrozdov
Created November 8, 2018 17:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdrozdov/37123eed34eeaa7d1c6640d7ad2c5278 to your computer and use it in GitHub Desktop.
Save mrdrozdov/37123eed34eeaa7d1c6640d7ad2c5278 to your computer and use it in GitHub Desktop.
spoiler-alert.txt
import torch
import argparse
from tqdm import tqdm
# Command line: a single boolean switch, --flip, read by the benchmark loop
# to decide which of the two methods is profiled first in each round.
parser = argparse.ArgumentParser()
parser.add_argument('--flip', action='store_true')
options = parser.parse_args()

# Shared inputs for both methods: the two positions to gather, and a
# (3, 2, 1000, 1000) float tensor filled in place with uniform samples.
index = torch.LongTensor([0, 1])
x = torch.FloatTensor(3, 2, 1000, 1000).uniform_()

# Running CPU-time totals for Method A (t0) and Method B (t1).
t0 = t1 = 0
def f0():
    """Profile Method A: list-based (advanced) indexing into ``x``.

    Runs ``x[:, 0, [0, 1], :]`` once under the autograd profiler.

    Returns:
        tuple: ``(cpu_us, x0, prof0)`` where ``cpu_us`` is the summed CPU
        time of all recorded ops (reported by the script as micro-seconds),
        ``x0`` is the indexed tensor, and ``prof0`` is the profiler object,
        kept so the caller can print its table.
    """
    with torch.autograd.profiler.profile(enabled=True) as prof0:
        x0 = x[:, 0, [0, 1], :]
    # key_averages() merges events that share an op name; on the merged
    # entry `cpu_time` is the per-call mean, so summing it undercounts any
    # op invoked more than once. `cpu_time_total` keeps every call.
    # NOTE(review): an op's time may include time spent in ops it
    # dispatches to, so this sum can double count nested work - confirm
    # against the profiler docs for the PyTorch version in use.
    total_cpu_us = sum(avg.cpu_time_total for avg in prof0.key_averages())
    return total_cpu_us, x0, prof0
def f1():
    """Profile Method B: ``select`` followed by ``index_select``.

    Runs ``x[:, 0].index_select(index=index, dim=1)`` once under the
    autograd profiler.

    Returns:
        tuple: ``(cpu_us, x1, prof1)`` where ``cpu_us`` is the summed CPU
        time of all recorded ops (reported by the script as micro-seconds),
        ``x1`` is the selected tensor, and ``prof1`` is the profiler object,
        kept so the caller can print its table.
    """
    with torch.autograd.profiler.profile(enabled=True) as prof1:
        x1 = x[:, 0].index_select(index=index, dim=1)
    # key_averages() merges events that share an op name; on the merged
    # entry `cpu_time` is the per-call mean, so summing it would undercount
    # any op invoked more than once. `cpu_time_total` keeps every call.
    total_cpu_us = sum(avg.cpu_time_total for avg in prof1.key_averages())
    return total_cpu_us, x1, prof1
# Number of times each method is profiled.
rounds = 50

for _ in tqdm(range(rounds)):
    # --flip only changes which method runs first within a round; the
    # right-hand tuple is evaluated left to right, so call order follows it.
    if options.flip:
        (_t0, x0, prof0), (_t1, x1, prof1) = f0(), f1()
    else:
        (_t1, x1, prof1), (_t0, x0, prof0) = f1(), f0()

    # Accumulate per-round CPU time for each method.
    t0, t1 = t0 + _t0, t1 + _t1

    # Both methods must produce exactly the same tensor.
    assert torch.equal(x0, x1)

# Report: profiler tables from the final round, then the accumulated totals.
print('Profiler Output after 1 call')
print('----------------------------')
print()
for label, prof in (('# Method A', prof0), ('# Method B', prof1)):
    print(label)
    print(prof)
    print()

print('Total time over {} calls'.format(rounds))
print('-------------------------')
print()
print('Method A (micro-seconds):', t0)
print('Method B (micro-seconds):', t1)
print()
Profiler Output after 1 call
----------------------------
# Method A
----------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------- --------------- --------------- --------------- --------------- ---------------
select 15.768us 0.000us 1 15.768us 0.000us
as_strided 10.561us 0.000us 1 10.561us 0.000us
_cast_int64_t 0.559us 0.000us 1 0.559us 0.000us
_cast_int64_t 0.094us 0.000us 1 0.094us 0.000us
index 314.540us 0.000us 1 314.540us 0.000us
mul 4.623us 0.000us 1 4.623us 0.000us
arange 5.677us 0.000us 1 5.677us 0.000us
mul 4.575us 0.000us 1 4.575us 0.000us
view 5.270us 0.000us 1 5.270us 0.000us
view 3.863us 0.000us 1 3.863us 0.000us
arange 57.651us 0.000us 1 57.651us 0.000us
view 4.721us 0.000us 1 4.721us 0.000us
view 4.660us 0.000us 1 4.660us 0.000us
view 1.745us 0.000us 1 1.745us 0.000us
expand 2.213us 0.000us 1 2.213us 0.000us
expand 2.632us 0.000us 1 2.632us 0.000us
add 5.489us 0.000us 1 5.489us 0.000us
expand 1.296us 0.000us 1 1.296us 0.000us
expand 1.069us 0.000us 1 1.069us 0.000us
add 24.704us 0.000us 1 24.704us 0.000us
take 165.936us 0.000us 1 165.936us 0.000us
# Method B
---------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
---------------- --------------- --------------- --------------- --------------- ---------------
select 12.835us 0.000us 1 12.835us 0.000us
as_strided 8.331us 0.000us 1 8.331us 0.000us
index_select 20.428us 0.000us 1 20.428us 0.000us
Total time over 50 calls
-------------------------
Method A (micro-seconds): 23263.213399999993
Method B (micro-seconds): 2429.4809999999998
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment