Skip to content

Instantly share code, notes, and snippets.

@bddppq
Created December 4, 2019 01:51
Show Gist options
  • Save bddppq/216fc8abfa8b68fa6b861974e977ded6 to your computer and use it in GitHub Desktop.
Save bddppq/216fc8abfa8b68fa6b861974e977ded6 to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <torch/torch.h>
namespace {
// Benchmark fixture that hands each benchmark body a `row x col` float tensor
// built directly by torch::randn, plus `row / 4` int64 indices drawn by
// torch::randint(0, row, ...). Both tensors are regenerated on every SetUp.
template <int row, int col>
class Contiguous : public benchmark::Fixture {
 public:
  torch::Tensor input;    // tensor the benchmark bodies index into
  torch::Tensor indexes;  // index tensor passed to index()/index_select()

 private:
  // Rebuilds `input` and `indexes`; the benchmark state is not consulted.
  void SetUp(const ::benchmark::State& /* unused */) override {
    constexpr int kIndexCount = row / 4;

    auto values = torch::randn({row, col});
    input = values.to(at::kFloat);

    auto picks = torch::randint(0, row, {kIndexCount});
    indexes = picks.to(at::kLong);
  }
};
// Benchmark fixture whose `input` is the (0, 1) transpose of a `col x row`
// float tensor, i.e. a `row x col` view over storage laid out for the
// transposed shape (presumably non-contiguous, as the class name says).
// `indexes` holds `row / 4` int64 values drawn by torch::randint(0, row, ...).
template <int row, int col>
class NonContiguous : public benchmark::Fixture {
 public:
  torch::Tensor input;    // transposed view the benchmark bodies index into
  torch::Tensor indexes;  // index tensor passed to index()/index_select()

 private:
  // Rebuilds `input` and `indexes`; the benchmark state is not consulted.
  void SetUp(const ::benchmark::State& /* unused */) override {
    constexpr int kIndexCount = row / 4;

    auto backing = torch::randn({col, row}).to(at::kFloat);
    input = backing.transpose(0, 1);

    auto picks = torch::randint(0, row, {kIndexCount});
    indexes = picks.to(at::kLong);
  }
};
} // namespace
// BenchSize(row, col) defines four fixture benchmarks for one tensor shape:
// `input.index(indexes)` and `input.index_select(0, indexes)` on the
// Contiguous<row, col> fixture, followed by the same two calls on the
// NonContiguous<row, col> fixture. Every timed iteration performs exactly one
// call and passes the returned tensor to benchmark::DoNotOptimize.
// The benchmark method names are pasted together as index_<row>x<col> and
// index_select_<row>x<col>.
#define BenchSize(row, col) \
  BENCHMARK_TEMPLATE_F(Contiguous, index_##row##x##col, row, col) \
  (benchmark::State & st) { \
    for (auto _ : st) { \
      benchmark::DoNotOptimize(input.index(indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(Contiguous, index_select_##row##x##col, row, col) \
  (benchmark::State & st) { \
    for (auto _ : st) { \
      benchmark::DoNotOptimize(input.index_select(0, indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(NonContiguous, index_##row##x##col, row, col) \
  (benchmark::State & st) { \
    for (auto _ : st) { \
      benchmark::DoNotOptimize(input.index(indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(NonContiguous, index_select_##row##x##col, row, col) \
  (benchmark::State & st) { \
    for (auto _ : st) { \
      benchmark::DoNotOptimize(input.index_select(0, indexes)); \
    } \
  }
// Instantiate the benchmarks for every combination of
//   rows    in {64, 128, 256, 512, 1024, 2048}
//   columns in {16, 32, 64, 65, 80, 90, 100, 128, 256}
// with the column count as the outer loop, so definitions appear in the same
// order as if each BenchSize(rows, cols) call were written out by hand.
#define BenchAllRows(cols) \
  BenchSize(64, cols) \
  BenchSize(128, cols) \
  BenchSize(256, cols) \
  BenchSize(512, cols) \
  BenchSize(1024, cols) \
  BenchSize(2048, cols)

BenchAllRows(16)
BenchAllRows(32)
BenchAllRows(64)
BenchAllRows(65)
BenchAllRows(80)
BenchAllRows(90)
BenchAllRows(100)
BenchAllRows(128)
BenchAllRows(256)

// Both helper macros are only meaningful up to this point.
#undef BenchAllRows
#undef BenchSize
// Program entry point: Google Benchmark's BENCHMARK_MAIN() macro supplies
// main(), which runs the benchmarks defined above.
BENCHMARK_MAIN();
@bddppq
Copy link
Author

bddppq commented Dec 4, 2019

Run on (24 X 2394.36 MHz CPU s)
2019-12-03 18:15:16
--------------------------------------------------------------------------------------
Benchmark                                               Time           CPU Iterations
--------------------------------------------------------------------------------------
Contiguous<64, 16>/index_64x16                       3431 ns       3430 ns     206289
Contiguous<64, 16>/index_select_64x16                 819 ns        818 ns     640451
NonContiguous<64, 16>/index_64x16                    3450 ns       3449 ns     173849
NonContiguous<64, 16>/index_select_64x16            27675 ns      27558 ns      27431
Contiguous<128, 16>/index_128x16                     4154 ns       4154 ns     169343
Contiguous<128, 16>/index_select_128x16               911 ns        908 ns     739041
NonContiguous<128, 16>/index_128x16                  4159 ns       4142 ns     172809
NonContiguous<128, 16>/index_select_128x16          55512 ns      55482 ns      13515
Contiguous<256, 16>/index_256x16                     5130 ns       5129 ns     136877
Contiguous<256, 16>/index_select_256x16              1063 ns       1063 ns     641446
NonContiguous<256, 16>/index_256x16                  5062 ns       5060 ns     138421
NonContiguous<256, 16>/index_select_256x16         103796 ns     103780 ns       6865
Contiguous<512, 16>/index_512x16                     7262 ns       7261 ns      93910
Contiguous<512, 16>/index_select_512x16              1299 ns       1299 ns     542359
NonContiguous<512, 16>/index_512x16                  7457 ns       7455 ns      91514
NonContiguous<512, 16>/index_select_512x16         210779 ns     210722 ns       3313
Contiguous<1024, 16>/index_1024x16                  12939 ns      12936 ns      53295
Contiguous<1024, 16>/index_select_1024x16            4207 ns       4206 ns     167832
NonContiguous<1024, 16>/index_1024x16               16662 ns      16658 ns      42906
NonContiguous<1024, 16>/index_select_1024x16       445591 ns     445390 ns       1589
Contiguous<2048, 16>/index_2048x16                  26413 ns      26407 ns      26275
Contiguous<2048, 16>/index_select_2048x16            6778 ns       6776 ns     101795
NonContiguous<2048, 16>/index_2048x16               31446 ns      31437 ns      22792
NonContiguous<2048, 16>/index_select_2048x16       865218 ns     864919 ns        790
Contiguous<64, 32>/index_64x32                       4338 ns       4336 ns     160262
Contiguous<64, 32>/index_select_64x32                 937 ns        936 ns     750541
NonContiguous<64, 32>/index_64x32                    4589 ns       4587 ns     153149
NonContiguous<64, 32>/index_select_64x32            27172 ns      27162 ns      25353
Contiguous<128, 32>/index_128x32                     4721 ns       4719 ns     148631
Contiguous<128, 32>/index_select_128x32              1009 ns       1008 ns     722525
NonContiguous<128, 32>/index_128x32                  5461 ns       5460 ns     136048
NonContiguous<128, 32>/index_select_128x32          52201 ns      52183 ns      13634
Contiguous<256, 32>/index_256x32                     5595 ns       5592 ns     123936
Contiguous<256, 32>/index_select_256x32              1323 ns       1323 ns     580196
NonContiguous<256, 32>/index_256x32                  5724 ns       5723 ns     124316
NonContiguous<256, 32>/index_select_256x32         102084 ns     102047 ns       6946
Contiguous<512, 32>/index_512x32                     7660 ns       7658 ns      91073
Contiguous<512, 32>/index_select_512x32              3056 ns       3056 ns     234471
NonContiguous<512, 32>/index_512x32                 11823 ns      11821 ns      61198
NonContiguous<512, 32>/index_select_512x32         210105 ns     210066 ns       3294
Contiguous<1024, 32>/index_1024x32                  12520 ns      12516 ns      57173
Contiguous<1024, 32>/index_select_1024x32            4908 ns       4907 ns     141150
NonContiguous<1024, 32>/index_1024x32               23158 ns      23153 ns      30040
NonContiguous<1024, 32>/index_select_1024x32       413817 ns     413650 ns       1603
Contiguous<2048, 32>/index_2048x32                  21078 ns      21074 ns      33638
Contiguous<2048, 32>/index_select_2048x32            7091 ns       7089 ns     105035
NonContiguous<2048, 32>/index_2048x32               48431 ns      48402 ns      14040
NonContiguous<2048, 32>/index_select_2048x32       832571 ns     831776 ns        831
Contiguous<64, 64>/index_64x64                       3535 ns       3535 ns     194713
Contiguous<64, 64>/index_select_64x64                 871 ns        869 ns     814816
NonContiguous<64, 64>/index_64x64                    3876 ns       3875 ns     174427
NonContiguous<64, 64>/index_select_64x64            25919 ns      25919 ns      26077
Contiguous<128, 64>/index_128x64                     4154 ns       4154 ns     172241
Contiguous<128, 64>/index_select_128x64              1020 ns       1020 ns     687704
NonContiguous<128, 64>/index_128x64                  5343 ns       5338 ns     131717
NonContiguous<128, 64>/index_select_128x64          53614 ns      53599 ns      13197
Contiguous<256, 64>/index_256x64                     6671 ns       6669 ns     100664
Contiguous<256, 64>/index_select_256x64              2746 ns       2746 ns     253251
NonContiguous<256, 64>/index_256x64                 10978 ns      10963 ns      64853
NonContiguous<256, 64>/index_select_256x64         107845 ns     107796 ns       6298
Contiguous<512, 64>/index_512x64                     9187 ns       9187 ns      76165
Contiguous<512, 64>/index_select_512x64              4306 ns       4303 ns     162404
NonContiguous<512, 64>/index_512x64                 19649 ns      19646 ns      35028
NonContiguous<512, 64>/index_select_512x64         214876 ns     214862 ns       3242
Contiguous<1024, 64>/index_1024x64                  15340 ns      15337 ns      46818
Contiguous<1024, 64>/index_select_1024x64            5723 ns       5722 ns      87473
NonContiguous<1024, 64>/index_1024x64               48045 ns      48040 ns      15759
NonContiguous<1024, 64>/index_select_1024x64       431440 ns     431406 ns       1590
Contiguous<2048, 64>/index_2048x64                  28317 ns      28310 ns      24866
Contiguous<2048, 64>/index_select_2048x64           11808 ns      11807 ns      56622
NonContiguous<2048, 64>/index_2048x64               94126 ns      94114 ns       7398
NonContiguous<2048, 64>/index_select_2048x64       881466 ns     881296 ns        790
Contiguous<64, 65>/index_64x65                       3535 ns       3534 ns     201292
Contiguous<64, 65>/index_select_64x65                5805 ns       5798 ns     130966
NonContiguous<64, 65>/index_64x65                    3890 ns       3889 ns     177823
NonContiguous<64, 65>/index_select_64x65            27346 ns      27286 ns      26636
Contiguous<128, 65>/index_128x65                     4454 ns       4448 ns     166213
Contiguous<128, 65>/index_select_128x65              9372 ns       9370 ns      73936
NonContiguous<128, 65>/index_128x65                  5153 ns       5152 ns     137096
NonContiguous<128, 65>/index_select_128x65          54174 ns      54146 ns      13299
Contiguous<256, 65>/index_256x65                     6365 ns       6364 ns     112984
Contiguous<256, 65>/index_select_256x65             21786 ns      21784 ns      32657
NonContiguous<256, 65>/index_256x65                 10561 ns      10559 ns      65402
NonContiguous<256, 65>/index_select_256x65         111809 ns     111787 ns       6598
Contiguous<512, 65>/index_512x65                     9175 ns       9174 ns      74999
Contiguous<512, 65>/index_select_512x65             46511 ns      46509 ns      14693
NonContiguous<512, 65>/index_512x65                 19562 ns      19562 ns      35553
NonContiguous<512, 65>/index_select_512x65         215115 ns     215107 ns       3312
Contiguous<1024, 65>/index_1024x65                  15272 ns      15258 ns      47067
Contiguous<1024, 65>/index_select_1024x65           69478 ns      69380 ns      10334
NonContiguous<1024, 65>/index_1024x65               46563 ns      46549 ns      15496
NonContiguous<1024, 65>/index_select_1024x65       451268 ns     451267 ns       1544
Contiguous<2048, 65>/index_2048x65                  30175 ns      30175 ns      23459
Contiguous<2048, 65>/index_select_2048x65          141076 ns     141076 ns       5120
NonContiguous<2048, 65>/index_2048x65               99655 ns      99656 ns       6946
NonContiguous<2048, 65>/index_select_2048x65       906814 ns     906818 ns        773
Contiguous<64, 80>/index_64x80                       3709 ns       3709 ns     186582
Contiguous<64, 80>/index_select_64x80                4678 ns       4678 ns     151891
NonContiguous<64, 80>/index_64x80                    4164 ns       4164 ns     171016
NonContiguous<64, 80>/index_select_64x80            26323 ns      26324 ns      25705
Contiguous<128, 80>/index_128x80                     4416 ns       4416 ns     163771
Contiguous<128, 80>/index_select_128x80              8059 ns       8059 ns      85304
NonContiguous<128, 80>/index_128x80                  5833 ns       5833 ns     116233
NonContiguous<128, 80>/index_select_128x80          53621 ns      53621 ns      13295
Contiguous<256, 80>/index_256x80                     6961 ns       6961 ns     103465
Contiguous<256, 80>/index_select_256x80             20502 ns      20501 ns      39551
NonContiguous<256, 80>/index_256x80                 12186 ns      12186 ns      58490
NonContiguous<256, 80>/index_select_256x80         113297 ns     113297 ns       6366
Contiguous<512, 80>/index_512x80                    10069 ns      10070 ns      69995
Contiguous<512, 80>/index_select_512x80             40130 ns      40130 ns      20957
NonContiguous<512, 80>/index_512x80                 23688 ns      23688 ns      29713
NonContiguous<512, 80>/index_select_512x80         218726 ns     218725 ns       3097
Contiguous<1024, 80>/index_1024x80                  18207 ns      18207 ns      38877
Contiguous<1024, 80>/index_select_1024x80           79759 ns      79752 ns       8751
NonContiguous<1024, 80>/index_1024x80               63486 ns      63486 ns      11302
NonContiguous<1024, 80>/index_select_1024x80       475383 ns     475385 ns       1417
Contiguous<2048, 80>/index_2048x80                  33948 ns      33947 ns      20078
Contiguous<2048, 80>/index_select_2048x80          122807 ns     122805 ns       5589
NonContiguous<2048, 80>/index_2048x80              122339 ns     122339 ns       5679
NonContiguous<2048, 80>/index_select_2048x80       973177 ns     973174 ns        744
Contiguous<64, 90>/index_64x90                       4488 ns       4488 ns     145163
Contiguous<64, 90>/index_select_64x90                7899 ns       7899 ns      90965
NonContiguous<64, 90>/index_64x90                    5234 ns       5234 ns     126644
NonContiguous<64, 90>/index_select_64x90            26950 ns      26950 ns      26020
Contiguous<128, 90>/index_128x90                     5286 ns       5286 ns     128918
Contiguous<128, 90>/index_select_128x90             14474 ns      14473 ns      47551
NonContiguous<128, 90>/index_128x90                  8035 ns       8035 ns      90105
NonContiguous<128, 90>/index_select_128x90          55360 ns      55361 ns      12275
Contiguous<256, 90>/index_256x90                     9405 ns       9405 ns      73065
Contiguous<256, 90>/index_select_256x90             31786 ns      31786 ns      21161
NonContiguous<256, 90>/index_256x90                 15663 ns      15663 ns      45078
NonContiguous<256, 90>/index_select_256x90         117458 ns     117457 ns       6040
Contiguous<512, 90>/index_512x90                    13941 ns      13941 ns      50336
Contiguous<512, 90>/index_select_512x90             59457 ns      59457 ns      12220
NonContiguous<512, 90>/index_512x90                 29618 ns      29614 ns      23728
NonContiguous<512, 90>/index_select_512x90         233470 ns     233129 ns       3010
Contiguous<1024, 90>/index_1024x90                  20735 ns      20731 ns      33699
Contiguous<1024, 90>/index_select_1024x90          106522 ns     106504 ns       6487
NonContiguous<1024, 90>/index_1024x90               72138 ns      72100 ns       9903
NonContiguous<1024, 90>/index_select_1024x90       480628 ns     480515 ns       1426
Contiguous<2048, 90>/index_2048x90                  38972 ns      38968 ns      17732
Contiguous<2048, 90>/index_select_2048x90          212407 ns     212395 ns       2511
NonContiguous<2048, 90>/index_2048x90              141797 ns     141778 ns       4882
NonContiguous<2048, 90>/index_select_2048x90       951142 ns     950839 ns        719
Contiguous<64, 100>/index_64x100                     3707 ns       3705 ns     193335
Contiguous<64, 100>/index_select_64x100              7827 ns       7824 ns      90870
NonContiguous<64, 100>/index_64x100                  4536 ns       4535 ns     155198
NonContiguous<64, 100>/index_select_64x100          27681 ns      27668 ns      25564
Contiguous<128, 100>/index_128x100                   4397 ns       4396 ns     161728
Contiguous<128, 100>/index_select_128x100           14700 ns      14698 ns      49048
NonContiguous<128, 100>/index_128x100                7490 ns       7489 ns      94218
NonContiguous<128, 100>/index_select_128x100        55336 ns      55323 ns      12303
Contiguous<256, 100>/index_256x100                   7247 ns       7246 ns      93351
Contiguous<256, 100>/index_select_256x100           28700 ns      28695 ns      23695
NonContiguous<256, 100>/index_256x100               13855 ns      13850 ns      48943
NonContiguous<256, 100>/index_select_256x100       108892 ns     108857 ns       6350
Contiguous<512, 100>/index_512x100                  10847 ns      10845 ns      61267
Contiguous<512, 100>/index_select_512x100           56218 ns      56205 ns      12726
NonContiguous<512, 100>/index_512x100               29860 ns      29855 ns      20928
NonContiguous<512, 100>/index_select_512x100       233161 ns     233129 ns       2977
Contiguous<1024, 100>/index_1024x100                18424 ns      18420 ns      39196
Contiguous<1024, 100>/index_select_1024x100         71401 ns      71356 ns       9255
NonContiguous<1024, 100>/index_1024x100             79105 ns      79104 ns       8263
NonContiguous<1024, 100>/index_select_1024x100     481441 ns     481152 ns       1458
Contiguous<2048, 100>/index_2048x100                38808 ns      38777 ns      18832
Contiguous<2048, 100>/index_select_2048x100        218674 ns     218638 ns       2904
NonContiguous<2048, 100>/index_2048x100            166663 ns     166630 ns       4233
NonContiguous<2048, 100>/index_select_2048x100     997842 ns     997754 ns        702
Contiguous<64, 128>/index_64x128                     3802 ns       3801 ns     186704
Contiguous<64, 128>/index_select_64x128              5750 ns       5749 ns     120028
NonContiguous<64, 128>/index_64x128                  5136 ns       5134 ns     129914
NonContiguous<64, 128>/index_select_64x128          28779 ns      28764 ns      24650
Contiguous<128, 128>/index_128x128                   5621 ns       5620 ns      93289
Contiguous<128, 128>/index_select_128x128           10593 ns      10592 ns      76689
NonContiguous<128, 128>/index_128x128                9663 ns       9651 ns      69585
NonContiguous<128, 128>/index_select_128x128        57376 ns      57320 ns      12109
Contiguous<256, 128>/index_256x128                   7875 ns       7874 ns      91464
Contiguous<256, 128>/index_select_256x128           17901 ns      17900 ns      38217
NonContiguous<256, 128>/index_256x128               16505 ns      16505 ns      42913
NonContiguous<256, 128>/index_select_256x128       113450 ns     113443 ns       6132
Contiguous<512, 128>/index_512x128                  10843 ns      10843 ns      67256
Contiguous<512, 128>/index_select_512x128           32352 ns      32274 ns      21326
NonContiguous<512, 128>/index_512x128               41368 ns      41365 ns      16858
NonContiguous<512, 128>/index_select_512x128       235085 ns     234995 ns       2889
Contiguous<1024, 128>/index_1024x128                20629 ns      20608 ns      33157
Contiguous<1024, 128>/index_select_1024x128        110777 ns     110736 ns       7609
NonContiguous<1024, 128>/index_1024x128             99604 ns      99604 ns       6894
NonContiguous<1024, 128>/index_select_1024x128     500275 ns     500266 ns       1000
Contiguous<2048, 128>/index_2048x128                33658 ns      33658 ns      20664
Contiguous<2048, 128>/index_select_2048x128        143965 ns     143965 ns       4170
NonContiguous<2048, 128>/index_2048x128            190590 ns     190587 ns       3514
NonContiguous<2048, 128>/index_select_2048x128     989053 ns     988646 ns        704
Contiguous<64, 256>/index_64x256                     5270 ns       5268 ns     130479
Contiguous<64, 256>/index_select_64x256              5941 ns       5941 ns     120762
NonContiguous<64, 256>/index_64x256                  9686 ns       9683 ns      74965
NonContiguous<64, 256>/index_select_64x256          32097 ns      32097 ns      21923
Contiguous<128, 256>/index_128x256                   9337 ns       9335 ns      77460
Contiguous<128, 256>/index_select_128x256           16950 ns      16942 ns      43047
NonContiguous<128, 256>/index_128x256               18339 ns      18339 ns      35300
NonContiguous<128, 256>/index_select_128x256        69692 ns      69692 ns      10592
Contiguous<256, 256>/index_256x256                  11841 ns      11840 ns      59913
Contiguous<256, 256>/index_select_256x256           19354 ns      19347 ns      35069
NonContiguous<256, 256>/index_256x256               38719 ns      38719 ns      17364
NonContiguous<256, 256>/index_select_256x256       148051 ns     148065 ns       5108
Contiguous<512, 256>/index_512x256                  21441 ns      21431 ns      33368
Contiguous<512, 256>/index_select_512x256           58900 ns      58875 ns      11742
NonContiguous<512, 256>/index_512x256               99296 ns      99276 ns       6683
NonContiguous<512, 256>/index_select_512x256       308821 ns     308819 ns       2312
Contiguous<1024, 256>/index_1024x256                37568 ns      37568 ns      18275
Contiguous<1024, 256>/index_select_1024x256        125987 ns     125987 ns       5468
NonContiguous<1024, 256>/index_1024x256            200215 ns     200142 ns       3473
NonContiguous<1024, 256>/index_select_1024x256     630506 ns     630503 ns       1087
Contiguous<2048, 256>/index_2048x256                65778 ns      65778 ns      10882
Contiguous<2048, 256>/index_select_2048x256        235542 ns     235541 ns       2931
NonContiguous<2048, 256>/index_2048x256            391745 ns     391744 ns       1793
NonContiguous<2048, 256>/index_select_2048x256    1226312 ns    1226309 ns        575

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment