Skip to content

Instantly share code, notes, and snippets.

@bddppq
Created December 4, 2019 00:54
Show Gist options
  • Save bddppq/96b8f5407e42bc6ec20beed34190562b to your computer and use it in GitHub Desktop.
Save bddppq/96b8f5407e42bc6ec20beed34190562b to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <torch/torch.h>
namespace {
/// Benchmark fixture holding a freshly generated (row, col) float tensor,
/// stored exactly as torch::randn produces it, together with a 1-D int64
/// tensor of row / 4 indices drawn via torch::randint(0, row, ...).
///
/// Both tensors are regenerated every time the benchmark framework calls
/// SetUp, so each benchmark case works on its own random data.
template <int row, int col>
class Contiguous : public benchmark::Fixture {
 public:
  torch::Tensor input;    ///< Data tensor that the benchmarks gather rows from.
  torch::Tensor indexes;  ///< Indices used to select along dimension 0.

 private:
  // Reached only through the benchmark::Fixture virtual interface, hence
  // private. The State argument is not needed to build the tensors.
  void SetUp(const ::benchmark::State& /* unused */) override {
    const torch::Tensor values = torch::randn({row, col});
    input = values.to(at::kFloat);

    const torch::Tensor picks = torch::randint(0, row, {row / 4});
    indexes = picks.to(at::kLong);
  }
};
/// Benchmark fixture whose input has shape (row, col) but is obtained by
/// transposing a (col, row) tensor, so it is a transposed view rather than a
/// tensor laid out directly as (row, col). It also holds a 1-D int64 tensor of
/// row / 4 indices drawn via torch::randint(0, row, ...).
///
/// Both tensors are regenerated every time the benchmark framework calls
/// SetUp, so each benchmark case works on its own random data.
template <int row, int col>
class NonContiguous : public benchmark::Fixture {
 public:
  torch::Tensor input;    ///< Transposed data tensor that the benchmarks gather from.
  torch::Tensor indexes;  ///< Indices used to select along dimension 0.

 private:
  // Reached only through the benchmark::Fixture virtual interface, hence
  // private. The State argument is not needed to build the tensors.
  void SetUp(const ::benchmark::State& /* unused */) override {
    // Allocate with the dimensions swapped, then swap them back as a view.
    const torch::Tensor backing = torch::randn({col, row}).to(at::kFloat);
    input = backing.transpose(0, 1);

    const torch::Tensor picks = torch::randint(0, row, {row / 4});
    indexes = picks.to(at::kLong);
  }
};
} // namespace
// BenchSize(row, col) defines four benchmark cases for one tensor size, in
// this order:
//   1. Contiguous<row, col>    / index_<row>x<col>        -> input.index(indexes)
//   2. Contiguous<row, col>    / index_select_<row>x<col> -> input.index_select(0, indexes)
//   3. NonContiguous<row, col> / index_<row>x<col>        -> input.index(indexes)
//   4. NonContiguous<row, col> / index_select_<row>x<col> -> input.index_select(0, indexes)
// Each result is handed to benchmark::DoNotOptimize so the call is not
// discarded. No comments are placed inside the macro body because every line
// ends in a continuation backslash.
#define BenchSize(row, col) \
  BENCHMARK_TEMPLATE_F(Contiguous, index_##row##x##col, row, col) \
  (benchmark::State& state) { \
    for (auto _ : state) { \
      benchmark::DoNotOptimize(input.index(indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(Contiguous, index_select_##row##x##col, row, col) \
  (benchmark::State& state) { \
    for (auto _ : state) { \
      benchmark::DoNotOptimize(input.index_select(0, indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(NonContiguous, index_##row##x##col, row, col) \
  (benchmark::State& state) { \
    for (auto _ : state) { \
      benchmark::DoNotOptimize(input.index(indexes)); \
    } \
  } \
  BENCHMARK_TEMPLATE_F(NonContiguous, index_select_##row##x##col, row, col) \
  (benchmark::State& state) { \
    for (auto _ : state) { \
      benchmark::DoNotOptimize(input.index_select(0, indexes)); \
    } \
  }
// Instantiate the benchmarks for every (row, col) combination with
// row in {64, 128, 256, 512, 1024, 2048} and col in {16, 32, 64, 128, 256}.
// The helper expands one column width across all row counts; invoking it in
// ascending column order keeps the definition order column-major, row-minor.
#define BENCH_ROW_SWEEP(col) \
  BenchSize(64, col) \
  BenchSize(128, col) \
  BenchSize(256, col) \
  BenchSize(512, col) \
  BenchSize(1024, col) \
  BenchSize(2048, col)
BENCH_ROW_SWEEP(16)
BENCH_ROW_SWEEP(32)
BENCH_ROW_SWEEP(64)
BENCH_ROW_SWEEP(128)
BENCH_ROW_SWEEP(256)
#undef BENCH_ROW_SWEEP
#undef BenchSize
// Program entry point: per Google Benchmark, BENCHMARK_MAIN() supplies the
// main() function that runs the benchmark cases defined above.
BENCHMARK_MAIN();
@bddppq
Copy link
Author

bddppq commented Dec 4, 2019

Run on (24 X 2394.36 MHz CPU s)
2019-12-03 16:38:15
--------------------------------------------------------------------------------------
Benchmark                                               Time           CPU Iterations
--------------------------------------------------------------------------------------
Contiguous<64, 16>/index_64x16                       3585 ns       3582 ns     177405
Contiguous<64, 16>/index_select_64x16                 896 ns        895 ns     832747
NonContiguous<64, 16>/index_64x16                    3775 ns       3775 ns     179689
NonContiguous<64, 16>/index_select_64x16            29807 ns      29803 ns      23286
Contiguous<128, 16>/index_128x16                     4351 ns       4351 ns     165833
Contiguous<128, 16>/index_select_128x16              1011 ns        998 ns     714063
NonContiguous<128, 16>/index_128x16                  4453 ns       4451 ns     160943
NonContiguous<128, 16>/index_select_128x16          61058 ns      61050 ns      11657
Contiguous<256, 16>/index_256x16                     6099 ns       6098 ns     128356
Contiguous<256, 16>/index_select_256x16              1135 ns       1135 ns     601675
NonContiguous<256, 16>/index_256x16                  6992 ns       6992 ns     101187
NonContiguous<256, 16>/index_select_256x16         124286 ns     124271 ns       5823
Contiguous<512, 16>/index_512x16                     9535 ns       9533 ns      73665
Contiguous<512, 16>/index_select_512x16              1342 ns       1342 ns     501253
NonContiguous<512, 16>/index_512x16                  9646 ns       9645 ns      73081
NonContiguous<512, 16>/index_select_512x16         244365 ns     244353 ns       2892
Contiguous<1024, 16>/index_1024x16                  16052 ns      16050 ns      43124
Contiguous<1024, 16>/index_select_1024x16            4171 ns       4171 ns     165349
NonContiguous<1024, 16>/index_1024x16               17273 ns      17272 ns      40433
NonContiguous<1024, 16>/index_select_1024x16       474987 ns     474940 ns       1445
Contiguous<2048, 16>/index_2048x16                  26495 ns      26493 ns      25886
Contiguous<2048, 16>/index_select_2048x16            6972 ns       6971 ns     103200
NonContiguous<2048, 16>/index_2048x16               32050 ns      32048 ns      21937
NonContiguous<2048, 16>/index_select_2048x16       981060 ns     980968 ns        699
Contiguous<64, 32>/index_64x32                       5070 ns       5069 ns     138438
Contiguous<64, 32>/index_select_64x32                 943 ns        943 ns     748617
NonContiguous<64, 32>/index_64x32                    5385 ns       5385 ns     127148
NonContiguous<64, 32>/index_select_64x32            30189 ns      30186 ns      23183
Contiguous<128, 32>/index_128x32                     4103 ns       4103 ns     170604
Contiguous<128, 32>/index_select_128x32              1114 ns       1114 ns     689412
NonContiguous<128, 32>/index_128x32                  4528 ns       4528 ns     154965
NonContiguous<128, 32>/index_select_128x32          58303 ns      58301 ns      12010
Contiguous<256, 32>/index_256x32                     6683 ns       6683 ns     105786
Contiguous<256, 32>/index_select_256x32              1391 ns       1391 ns     564382
NonContiguous<256, 32>/index_256x32                  7811 ns       7811 ns      88876
NonContiguous<256, 32>/index_select_256x32         122087 ns     122085 ns       5662
Contiguous<512, 32>/index_512x32                    10252 ns      10251 ns      70863
Contiguous<512, 32>/index_select_512x32              3466 ns       3465 ns     178367
NonContiguous<512, 32>/index_512x32                 14711 ns      14710 ns      46934
NonContiguous<512, 32>/index_select_512x32         247500 ns     247474 ns       2829
Contiguous<1024, 32>/index_1024x32                  15171 ns      15170 ns      45269
Contiguous<1024, 32>/index_select_1024x32            5610 ns       5609 ns     135106
NonContiguous<1024, 32>/index_1024x32               28944 ns      28942 ns      25107
NonContiguous<1024, 32>/index_select_1024x32       519939 ns     519889 ns       1000
Contiguous<2048, 32>/index_2048x32                  25933 ns      25931 ns      26815
Contiguous<2048, 32>/index_select_2048x32            8201 ns       8198 ns      85014
NonContiguous<2048, 32>/index_2048x32               56303 ns      56298 ns      12446
NonContiguous<2048, 32>/index_select_2048x32      1038653 ns    1038609 ns        669
Contiguous<64, 64>/index_64x64                       5313 ns       5312 ns     133288
Contiguous<64, 64>/index_select_64x64                 955 ns        955 ns     719262
NonContiguous<64, 64>/index_64x64                    6371 ns       6370 ns     122599
NonContiguous<64, 64>/index_select_64x64            30293 ns      30292 ns      22606
Contiguous<128, 64>/index_128x64                     5680 ns       5680 ns     124436
Contiguous<128, 64>/index_select_128x64              1081 ns       1081 ns     660185
NonContiguous<128, 64>/index_128x64                  7233 ns       7233 ns      97353
NonContiguous<128, 64>/index_select_128x64          61827 ns      61828 ns      10884
Contiguous<256, 64>/index_256x64                     8654 ns       8654 ns      80137
Contiguous<256, 64>/index_select_256x64              3095 ns       3095 ns     226521
NonContiguous<256, 64>/index_256x64                 13340 ns      13339 ns      52928
NonContiguous<256, 64>/index_select_256x64         128290 ns     128287 ns       5634
Contiguous<512, 64>/index_512x64                    12261 ns      12258 ns      56005
Contiguous<512, 64>/index_select_512x64              4559 ns       4559 ns     149848
NonContiguous<512, 64>/index_512x64                 20207 ns      20178 ns      27975
NonContiguous<512, 64>/index_select_512x64         235697 ns     235547 ns       2950
Contiguous<1024, 64>/index_1024x64                  16223 ns      16209 ns      47648
Contiguous<1024, 64>/index_select_1024x64            6237 ns       6234 ns     114833
NonContiguous<1024, 64>/index_1024x64               51147 ns      51142 ns      13525
NonContiguous<1024, 64>/index_select_1024x64       500279 ns     500224 ns       1000
Contiguous<2048, 64>/index_2048x64                  28525 ns      28524 ns      20240
Contiguous<2048, 64>/index_select_2048x64           12868 ns      12856 ns      57125
NonContiguous<2048, 64>/index_2048x64               96567 ns      96563 ns       7365
NonContiguous<2048, 64>/index_select_2048x64      1005265 ns    1005254 ns        702
Contiguous<64, 128>/index_64x128                     3982 ns       3982 ns     180766
Contiguous<64, 128>/index_select_64x128              8224 ns       8222 ns      84056
NonContiguous<64, 128>/index_64x128                  5305 ns       5304 ns     128205
NonContiguous<64, 128>/index_select_64x128          32461 ns      32450 ns      22402
Contiguous<128, 128>/index_128x128                   5754 ns       5754 ns     122807
Contiguous<128, 128>/index_select_128x128           13431 ns      13430 ns      55052
NonContiguous<128, 128>/index_128x128                9946 ns       9945 ns      64673
NonContiguous<128, 128>/index_select_128x128        64124 ns      64121 ns      10718
Contiguous<256, 128>/index_256x128                   7882 ns       7882 ns      91306
Contiguous<256, 128>/index_select_256x128           26444 ns      26442 ns      25873
NonContiguous<256, 128>/index_256x128               17203 ns      17201 ns      41684
NonContiguous<256, 128>/index_select_256x128       129291 ns     129133 ns       5150
Contiguous<512, 128>/index_512x128                  11492 ns      11482 ns      63217
Contiguous<512, 128>/index_select_512x128           49308 ns      49291 ns      14088
NonContiguous<512, 128>/index_512x128               39818 ns      39811 ns      17325
NonContiguous<512, 128>/index_select_512x128       271838 ns     271286 ns       2636
Contiguous<1024, 128>/index_1024x128                22033 ns      22030 ns      32875
Contiguous<1024, 128>/index_select_1024x128         90959 ns      90959 ns       7252
NonContiguous<1024, 128>/index_1024x128            107658 ns     107650 ns       6633
NonContiguous<1024, 128>/index_select_1024x128     556148 ns     556146 ns       1290
Contiguous<2048, 128>/index_2048x128                36258 ns      36255 ns      18406
Contiguous<2048, 128>/index_select_2048x128        218668 ns     218644 ns       3128
NonContiguous<2048, 128>/index_2048x128            205027 ns     205027 ns       3606
NonContiguous<2048, 128>/index_select_2048x128    1121911 ns    1121897 ns        624
Contiguous<64, 256>/index_64x256                     5562 ns       5562 ns     131675
Contiguous<64, 256>/index_select_64x256              8320 ns       8319 ns      83837
NonContiguous<64, 256>/index_64x256                  9605 ns       9604 ns      70456
NonContiguous<64, 256>/index_select_64x256          34782 ns      34780 ns      20325
Contiguous<128, 256>/index_128x256                   7299 ns       7299 ns      97970
Contiguous<128, 256>/index_select_128x256           15636 ns      15636 ns      44371
NonContiguous<128, 256>/index_128x256               16004 ns      16004 ns      42990
NonContiguous<128, 256>/index_select_128x256        68573 ns      68552 ns      10033
Contiguous<256, 256>/index_256x256                   9591 ns       9590 ns      75851
Contiguous<256, 256>/index_select_256x256           29657 ns      29656 ns      24356
NonContiguous<256, 256>/index_256x256               41103 ns      41102 ns      17127
NonContiguous<256, 256>/index_select_256x256       151869 ns     151860 ns       4681
Contiguous<512, 256>/index_512x256                  18383 ns      18381 ns      36458
Contiguous<512, 256>/index_select_512x256           64466 ns      64466 ns      12072
NonContiguous<512, 256>/index_512x256               99844 ns      99836 ns       6967
NonContiguous<512, 256>/index_select_512x256       322891 ns     322862 ns       2182
Contiguous<1024, 256>/index_1024x256                31405 ns      31403 ns      21702
Contiguous<1024, 256>/index_select_1024x256        110707 ns     110706 ns       5973
NonContiguous<1024, 256>/index_1024x256            187090 ns     187079 ns       3849
NonContiguous<1024, 256>/index_select_1024x256     635806 ns     635794 ns       1098
Contiguous<2048, 256>/index_2048x256                58864 ns      58861 ns      12090
Contiguous<2048, 256>/index_select_2048x256        224580 ns     224581 ns       3066
NonContiguous<2048, 256>/index_2048x256            380915 ns     380887 ns       1862
NonContiguous<2048, 256>/index_select_2048x256    1305645 ns    1305258 ns        549

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment