Skip to content

Instantly share code, notes, and snippets.

@bddppq
Created December 3, 2019 00:08
Show Gist options
  • Save bddppq/508555897a185ad9a85574eed00b93f6 to your computer and use it in GitHub Desktop.
Save bddppq/508555897a185ad9a85574eed00b93f6 to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <torch/torch.h>
// Benchmark fixture for the contiguous-input case.
//
// Holds a size x size float tensor exactly as torch::randn produced it, plus
// size / 4 int64 indices generated by torch::randint(0, size, ...). The
// benchmark bodies generated from this fixture read `input` and `indexes` by
// name, so both members are public.
template <int size>
class Contiguous : public benchmark::Fixture {
 public:
  // Builds the tensors before the measured loop starts, keeping allocation
  // and random generation out of the timings.
  //
  // `override` makes a signature mismatch with the library's hook a compile
  // error instead of a silently skipped setup. The State argument is unused.
  void SetUp(::benchmark::State& /*state*/) override {
    input = torch::randn({size, size}).to(at::kFloat);
    indexes = torch::randint(0, size, {size / 4}).to(at::kLong);
  }

  torch::Tensor input;    // data tensor being indexed
  torch::Tensor indexes;  // int64 indices passed to index / index_select
};
// Benchmark fixture for the non-contiguous-input case.
//
// Same as the contiguous setup except that the size x size float tensor is
// passed through transpose(0, 1) before being stored, so the benchmarks index
// a transposed tensor rather than the freshly allocated one. `indexes` holds
// size / 4 int64 values generated by torch::randint(0, size, ...). Both
// members are public because the generated benchmark bodies read them by name.
template <int size>
class NonContiguous : public benchmark::Fixture {
 public:
  // Builds the tensors before the measured loop starts, keeping allocation,
  // random generation and the transpose out of the timings.
  //
  // `override` makes a signature mismatch with the library's hook a compile
  // error instead of a silently skipped setup. The State argument is unused.
  void SetUp(::benchmark::State& /*state*/) override {
    input = torch::randn({size, size}).to(at::kFloat).transpose(0, 1);
    indexes = torch::randint(0, size, {size / 4}).to(at::kLong);
  }

  torch::Tensor input;    // transposed data tensor being indexed
  torch::Tensor indexes;  // int64 indices passed to index / index_select
};
// Defines and registers a single fixture benchmark.
//
//   fixture - fixture class template (Contiguous or NonContiguous)
//   name    - benchmark method name, used in the reported benchmark name
//   size    - template argument forwarded to the fixture
//   expr    - expression evaluated once per iteration; it may refer to the
//             fixture members `input` and `indexes`
//
// The result of `expr` is handed to benchmark::DoNotOptimize so the call is
// not discarded as dead code. The expansion deliberately has no trailing
// semicolon: the caller supplies it, which avoids stray empty declarations.
#define BenchCase(fixture, name, size, expr)         \
  BENCHMARK_TEMPLATE_DEFINE_F(fixture, name, size)   \
  (benchmark::State & state) {                       \
    for (auto _ : state) {                           \
      benchmark::DoNotOptimize(expr);                \
    }                                                \
  }                                                  \
  BENCHMARK_REGISTER_F(fixture, name)->Unit(benchmark::kNanosecond)

// Generates the four benchmarks for one problem size: Tensor::index and
// Tensor::index_select(0, ...) on both the contiguous and the transposed
// input. Method names are index_<size> and index_select_<size>.
// Invoke as `BenchSize(N);` -- the trailing semicolon terminates the last
// registration.
#define BenchSize(size)                                                \
  BenchCase(Contiguous, index_##size, size, input.index(indexes));     \
  BenchCase(Contiguous, index_select_##size, size,                     \
            input.index_select(0, indexes));                           \
  BenchCase(NonContiguous, index_##size, size, input.index(indexes));  \
  BenchCase(NonContiguous, index_select_##size, size,                  \
            input.index_select(0, indexes))

BenchSize(64);
BenchSize(128);
BenchSize(256);
BenchSize(512);
BenchSize(1024);
BenchSize(2048);

// Both macros are local to this benchmark list; drop them once it is built.
#undef BenchSize
#undef BenchCase
// Program entry point: BENCHMARK_MAIN() is Google Benchmark's macro that
// supplies main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();
@bddppq
Copy link
Author

bddppq commented Dec 3, 2019

Run on (24 X 2394.42 MHz CPU s)
2019-12-02 16:02:43
-----------------------------------------------------------------------------
Benchmark                                      Time           CPU Iterations
-----------------------------------------------------------------------------
Contiguous<64>/index_64                     3877 ns       3871 ns     187864
Contiguous<64>/index_select_64               966 ns        966 ns     751247
NonContiguous<64>/index_64                  5657 ns       5657 ns     121703
NonContiguous<64>/index_select_64          30809 ns      30809 ns      22619
Contiguous<128>/index_128                   8305 ns       8304 ns      84927
Contiguous<128>/index_select_128           13305 ns      13172 ns      45040
NonContiguous<128>/index_128               10663 ns      10657 ns      69002
NonContiguous<128>/index_select_128        63543 ns      63543 ns      11409
Contiguous<256>/index_256                  13924 ns      13918 ns      50214
Contiguous<256>/index_select_256           34822 ns      34822 ns      19851
NonContiguous<256>/index_256               46786 ns      46782 ns      15710
NonContiguous<256>/index_select_256       157534 ns     157533 ns       4418
Contiguous<512>/index_512                  37008 ns      36998 ns      19786
Contiguous<512>/index_select_512           74236 ns      74208 ns       9283
NonContiguous<512>/index_512              195606 ns     195488 ns       3614
NonContiguous<512>/index_select_512       412587 ns     412502 ns       1665
Contiguous<1024>/index_1024               105950 ns     105907 ns       6639
Contiguous<1024>/index_select_1024        181358 ns     181296 ns       3802
NonContiguous<1024>/index_1024            750279 ns     750090 ns        965
NonContiguous<1024>/index_select_1024    1229192 ns    1228963 ns        574
Contiguous<2048>/index_2048               392678 ns     392661 ns       1724
Contiguous<2048>/index_select_2048        554955 ns     554725 ns       1279
NonContiguous<2048>/index_2048          10443905 ns   10441740 ns         73
NonContiguous<2048>/index_select_2048   11573477 ns   11570427 ns         59

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment