Created
June 5, 2020 15:10
-
-
Save dmah42/f88f3f22dbbbfcb9530187ea206bae1b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "benchmark/benchmark.h" | |
#include "cblas.h" | |
#include <memory> | |
// Benchmarks cblas_sgemm on square n x n matrices (n = state.range(0)) whose
// storage is owned by std::unique_ptr.
//
// The array specialization std::unique_ptr<float[]> is used so the buffers are
// released with delete[]; std::unique_ptr<float> would invoke scalar delete on
// memory obtained from new[], which is undefined behavior. The buffers are
// value-initialized (zero-filled) so sgemm never reads indeterminate floats:
// with beta == 1 the routine reads C as well as A and B.
static void BM_GEMM_unique_ptr(benchmark::State& state) {
  const auto n = state.range(0);
  const auto count = n * n;  // elements per square matrix

  std::unique_ptr<float[]> A(new float[count]());
  std::unique_ptr<float[]> B(new float[count]());
  std::unique_ptr<float[]> C(new float[count]());

  //openblas_set_num_threads(1);
  for (auto _ : state) {
    // C = alpha * A * B^T + beta * C, column-major storage.
    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans,
                n,        // M
                n,        // N
                n,        // K
                1,        // alpha
                A.get(),
                n,        // lda
                B.get(),
                n,        // ldb
                1,        // beta
                C.get(),
                n);       // ldc
    // Keep the result pointer observable so the call is not optimized away.
    benchmark::DoNotOptimize(C.get());
  }
}
// Benchmarks cblas_sgemm on square n x n matrices (n = state.range(0)) whose
// storage is managed manually with new[] / delete[].
//
// The buffers are value-initialized (zero-filled) so sgemm never reads
// indeterminate floats: with beta == 1 the routine reads C as well as A and B.
// The raw owning pointers are intentional here; this benchmark is the
// manual-memory counterpart of BM_GEMM_unique_ptr.
static void BM_GEMM_raw_ptr(benchmark::State& state) {
  const auto n = state.range(0);
  const auto count = n * n;  // elements per square matrix

  float* A = new float[count]();
  float* B = new float[count]();
  float* C = new float[count]();

  //openblas_set_num_threads(1);
  for (auto _ : state) {
    // C = alpha * A * B^T + beta * C, column-major storage.
    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans,
                n,   // M
                n,   // N
                n,   // K
                1,   // alpha
                A,
                n,   // lda
                B,
                n,   // ldb
                1,   // beta
                C,
                n);  // ldc
    // Keep the result pointer observable so the call is not optimized away.
    benchmark::DoNotOptimize(C);
  }

  // Release in reverse order of allocation.
  delete[] C;
  delete[] B;
  delete[] A;
}
// Register both GEMM benchmarks over the same argument range; each generated
// argument is read inside the benchmark via state.range(0) and used as the
// square matrix dimension.
BENCHMARK(BM_GEMM_unique_ptr)->Range(4, 1024); | |
BENCHMARK(BM_GEMM_raw_ptr)->Range(4, 1024); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ clang++-5.0 -std=c++11 ./blas_bm.cc -I ~/git/benchmark/include/ -L ~/git/benchmark/build/src/ -lbenchmark_main -lbenchmark -lopenblas -pthread
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
./a.out | |
2020-06-05 16:09:38 | |
Running ./a.out | |
Run on (4 X 3900 MHz CPU s) | |
CPU Caches: | |
L1 Data 32 KiB (x2) | |
L1 Instruction 32 KiB (x2) | |
L2 Unified 256 KiB (x2) | |
L3 Unified 4096 KiB (x1) | |
Load Average: 3.57, 3.55, 3.11 | |
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. | |
------------------------------------------------------------------ | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------ | |
BM_GEMM_unique_ptr/4 465 ns 448 ns 1974826 | |
BM_GEMM_unique_ptr/8 321 ns 321 ns 1889871 | |
BM_GEMM_unique_ptr/64 10502 ns 10499 ns 65323 | |
BM_GEMM_unique_ptr/512 3810300 ns 2597048 ns 387 | |
BM_GEMM_unique_ptr/1024 31456758 ns 18933383 ns 49 | |
BM_GEMM_raw_ptr/4 255 ns 254 ns 2978025 | |
BM_GEMM_raw_ptr/8 337 ns 322 ns 2221654 | |
BM_GEMM_raw_ptr/64 10354 ns 10121 ns 72125 | |
BM_GEMM_raw_ptr/512 6574122 ns 2887697 ns 339 | |
BM_GEMM_raw_ptr/1024 22505692 ns 17119469 ns 47 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment