Skip to content

Instantly share code, notes, and snippets.

@vittorioromeo
Last active March 1, 2022 02:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vittorioromeo/efa005d44ccd4ec7279181768a0c1f0b to your computer and use it in GitHub Desktop.
Save vittorioromeo/efa005d44ccd4ec7279181768a0c1f0b to your computer and use it in GitHub Desktop.
`libstdc++` debug performance benchmarks with `[[gnu::always_inline]]`
#define NDEBUG 1
#include <benchmark/benchmark.h>
#include <vector>
#include <algorithm>
#include <numeric>
// Benchmark: pre-increment each of 1000 ints through std::vector::operator[].
// Each timed iteration walks the whole vector once; the result of every
// increment is handed to DoNotOptimize so the work is not eliminated.
static void vector_squareop(benchmark::State& state)
{
    // Setup (not timed): 1000 value-initialized (zero) elements.
    std::vector<int> values(1000);

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            // The subscript call is the operation under measurement.
            benchmark::DoNotOptimize(++(values[idx]));
        }
    }
}
// Benchmark: pre-increment each of 1000 ints through built-in pointer
// subscripting on a heap-allocated array. Serves as the baseline for
// vector_squareop.
static void carray_squareop(benchmark::State& state)
{
    // Setup (not timed): zero-initialized heap array. A raw pointer is used
    // deliberately so that the subscript below is the built-in operator and
    // involves no library function call.
    int* cells = new int[1000]{};

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            benchmark::DoNotOptimize(++(cells[idx]));
        }
    }

    delete[] cells;
}
// Benchmark: pre-increment each of 1000 ints by dereferencing
// `begin() + offset` on a std::vector. Every access goes through
// vector::begin, iterator addition and iterator dereference.
static void vector_iter(benchmark::State& state)
{
    // Setup (not timed): 1000 value-initialized (zero) elements.
    std::vector<int> values(1000);

    for(auto _ : state)
    {
        for(int offset = 0; offset < 1000; ++offset)
        {
            // begin() is intentionally re-evaluated for every element.
            benchmark::DoNotOptimize(++(*(values.begin() + offset)));
        }
    }
}
// Benchmark: pre-increment each of 1000 ints via pointer arithmetic on a
// built-in array with automatic storage. Serves as the baseline for
// vector_iter.
static void carray_iter(benchmark::State& state)
{
    // Setup (not timed): zero-initialized local array.
    int cells[1000]{};

    for(auto _ : state)
    {
        for(int offset = 0; offset < 1000; ++offset)
        {
            benchmark::DoNotOptimize(++(*(cells + offset)));
        }
    }
}
// Builds the input shared by the summation benchmarks: a vector holding the
// 64000 consecutive values 0, 1, ..., 63999.
static std::vector<unsigned int> make_test_container()
{
    std::vector<unsigned int> sequence(64000);

    // Fill with an ascending run starting at zero.
    std::iota(sequence.begin(), sequence.end(), 0u);

    return sequence;
}
// Sums all elements of `v` with std::accumulate over the vector's iterators.
// The accumulator is an unsigned int seeded with 0, so the result wraps
// modulo 2^N on overflow. Returns 0 for an empty vector.
static unsigned int sum_vector_accumulate(const std::vector<unsigned int>& v)
{
    const auto first = std::begin(v);
    const auto last = std::end(v);

    return std::accumulate(first, last, 0u);
}
// Sums all elements of `v` with a hand-written index loop over the vector's
// underlying storage, so no iterator or algorithm calls occur inside the loop.
// The accumulator is an unsigned int, so the result wraps modulo 2^N on
// overflow. Returns 0 for an empty vector.
static unsigned int sum_vector_rawloop(const std::vector<unsigned int>& v)
{
    // Fetch the pointer and length once, outside the loop.
    const unsigned int* const elems = v.data();
    const std::size_t count = v.size();

    unsigned int total = 0;
    for(std::size_t pos = 0; pos < count; ++pos)
    {
        total += elems[pos];
    }

    return total;
}
// Benchmark: time sum_vector_accumulate over the 64000-element test input.
static void sumvec_accumulate(benchmark::State& state)
{
    // Setup (not timed): build the input once.
    const std::vector<unsigned int> input = make_test_container();

    for(auto _ : state)
    {
        // Hand the sum to DoNotOptimize so the call is not eliminated.
        benchmark::DoNotOptimize(sum_vector_accumulate(input));
    }
}
// Benchmark: time sum_vector_rawloop over the 64000-element test input.
static void sumvec_rawloop(benchmark::State& state)
{
    // Setup (not timed): build the input once.
    const std::vector<unsigned int> input = make_test_container();

    for(auto _ : state)
    {
        // Hand the sum to DoNotOptimize so the call is not eliminated.
        benchmark::DoNotOptimize(sum_vector_rawloop(input));
    }
}
// Register the benchmark functions with Google Benchmark.
// Element access via subscript: std::vector vs. heap C array.
BENCHMARK(vector_squareop);
BENCHMARK(carray_squareop);
// Element access via begin()/pointer plus offset: std::vector vs. stack C array.
BENCHMARK(vector_iter);
BENCHMARK(carray_iter);
// Summation of the 64000-element input: std::accumulate vs. hand-written loop.
BENCHMARK(sumvec_accumulate);
BENCHMARK(sumvec_rawloop);
// Supplies the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();
On MSYS2/MinGW on Windows 10 x64
gcc version 11.2.0 (Rev9, Built by MSYS2 project)
Run on (16 X 3600 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 256 KiB (x8)
L3 Unified 16384 KiB (x1)
CPU: Intel Core i9-9900K
MOBO: GIGABYTE Z390 AORUS MASTER-C
RAM: Corsair CMK16GX4M2B3000C15, 4x8GB
g++ -std=c++20 -O<X>
-pthread
./bench0.cpp
-lbenchmark -o bench0.exe
&& ./bench0.exe
--benchmark_repetitions=3
--benchmark_min_time=1
--benchmark_enable_random_interleaving=true
--benchmark_report_aggregates_only=true
===============================================================================
-O0, without `[[gnu::always_inline]]` on `operator[]`
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
carray_squareop_mean 1738 ns 1739 ns 3
carray_squareop_median 1699 ns 1707 ns 3
carray_squareop_stddev 70.8 ns 72.6 ns 3
carray_squareop_cv 4.07 % 4.18 % 3
vector_squareop_mean 2398 ns 2390 ns 3
vector_squareop_median 2333 ns 2344 ns 3
vector_squareop_stddev 114 ns 106 ns 3
vector_squareop_cv 4.77 % 4.42 % 3
-Og, without `[[gnu::always_inline]]` on `operator[]`
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
carray_squareop_mean 440 ns 439 ns 3
carray_squareop_median 439 ns 439 ns 3
carray_squareop_stddev 2.53 ns 0.000 ns 3
carray_squareop_cv 0.57 % 0.00 % 3
vector_squareop_mean 661 ns 662 ns 3
vector_squareop_median 660 ns 660 ns 3
vector_squareop_stddev 2.66 ns 4.33 ns 3
vector_squareop_cv 0.40 % 0.65 % 3
-O0, with `[[gnu::always_inline]]` on `operator[]`
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
vector_squareop_mean 1956 ns 1953 ns 3
vector_squareop_median 1956 ns 1946 ns 3
vector_squareop_stddev 5.65 ns 12.1 ns 3
vector_squareop_cv 0.29 % 0.62 % 3
carray_squareop_mean 1790 ns 1784 ns 3
carray_squareop_median 1787 ns 1784 ns 3
carray_squareop_stddev 9.78 ns 19.2 ns 3
carray_squareop_cv 0.55 % 1.08 % 3
-Og, with `[[gnu::always_inline]]` on `operator[]`
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
carray_squareop_mean 440 ns 439 ns 3
carray_squareop_median 440 ns 439 ns 3
carray_squareop_stddev 1.18 ns 0.000 ns 3
carray_squareop_cv 0.27 % 0.00 % 3
vector_squareop_mean 494 ns 491 ns 3
vector_squareop_median 491 ns 491 ns 3
vector_squareop_stddev 5.52 ns 0.000 ns 3
vector_squareop_cv 1.12 % 0.00 % 3
===============================================================================
-O0, without any change to `libstdc++` implementation
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
vector_iter_mean 7652 ns 7633 ns 3
vector_iter_median 7631 ns 7605 ns 3
vector_iter_stddev 37.8 ns 49.3 ns 3
vector_iter_cv 0.49 % 0.65 % 3
carray_iter_mean 1894 ns 1897 ns 3
carray_iter_median 1896 ns 1904 ns 3
carray_iter_stddev 5.48 ns 12.1 ns 3
carray_iter_cv 0.29 % 0.64 % 3
-Og, without any change to `libstdc++` implementation
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
carray_iter_mean 455 ns 453 ns 3
carray_iter_median 454 ns 450 ns 3
carray_iter_stddev 4.27 ns 6.04 ns 3
carray_iter_cv 0.94 % 1.33 % 3
vector_iter_mean 506 ns 506 ns 3
vector_iter_median 495 ns 497 ns 3
vector_iter_stddev 22.1 ns 20.5 ns 3
vector_iter_cv 4.37 % 4.04 % 3
-O0, mark `std::vector::begin` as `[[gnu::always_inline]]`
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
carray_iter_mean 1892 ns 1889 ns 3
carray_iter_median 1889 ns 1882 ns 3
carray_iter_stddev 6.93 ns 13.1 ns 3
carray_iter_cv 0.37 % 0.69 % 3
vector_iter_mean 6827 ns 6849 ns 3
vector_iter_median 6832 ns 6824 ns 3
vector_iter_stddev 19.0 ns 43.3 ns 3
vector_iter_cv 0.28 % 0.63 % 3
-O0, mark `std::vector::begin` as `[[gnu::always_inline]]` and
mark `__normal_iterator` constructor, `operator+`, and `operator*`
as `[[gnu::always_inline]]`
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
carray_iter_mean 1889 ns 1890 ns 3
carray_iter_median 1890 ns 1883 ns 3
carray_iter_stddev 7.14 ns 12.1 ns 3
carray_iter_cv 0.38 % 0.64 % 3
vector_iter_mean 5424 ns 5435 ns 3
vector_iter_median 5420 ns 5455 ns 3
vector_iter_stddev 22.8 ns 34.2 ns 3
vector_iter_cv 0.42 % 0.63 % 3
-Og, mark `std::vector::begin` as `[[gnu::always_inline]]`
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
carray_iter_mean 459 ns 459 ns 3
carray_iter_median 462 ns 460 ns 3
carray_iter_stddev 5.70 ns 7.72 ns 3
carray_iter_cv 1.24 % 1.68 % 3
vector_iter_mean 495 ns 495 ns 3
vector_iter_median 494 ns 495 ns 3
vector_iter_stddev 2.10 ns 5.75 ns 3
vector_iter_cv 0.42 % 1.16 % 3
-Og, mark `std::vector::begin` as `[[gnu::always_inline]]` and
mark `__normal_iterator` constructor, `operator+`, and `operator*`
as `[[gnu::always_inline]]`
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
carray_iter_mean 454 ns 455 ns 3
carray_iter_median 454 ns 455 ns 3
carray_iter_stddev 2.97 ns 5.06 ns 3
carray_iter_cv 0.65 % 1.11 % 3
vector_iter_mean 493 ns 493 ns 3
vector_iter_median 492 ns 491 ns 3
vector_iter_stddev 2.51 ns 3.22 ns 3
vector_iter_cv 0.51 % 0.65 % 3
===============================================================================
-O0, without any change to `libstdc++` implementation
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
sumvec_rawloop_mean 111015 ns 111084 ns 3
sumvec_rawloop_median 111040 ns 111084 ns 3
sumvec_rawloop_stddev 95.0 ns 0.000 ns 3
sumvec_rawloop_cv 0.09 % 0.00 % 3
sumvec_accumulate_mean 448967 ns 450040 ns 3
sumvec_accumulate_median 449337 ns 450040 ns 3
sumvec_accumulate_stddev 812 ns 0.000 ns 3
sumvec_accumulate_cv 0.18 % 0.00 % 3
-Og, without any change to `libstdc++` implementation
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
sumvec_accumulate_mean 28104 ns 28041 ns 3
sumvec_accumulate_median 28275 ns 28250 ns 3
sumvec_accumulate_stddev 328 ns 362 ns 3
sumvec_accumulate_cv 1.17 % 1.29 % 3
sumvec_rawloop_mean 17851 ns 17787 ns 3
sumvec_rawloop_median 17710 ns 17578 ns 3
sumvec_rawloop_stddev 356 ns 362 ns 3
sumvec_rawloop_cv 2.00 % 2.04 % 3
-O0, mark `std::move` as `[[gnu::always_inline]]`
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
sumvec_rawloop_mean 110983 ns 110677 ns 3
sumvec_rawloop_median 110880 ns 110212 ns 3
sumvec_rawloop_stddev 218 ns 805 ns 3
sumvec_rawloop_cv 0.20 % 0.73 % 3
sumvec_accumulate_mean 372487 ns 372522 ns 3
sumvec_accumulate_median 372706 ns 372522 ns 3
sumvec_accumulate_stddev 693 ns 0.000 ns 3
sumvec_accumulate_cv 0.19 % 0.00 % 3
-O0, mark `std::move` as `[[gnu::always_inline]]` and mark
`__normal_iterator` constructor, `operator+`, `operator*`, and
`operator!=` as `[[gnu::always_inline]]`
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
sumvec_rawloop_mean 112385 ns 112305 ns 3
sumvec_rawloop_median 112405 ns 112305 ns 3
sumvec_rawloop_stddev 126 ns 0.000 ns 3
sumvec_rawloop_cv 0.11 % 0.00 % 3
sumvec_accumulate_mean 164825 ns 164339 ns 3
sumvec_accumulate_median 163806 ns 163060 ns 3
sumvec_accumulate_stddev 1970 ns 2215 ns 3
sumvec_accumulate_cv 1.19 % 1.35 % 3
-Og, mark `std::move` as `[[gnu::always_inline]]` and mark
`__normal_iterator` constructor, `operator+`, `operator*`, and
`operator!=` as `[[gnu::always_inline]]`
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
sumvec_rawloop_mean 17541 ns 17520 ns 3
sumvec_rawloop_median 17553 ns 17456 ns 3
sumvec_rawloop_stddev 38.9 ns 111 ns 3
sumvec_rawloop_cv 0.22 % 0.63 % 3
sumvec_accumulate_mean 27646 ns 27768 ns 3
sumvec_accumulate_median 27704 ns 27867 ns 3
sumvec_accumulate_stddev 102 ns 171 ns 3
sumvec_accumulate_cv 0.37 % 0.62 % 3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment