-
-
Save LucHermitte/3934a9d436cbf513866f8aa2eb329fb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code inspired by Chandler Carruth's presentation at CppCon17: | |
// "Going Nowhere Faster" | |
// Written from scratch by Luc Hermitte, 2018-2020 | |
// Licence: Boost Software Licence | |
#ifndef RNG_H | |
#define RNG_H | |
#include <random> | |
#include <cassert> | |
#include <limits> | |
template <typename T> | |
class RNG | |
{ | |
public: | |
explicit RNG(T min_, T max_, int count_) | |
: m_device() | |
, m_engine(m_device()) | |
, m_distribution(min_, max_) | |
, m_count(count_) | |
{} | |
explicit RNG(int count) | |
: RNG(std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), count) | |
{} | |
struct iterator { | |
typedef std::forward_iterator_tag iterator_category; | |
typedef std::ptrdiff_t difference_type; | |
typedef T value_type; | |
typedef T* pointer; | |
typedef T& reference; | |
explicit iterator(RNG const& rng_, int count_) : rng(rng_), count(count_){} | |
T operator*() const { return rng.generate(); } | |
friend bool operator==(iterator const& lhs, iterator const& rhs) { | |
assert(&lhs.rng == &rhs.rng); | |
return lhs.count == rhs.count; | |
} | |
friend bool operator!=(iterator const& lhs, iterator const& rhs) { | |
return !(lhs == rhs); | |
} | |
iterator& operator++() { --count; return *this; } | |
iterator operator++(int) { iterator tmp; ++(*this); return tmp; } | |
private: | |
RNG const& rng; | |
int count; | |
}; | |
iterator begin() const { return iterator{*this, m_count}; } | |
iterator end () const { return iterator{*this, 0}; } | |
T generate() const { return m_distribution(m_engine); } | |
private: | |
std::random_device mutable m_device; | |
std::default_random_engine mutable m_engine; | |
std::uniform_int_distribution<T> mutable m_distribution; | |
int m_count; | |
}; | |
#endif // RNG_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Luc Hermitte, 2020 | |
// Licence: BSL | |
// Compile with: | |
// PKG_CONFIG_PATH="/usr/local/4clang/lib/pkgconfig:${PKG_CONFIG_PATH}" LDFLAGS="$(pkg-config --libs-only-L benchmark)" CXX='clang++' CXXFLAGS="-O3 -DNDEBUG -march=native -stdlib=libc++ -std=c++2a $(pkg-config --cflags-only-I benchmark) -DLEVEL1_DCACHE_LINESIZE=$(getconf LEVEL1_DCACHE_LINESIZE)" make type-iter | |
#include "RNG.hpp" | |
#include <benchmark/benchmark.h> | |
#include <boost/align/aligned_allocator.hpp> | |
#include <vector> | |
#include <span> | |
#if defined(__arm__) | |
constexpr bool isArm = true; | |
#else | |
constexpr bool isArm = false; | |
#endif | |
constexpr std::size_t hardware_destructive_interference_size = isArm ? 64 : 128; | |
// static_assert(hardware_destructive_interference_size >= max_align_v, "math?"); | |
#if defined(LEVEL1_DCACHE_LINESIZE) | |
constexpr std::size_t hardware_constructive_interference_size = LEVEL1_DCACHE_LINESIZE; | |
#else | |
constexpr std::size_t hardware_constructive_interference_size = 64; | |
#endif | |
template <typename T, std::size_t alignment = hardware_constructive_interference_size> | |
using aligned_vector | |
= std::vector<T, boost::alignment::aligned_allocator<T, alignment>>; | |
template <typename IndexType> | |
int sum_with_size_in_loop(std::span<int const> s) | |
{ | |
int r = 0; | |
for (IndexType i = 0; i != s.size(); ++i) | |
r += s[i]; | |
return r; | |
} | |
template <typename IndexType> | |
int sum_with_cached_size(std::span<int const> s) | |
{ | |
int r = 0; | |
IndexType const N = s.size(); | |
for (IndexType i = 0; i != N; ++i) | |
r += s[i]; | |
return r; | |
} | |
int sum_for_range(std::span<const int> s) | |
{ | |
int r = 0; | |
for (auto e : s) | |
r += e; | |
return r; | |
} | |
int sum_accumulate(std::span<const int> s) | |
{ | |
return std::accumulate(s.begin(), s.end(), 0); | |
} | |
struct DontCacheSize{}; | |
struct CacheSize{}; | |
template <typename IndexType, typename Caching> | |
struct Sum { }; | |
template <typename IndexType> | |
struct Sum<IndexType, DontCacheSize> | |
{ | |
static int compute(std::span<int const> s) { | |
return sum_with_size_in_loop<IndexType>(s); | |
} | |
}; | |
template <typename IndexType> | |
struct Sum<IndexType, CacheSize> | |
{ | |
static int compute(std::span<int const> s) { | |
return sum_with_cached_size<IndexType>(s); | |
} | |
}; | |
template <typename IndexType, typename Caching> | |
static void accu_bench(benchmark::State & s) { | |
int bytes = 1 << s.range(0); | |
// Compute how many elements we can fit in this many bytes | |
int count = bytes / sizeof(int); | |
aligned_vector<int> v; v.reserve(count); | |
for (auto i : RNG<int>(count)) { | |
v.push_back(i); | |
} | |
for (auto _ : s) { | |
int sum = Sum<IndexType, Caching>::compute(v); | |
benchmark::DoNotOptimize(sum); | |
benchmark::ClobberMemory(); | |
} | |
s.SetBytesProcessed(long(s.iterations()) * long(bytes)); | |
s.SetLabel(std::to_string(bytes / 1024) + "kb"); | |
} | |
BENCHMARK_TEMPLATE(accu_bench, std::size_t, CacheSize) ->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, std::size_t, DontCacheSize)->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, std::ptrdiff_t, CacheSize) ->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, std::ptrdiff_t, DontCacheSize)->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, unsigned int, CacheSize) ->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, unsigned int, DontCacheSize)->Arg(15)->Arg(18)->Arg(2)->ReportAggregatesOnly(true); | |
// BENCHMARK_TEMPLATE(accu_bench, int, CacheSize) ->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
BENCHMARK_TEMPLATE(accu_bench, int, DontCacheSize)->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
static void accu_bench_for_range(benchmark::State & s) { | |
int bytes = 1 << s.range(0); | |
// Compute how many elements we can fit in this many bytes | |
int count = bytes / sizeof(int); | |
aligned_vector<int> v; v.reserve(count); | |
for (auto i : RNG<int>(count)) { | |
v.push_back(i); | |
} | |
for (auto _ : s) { | |
int sum = sum_for_range(v); | |
benchmark::DoNotOptimize(sum); | |
benchmark::ClobberMemory(); | |
} | |
s.SetBytesProcessed(long(s.iterations()) * long(bytes)); | |
s.SetLabel(std::to_string(bytes / 1024) + "kb"); | |
} | |
BENCHMARK(accu_bench_for_range)->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
static void accu_bench_accumulate(benchmark::State & s) { | |
int bytes = 1 << s.range(0); | |
// Compute how many elements we can fit in this many bytes | |
int count = bytes / sizeof(int); | |
aligned_vector<int> v; v.reserve(count); | |
for (auto i : RNG<int>(count)) { | |
v.push_back(i); | |
} | |
for (auto _ : s) { | |
int sum = sum_accumulate(v); | |
benchmark::DoNotOptimize(sum); | |
benchmark::ClobberMemory(); | |
} | |
s.SetBytesProcessed(long(s.iterations()) * long(bytes)); | |
s.SetLabel(std::to_string(bytes / 1024) + "kb"); | |
} | |
BENCHMARK(accu_bench_accumulate)->Arg(15)->Arg(18)->Arg(22)->ReportAggregatesOnly(true); | |
BENCHMARK_MAIN(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment