// Gist showing that rand(), which takes a global lock, is ~60x slower than rand_r()
// when it is called concurrently from many threads.
// (I ran into this with librdkafka's random partitioning: with one producer per group
// the program eventually has ~1200 threads.)
//
// $ g++ -DRAND -o rand -O3 -pthread rand.cpp
// $ g++ -DRAND_R -o rand_r -O3 -pthread rand.cpp
//
// $ time ./rand
//
// real 0m2.336s
// user 0m2.685s
// sys 0m15.588s
//
// $ time ./rand_r
//
// real 0m0.041s
// user 0m0.106s
// sys 0m0.042s
#include <cstddef>
#include <cstdlib>
#include <stdexcept>
#include <thread>
#include <vector>
// Total number of gen_rand() calls performed by the whole benchmark.
static constexpr std::size_t MESSAGES = 10'000'000;
// Number of worker threads started by main().
static constexpr std::size_t THREADS = 1000;
// Share of the total work handled by each worker thread.
static constexpr std::size_t MESSAGES_PER_THREAD = MESSAGES / THREADS;
// Integer division truncates: fail the build instead of silently doing less work
// if the constants above are ever changed to values that do not divide evenly.
static_assert(MESSAGES % THREADS == 0, "MESSAGES must be a multiple of THREADS");

// Per-thread generator state handed to rand_r(); every thread starts from the same value.
thread_local unsigned int seed = 0xdeadbeaf;
// Optimization barrier borrowed from Google Benchmark.
//
// The empty asm statement takes `value` as an input operand (register or memory)
// and declares a "memory" clobber, so the compiler has to actually produce `value`
// and cannot discard the computation that yields it.
//
// Kept just in case: per the original author, the gen_rand() calls are not
// optimized away even without it.
inline __attribute__((always_inline)) void DoNotOptimize(int const &value) {
    asm volatile("" : : "r,m"(value) : "memory");
}
int gen_rand() | |
{ | |
#ifdef RAND | |
return rand(); | |
#elif defined(RAND_R) | |
return rand_r(&seed); | |
#else | |
#error No rand/rand_r seleted | |
#endif | |
} | |
int main() | |
{ | |
std::vector<std::thread> threads; | |
for (int i = 0; i < THREADS; ++i) { | |
threads.emplace_back([]() { | |
for (int i = 0; i < MESSAGES_PER_THREAD; ++i) { | |
DoNotOptimize(gen_rand()); | |
} | |
}); | |
} | |
for (auto &thread : threads) { | |
thread.join(); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment