Skip to content

Instantly share code, notes, and snippets.

@azat

azat/rand.c

Last active Oct 7, 2020
Embed
What would you like to do?
// Gist showing that rand(), which takes a global lock, is ~60x slower than rand_r() under heavy threading
// (found while using librdkafka random partitioning with a producer per group, where the program eventually had ~1200 threads)
//
// $ g++ -DRAND -o rand -O3 -pthread rand.cpp
// $ g++ -DRAND_R -o rand_r -O3 -pthread rand.cpp
//
// $ time ./rand
//
// real 0m2.336s
// user 0m2.685s
// sys 0m15.588s
//
// $ time ./rand_r
//
// real 0m0.041s
// user 0m0.106s
// sys 0m0.042s
#include <cstdlib>
#include <thread>
#include <vector>
#include <stdexcept>
// Benchmark sizing: total number of generator calls, number of worker
// threads, and each worker's share of the calls (integer division).
static constexpr size_t MESSAGES{10'000'000};
static constexpr size_t THREADS{1000};
static constexpr size_t MESSAGES_PER_THREAD{MESSAGES / THREADS};
// State passed to rand_r() by gen_rand(); thread_local gives every thread its
// own copy, each starting from the same initial value.
thread_local unsigned int seed{0xdeadbeaf};
// Optimization barrier taken from Google Benchmark: an empty asm statement
// that lists `value` as an input operand (register or memory) and declares a
// "memory" clobber, so the compiler must treat `value` as used.
// Kept only as a safeguard; the gen_rand() call is not expected to be
// optimized away in the first place.
[[gnu::always_inline]] inline void DoNotOptimize(const int &value)
{
    asm volatile("" : : "r,m"(value) : "memory");
}
// Returns the next pseudo-random number from the generator chosen at build time:
//   -DRAND   -> rand()
//   -DRAND_R -> rand_r(&seed), using the thread_local `seed` as generator state
// RAND takes precedence if both macros are defined; the build is rejected with
// an #error if neither is.
int gen_rand()
{
#if defined(RAND)
    return rand();
#elif defined(RAND_R)
    return rand_r(&seed);
#else
#error No rand/rand_r selected: build with -DRAND or -DRAND_R
#endif
}
// Benchmark driver: starts THREADS worker threads, each of which calls
// gen_rand() MESSAGES_PER_THREAD times, then joins all of them before exiting.
int main()
{
    std::vector<std::thread> workers;
    workers.reserve(THREADS);  // size is known up front; avoid regrowth while spawning

    // Loop counters are std::size_t to match the type of the bounds
    // (the original compared a signed int against size_t constants).
    for (std::size_t t = 0; t < THREADS; ++t) {
        workers.emplace_back([] {
            for (std::size_t n = 0; n < MESSAGES_PER_THREAD; ++n) {
                // Pass the result through the barrier so it counts as used.
                DoNotOptimize(gen_rand());
            }
        });
    }

    for (auto &worker : workers) {
        worker.join();
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment