Skip to content

Instantly share code, notes, and snippets.

@preshing
Created October 23, 2014 00:13
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save preshing/4d28abad8da4e40cb1d4 to your computer and use it in GitHub Desktop.
Save preshing/4d28abad8da4e40cb1d4 to your computer and use it in GitHub Desktop.
Generate benchmarks for three implementations of a wait-free queue using C++11 atomics
//
// Generate benchmarks for the three queue implementations shown in
// Jeff Preshing's CppCon 2014 talk, "How Ubisoft Montreal Develops Games for
// Multicore - Before and After C++11".
//
// Slides: https://github.com/CppCon/CppCon2014/blob/master/Presentations/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore%20-%20Before%20and%20After%20C++11%20-%20Jeff%20Preshing%20-%20CppCon%202014.pdf?raw=true
//
#include <iostream>
#include <atomic>
#include <thread>
#include <chrono>
// 0 : Low-level (relaxed) C++11 atomics with standalone memory fences
// 1 : Low-level C++11 atomics with ordering constraints
// 2 : Sequentially consistent C++11 atomics
#define METHOD 0
using namespace std;
template <class T, int size>
class CappedSPSCQueue
{
private:
T m_items[size];
atomic<int> m_writePos;
alignas(64) int m_readPos;
public:
CappedSPSCQueue() : m_writePos(0), m_readPos(0) {}
void clear()
{
m_writePos = 0;
m_readPos = 0;
}
#if METHOD == 0
bool tryPush(const T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (w >= size)
return false;
m_items[w] = item;
atomic_thread_fence(memory_order_release);
m_writePos.store(w + 1, memory_order_relaxed);
return true;
}
bool tryPop(T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (m_readPos >= w)
return false;
atomic_thread_fence(memory_order_acquire);
item = m_items[m_readPos];
m_readPos++;
return true;
}
#elif METHOD == 1
bool tryPush(const T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (w >= size)
return false;
m_items[w] = item;
m_writePos.store(w + 1, memory_order_release);
return true;
}
bool tryPop(T& item)
{
int w = m_writePos.load(memory_order_acquire);
if (m_readPos >= w)
return false;
item = m_items[m_readPos];
m_readPos++;
return true;
}
#elif METHOD == 2
bool tryPush(const T& item)
{
int w = m_writePos;
if (w >= size)
return false;
m_items[w] = item;
m_writePos = w + 1;
return true;
}
bool tryPop(T& item)
{
int w = m_writePos;
if (m_readPos >= w)
return false;
item = m_items[m_readPos];
m_readPos++;
return true;
}
#endif
};
class Timer
{
private:
chrono::high_resolution_clock::time_point t0, t1;
double span;
public:
Timer()
{
span = 0;
}
void begin()
{
t0 = chrono::high_resolution_clock::now();
}
void end()
{
t1 = chrono::high_resolution_clock::now();
span += chrono::duration_cast<chrono::duration<double>>(t1 - t0).count();
}
double duration()
{
return span;
}
void clear()
{
span = 0;
}
};
static const int limit = 10000000;
CappedSPSCQueue<int, limit> TestQueue;
atomic<bool> ready(false);
Timer writeTimer, readTimer;
void writer()
{
ready = true;
writeTimer.begin();
for (int i = 0; i < limit; i++)
{
TestQueue.tryPush(i);
}
writeTimer.end();
}
void reader()
{
static volatile int destination;
while (!ready) {}
readTimer.begin();
for (int i = 0; i < limit; i++)
{
int y;
while (!TestQueue.tryPop((int&) y)) {}
// Force the result to be stored somewhere, otherwise
// the compiler will optimize y away and not bother
// copying the item from the queue at all
destination = y;
}
readTimer.end();
}
#if METHOD < 2 && !defined(__arm__)
static const int iterations = 100;
#else
static const int iterations = 20;
#endif
extern "C"
void tests()
{
printf("METHOD = %d, queue size = %d\n", METHOD, limit);
writeTimer.clear();
readTimer.clear();
for (int i = 0; i < iterations; i++)
{
TestQueue.clear();
ready = false;
writer();
reader();
}
printf("separate: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9);
writeTimer.clear();
readTimer.clear();
for (int i = 0; i < iterations; i++)
{
TestQueue.clear();
ready = false;
thread t1(writer);
reader();
t1.join();
}
printf("together: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9);
}
int main(int argc, const char * argv[])
{
tests();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment