| // | |
| // Generate benchmarks for the three queue implementations shown in | |
| // Jeff Preshing's CppCon 2014 talk, "How Ubisoft Montreal Develops Games for | |
| // Multicore - Before and After C++11". | |
| // | |
| // Slides: https://github.com/CppCon/CppCon2014/blob/master/Presentations/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore%20-%20Before%20and%20After%20C++11%20-%20Jeff%20Preshing%20-%20CppCon%202014.pdf?raw=true | |
| // | |
| #include <iostream> | |
| #include <atomic> | |
| #include <thread> | |
| #include <chrono> | |
| // 0 : Low-level (relaxed) C++11 atomics with standalone memory fences | |
| // 1 : Low-level C++11 atomics with ordering constraints | |
| // 2 : Sequentially consistent C++11 atomics | |
| #define METHOD 0 | |
| using namespace std; | |
| template <class T, int size> | |
| class CappedSPSCQueue | |
| { | |
| private: | |
| T m_items[size]; | |
| atomic<int> m_writePos; | |
| alignas(64) int m_readPos; | |
| public: | |
| CappedSPSCQueue() : m_writePos(0), m_readPos(0) {} | |
| void clear() | |
| { | |
| m_writePos = 0; | |
| m_readPos = 0; | |
| } | |
| #if METHOD == 0 | |
| bool tryPush(const T& item) | |
| { | |
| int w = m_writePos.load(memory_order_relaxed); | |
| if (w >= size) | |
| return false; | |
| m_items[w] = item; | |
| atomic_thread_fence(memory_order_release); | |
| m_writePos.store(w + 1, memory_order_relaxed); | |
| return true; | |
| } | |
| bool tryPop(T& item) | |
| { | |
| int w = m_writePos.load(memory_order_relaxed); | |
| if (m_readPos >= w) | |
| return false; | |
| atomic_thread_fence(memory_order_acquire); | |
| item = m_items[m_readPos]; | |
| m_readPos++; | |
| return true; | |
| } | |
| #elif METHOD == 1 | |
| bool tryPush(const T& item) | |
| { | |
| int w = m_writePos.load(memory_order_relaxed); | |
| if (w >= size) | |
| return false; | |
| m_items[w] = item; | |
| m_writePos.store(w + 1, memory_order_release); | |
| return true; | |
| } | |
| bool tryPop(T& item) | |
| { | |
| int w = m_writePos.load(memory_order_acquire); | |
| if (m_readPos >= w) | |
| return false; | |
| item = m_items[m_readPos]; | |
| m_readPos++; | |
| return true; | |
| } | |
| #elif METHOD == 2 | |
| bool tryPush(const T& item) | |
| { | |
| int w = m_writePos; | |
| if (w >= size) | |
| return false; | |
| m_items[w] = item; | |
| m_writePos = w + 1; | |
| return true; | |
| } | |
| bool tryPop(T& item) | |
| { | |
| int w = m_writePos; | |
| if (m_readPos >= w) | |
| return false; | |
| item = m_items[m_readPos]; | |
| m_readPos++; | |
| return true; | |
| } | |
| #endif | |
| }; | |
| class Timer | |
| { | |
| private: | |
| chrono::high_resolution_clock::time_point t0, t1; | |
| double span; | |
| public: | |
| Timer() | |
| { | |
| span = 0; | |
| } | |
| void begin() | |
| { | |
| t0 = chrono::high_resolution_clock::now(); | |
| } | |
| void end() | |
| { | |
| t1 = chrono::high_resolution_clock::now(); | |
| span += chrono::duration_cast<chrono::duration<double>>(t1 - t0).count(); | |
| } | |
| double duration() | |
| { | |
| return span; | |
| } | |
| void clear() | |
| { | |
| span = 0; | |
| } | |
| }; | |
| static const int limit = 10000000; | |
| CappedSPSCQueue<int, limit> TestQueue; | |
| atomic<bool> ready(false); | |
| Timer writeTimer, readTimer; | |
| void writer() | |
| { | |
| ready = true; | |
| writeTimer.begin(); | |
| for (int i = 0; i < limit; i++) | |
| { | |
| TestQueue.tryPush(i); | |
| } | |
| writeTimer.end(); | |
| } | |
| void reader() | |
| { | |
| static volatile int destination; | |
| while (!ready) {} | |
| readTimer.begin(); | |
| for (int i = 0; i < limit; i++) | |
| { | |
| int y; | |
| while (!TestQueue.tryPop((int&) y)) {} | |
| // Force the result to be stored somewhere, otherwise | |
| // the compiler will optimize y away and not bother | |
| // copying the item from the queue at all | |
| destination = y; | |
| } | |
| readTimer.end(); | |
| } | |
| #if METHOD < 2 && !defined(__arm__) | |
| static const int iterations = 100; | |
| #else | |
| static const int iterations = 20; | |
| #endif | |
| extern "C" | |
| void tests() | |
| { | |
| printf("METHOD = %d, queue size = %d\n", METHOD, limit); | |
| writeTimer.clear(); | |
| readTimer.clear(); | |
| for (int i = 0; i < iterations; i++) | |
| { | |
| TestQueue.clear(); | |
| ready = false; | |
| writer(); | |
| reader(); | |
| } | |
| printf("separate: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9); | |
| writeTimer.clear(); | |
| readTimer.clear(); | |
| for (int i = 0; i < iterations; i++) | |
| { | |
| TestQueue.clear(); | |
| ready = false; | |
| thread t1(writer); | |
| reader(); | |
| t1.join(); | |
| } | |
| printf("together: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9); | |
| } | |
| int main(int argc, const char * argv[]) | |
| { | |
| tests(); | |
| return 0; | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment