Created
October 20, 2023 16:02
-
-
Save Gerold103/0199ac4c2facc2b34ef25f8ea44a0601 to your computer and use it in GitHub Desktop.
BenchObjectPool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <atomic>
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <ctime>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
#include <vector>
// Forward declaration: the shared pool which thread-local pools exchange
// whole batches of objects with.
template <typename T, uint32_t BatchSize>
class ThreadGlobalPool;
// Number of objects a thread-local pool accumulates before handing a full
// batch back to the global pool.
static constexpr uint32_t theThreadLocalBatchSize = 32;
// Benchmark configuration: worker thread count, live objects per thread,
// and how many allocate-all/free-all rounds each thread performs.
static constexpr uint32_t theThreadCount = 5;
static constexpr uint32_t theValueCount = 300000;
static constexpr uint32_t theIterCount = 200;
////////////////////////////////////////////////////////////////////////////////////////// | |
// Intrusive singly-linked list. Nodes chain through the T member selected by
// myLink (defaults to T::myNext), so the list itself never allocates.
template<typename T, T* T::*myLink = &T::myNext>
class ForwardList
{
public:
	ForwardList() : myHead(nullptr), myTail(nullptr) {}

	// True when no elements are linked.
	bool IsEmpty() const { return myHead == nullptr; }

	// Detach and return the head element. Must not be called on an empty list.
	T* PopFirst()
	{
		T* first = myHead;
		myHead = first->*myLink;
		if (myHead == nullptr)
			myTail = nullptr;
		return first;
	}

	// Link aItem as the new tail.
	void Append(T* aItem)
	{
		if (myHead == nullptr)
			myHead = aItem;
		else
			myTail->*myLink = aItem;
		myTail = aItem;
		aItem->*myLink = nullptr;
	}

	// Link aItem as the new head.
	void Prepend(T* aItem)
	{
		aItem->*myLink = myHead;
		myHead = aItem;
		if (myTail == nullptr)
			myTail = aItem;
	}

private:
	T* myHead;
	T* myTail;
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// One pooled slot for a T. While the slot is free it acts as an intrusive
// free-list node; while allocated it is raw storage for the object.
template <typename T>
struct ThreadLocalPoolObject
{
	// Trivial ctor/dtor on purpose: the pool manages the union members
	// manually via placement-new, never through this struct's lifecycle.
	ThreadLocalPoolObject() {}
	~ThreadLocalPoolObject() {}
	// Union allows to keep proper size and alignment while not to invoke any
	// constructors.
	union
	{
		// Storage for the pooled T (never constructed here).
		T myBytes;
		// Free-list link, valid only while the slot is not allocated.
		ThreadLocalPoolObject* myNext;
	};
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// A batch ("chain") of free objects plus bookkeeping. Chains themselves are
// linked into the global pool's free-chain list through myNext.
template <typename T>
struct ThreadLocalPoolObjectChain
{
	using Object = ThreadLocalPoolObject<T>;
	using ObjectList = ForwardList<Object>;
	ThreadLocalPoolObjectChain();
	// The free objects currently held by this chain.
	ObjectList myList;
	// Number of objects in myList.
	uint32_t mySize;
	// Link used when this chain sits in ForwardList<Chain> in the global pool.
	ThreadLocalPoolObjectChain* myNext;
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// Storage cell sized/aligned to hold either one pooled object or one chain
// header. A freshly allocated batch uses its first unit as the chain.
template <typename T>
struct ThreadLocalPoolUnit
{
	using Object = ThreadLocalPoolObject<T>;
	using Chain = ThreadLocalPoolObjectChain<T>;
	// Trivial ctor/dtor: the pool placement-news the active union member.
	ThreadLocalPoolUnit() {}
	~ThreadLocalPoolUnit() {}
	// Each pooled unit is either an object or a list of objects (head and tail
	// pointer). This gives flexibility when a pool is empty and an object is freed -
	// no need to allocate any list-containers. The first freed object simply becomes
	// the container.
	union
	{
		Object myObject;
		Chain myChain;
	};
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// Per-thread object pool. Alloc/free touch only thread-local state in the
// common case; the global pool's mutex is taken only when a whole batch is
// moved in or out.
template <typename T, uint32_t BatchSize>
class ThreadLocalPool
{
public:
	using Chain = ThreadLocalPoolObjectChain<T>;
	using Object = typename Chain::Object;
	using ObjectList = typename Chain::ObjectList;
	using GlobalPool = ThreadGlobalPool<T, BatchSize>;
	using Unit = ThreadLocalPoolUnit<T>;
	ThreadLocalPool();
	~ThreadLocalPool();
	// Returns the calling thread's pool instance for this (T, BatchSize).
	static ThreadLocalPool& GetInstance();
	// Returns raw storage for one T; no constructor is invoked here.
	void* AllocateT();
	// Takes back storage previously returned by AllocateT; no destructor is
	// invoked here.
	void FreeT(
		void* aObject);
	// Current batch of free objects, or null when the pool is empty.
	Chain* myChain;
	// Set by the destructor; guards against use after thread-local teardown.
	bool myIsDestroyed;
	static_assert(BatchSize > 0, "BatchSize must be > 0");
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// Process-wide pool shared by all threads. Holds full chains donated by
// thread-local pools; all access goes under myMutex.
template <typename T, uint32_t BatchSize>
class ThreadGlobalPool
{
public:
	using LocalPool = ThreadLocalPool<T, BatchSize>;
	using ChainList = ForwardList<typename LocalPool::Chain>;
	// Returns the process-wide singleton for this (T, BatchSize).
	static ThreadGlobalPool& GetInstance();
	// Protects myFreeChains.
	std::mutex myMutex;
	// Thread-local pools return excess objects to the global pool. It allows to keep
	// reusing the objects even if some threads allocate more than free, or vice versa.
	ChainList myFreeChains;
};
////////////////////////////////////////////////////////////////////////////////////////// | |
template <typename T, size_t BatchSize> | |
class ThreadPooled | |
{ | |
public: | |
using LocalPool = ThreadLocalPool<T, BatchSize>; | |
void* operator new( | |
size_t) { return LocalPool::GetInstance().AllocateT(); } | |
void operator delete( | |
void* aPtr) { LocalPool::GetInstance().FreeT(aPtr); } | |
}; | |
////////////////////////////////////////////////////////////////////////////////////////// | |
// Benchmark payload for the non-pooled runs: allocated with plain operator
// new. The padding inflates the object to the requested size.
template<uint32_t PaddigSize>
struct ValueBase
{
	static constexpr uint32_t ourPaddingSize = PaddigSize;
	static constexpr uint32_t ourBatchSize = 0;
	static constexpr bool ourIsPooled = false;

	ValueBase(int aValue, int aOwner)
		: myValue(aValue)
		, myOwner(aOwner)
	{
	}

	// Owner defaults to 0.
	ValueBase(int aValue)
		: ValueBase(aValue, 0)
	{
	}

	int myValue;
	int myOwner;
	uint8_t myPadding[PaddigSize];
};
// Pooled variant of the benchmark payload: same data as ValueBase, but
// operator new/delete are inherited from ThreadPooled, so allocations go
// through the thread-local pool.
template<uint32_t PaddigSize, uint32_t BatchSize>
struct Value
	: public ValueBase<PaddigSize>
	, public ThreadPooled<Value<PaddigSize, BatchSize>, BatchSize>
{
	static constexpr uint32_t ourBatchSize = BatchSize;
	static constexpr bool ourIsPooled = true;
	Value(
		int aValue,
		int aOwner) : ValueBase<PaddigSize>(aValue, aOwner) {}
	Value(
		int aValue) : ValueBase<PaddigSize>(aValue) {}
};
////////////////////////////////////////////////////////////////////////////////////////// | |
// Monotonic timestamp in microseconds (defined below).
static uint64_t
getUsec();
// Runs the benchmark once for the given value type (defined below).
template<typename ValueT>
static void
run();
////////////////////////////////////////////////////////////////////////////////////////// | |
int | |
main() | |
{ | |
std::cout << "Thread count: " << theThreadCount | |
<< ", value count: " << theValueCount | |
<< '\n' << std::endl; | |
run<ValueBase<1>>(); | |
run<Value<1, theThreadLocalBatchSize>>(); | |
run<ValueBase<512>>(); | |
run<Value<512, theThreadLocalBatchSize>>(); | |
run<ValueBase<1024>>(); | |
run<Value<1024, theThreadLocalBatchSize>>(); | |
return 0; | |
} | |
////////////////////////////////////////////////////////////////////////////////////////// | |
template<typename T> | |
inline | |
ThreadLocalPoolObjectChain<T>::ThreadLocalPoolObjectChain() | |
: mySize(0) | |
{ | |
} | |
////////////////////////////////////////////////////////////////////////////////////////// | |
// A pool starts empty; the first AllocateT pulls or allocates a chain.
template <typename T, uint32_t BatchSize>
inline
ThreadLocalPool<T, BatchSize>::ThreadLocalPool()
	: myChain(nullptr)
	, myIsDestroyed(false)
{
}
template <typename T, uint32_t BatchSize> | |
ThreadLocalPool<T, BatchSize>::~ThreadLocalPool() | |
{ | |
assert(!myIsDestroyed); | |
myIsDestroyed = true; | |
if (myChain != nullptr) | |
{ | |
GlobalPool& glob = GlobalPool::GetInstance(); | |
glob.myMutex.lock(); | |
// Prepend to pop cache-hot data first, when need to allocate next time. | |
glob.myFreeChains.Prepend(myChain); | |
glob.myMutex.unlock(); | |
myChain = nullptr; | |
} | |
} | |
template <typename T, uint32_t BatchSize>
inline ThreadLocalPool<T, BatchSize>&
ThreadLocalPool<T, BatchSize>::GetInstance()
{
	// One pool instance per thread per (T, BatchSize) instantiation.
	static thread_local ThreadLocalPool ourInstance;
	// Catches allocations attempted after this thread's pool was destroyed
	// during thread-local teardown.
	assert(!ourInstance.myIsDestroyed);
	return ourInstance;
}
template <typename T, uint32_t BatchSize> | |
void* | |
ThreadLocalPool<T, BatchSize>::AllocateT() | |
{ | |
assert(!myIsDestroyed); | |
if (myChain == nullptr) | |
{ | |
GlobalPool& glob = GlobalPool::GetInstance(); | |
glob.myMutex.lock(); | |
if (!glob.myFreeChains.IsEmpty()) | |
myChain = glob.myFreeChains.PopFirst(); | |
glob.myMutex.unlock(); | |
if (myChain == nullptr) | |
{ | |
Unit* units = new Unit[BatchSize + 1]; | |
// First unit -> container. Other units - objects. | |
Chain* chain = new (&units[0]) Chain(); | |
for (uint32_t i = 1; i < BatchSize + 1; ++i) | |
chain->myList.Append(new (&units[i]) Object()); | |
chain->mySize = BatchSize; | |
myChain = chain; | |
} | |
} | |
ObjectList* list = &myChain->myList; | |
if (!list->IsEmpty()) | |
{ | |
assert(myChain->mySize > 0); | |
--myChain->mySize; | |
return (void*)list->PopFirst(); | |
} | |
assert(myChain->mySize == 0); | |
// Can use the container itself as an object too. | |
void* obj = (void*)myChain; | |
myChain = nullptr; | |
return obj; | |
} | |
template <typename T, uint32_t BatchSize> | |
void | |
ThreadLocalPool<T, BatchSize>::FreeT( | |
void* aObject) | |
{ | |
assert(!myIsDestroyed); | |
if (myChain == nullptr) | |
{ | |
// First object becomes the container for next objects. | |
myChain = new (aObject) Chain(); | |
return; | |
} | |
if (myChain->mySize == BatchSize) | |
{ | |
GlobalPool& glob = GlobalPool::GetInstance(); | |
glob.myMutex.lock(); | |
// Prepend to pop cache-hot data first, when need to allocate next time. | |
glob.myFreeChains.Prepend(myChain); | |
glob.myMutex.unlock(); | |
myChain = new (aObject) Chain();; | |
return; | |
} | |
// Prepend to pop cache-hot data first, when need to allocate next time. | |
myChain->myList.Prepend((Object*)aObject); | |
++myChain->mySize; | |
} | |
////////////////////////////////////////////////////////////////////////////////////////// | |
template <typename T, uint32_t BatchSize>
inline ThreadGlobalPool<T, BatchSize>&
ThreadGlobalPool<T, BatchSize>::GetInstance()
{
	// Meyers singleton: initialization is thread-safe since C++11.
	static ThreadGlobalPool ourInstance;
	return ourInstance;
}
////////////////////////////////////////////////////////////////////////////////////////// | |
// Monotonic timestamp in microseconds. Only differences between two calls
// are meaningful; the epoch is unspecified. Uses std::chrono::steady_clock
// instead of POSIX clock_gettime(CLOCK_MONOTONIC, ...) so the code is
// portable while keeping the same monotonic-microseconds contract.
static uint64_t
getUsec()
{
	using namespace std::chrono;
	return (uint64_t)duration_cast<microseconds>(
		steady_clock::now().time_since_epoch()).count();
}
template<typename ValueT> | |
static void | |
run() | |
{ | |
std::cout << "Pooled: " << ValueT::ourIsPooled | |
<< ", object size: " << sizeof(ValueT) | |
<< std::endl; | |
std::vector<std::thread> threads; | |
threads.reserve(theThreadCount); | |
std::condition_variable condVar; | |
std::mutex mutex; | |
bool doStart = false; | |
std::atomic_int threadIdCounter(1); | |
for (uint32_t threadI = 0; threadI < theThreadCount; ++threadI) | |
{ | |
threads.emplace_back([&]() { | |
std::mt19937 randomGen; | |
randomGen.seed(((uint64_t)&randomGen + time(NULL)) & UINT32_MAX); | |
int threadId = threadIdCounter.fetch_add(1, std::memory_order_relaxed); | |
std::vector<ValueT*> values; | |
values.resize(theValueCount); | |
{ | |
std::unique_lock lock(mutex); | |
while (!doStart) | |
condVar.wait(lock); | |
} | |
for (uint32_t iterI = 0; iterI < theIterCount; ++iterI) | |
{ | |
for (uint32_t valI = 0; valI < theValueCount; ++valI) | |
values[valI] = new ValueT(valI, threadId); | |
for (uint32_t valI = 0; valI < theValueCount; ++valI) | |
{ | |
uint32_t lastIdx = theValueCount - valI - 1; | |
std::uniform_int_distribution<uint32_t> dist(0, lastIdx); | |
uint32_t idx = dist(randomGen); | |
ValueT* v = values[idx]; | |
assert(v->myValue == (int)idx); | |
assert(v->myOwner == threadId); | |
delete v; | |
if (idx == lastIdx) | |
continue; | |
// Cyclic removal. | |
v = values[lastIdx]; | |
assert(v->myValue == (int)lastIdx); | |
assert(v->myOwner == threadId); | |
v->myValue = idx; | |
values[idx] = v; | |
} | |
} | |
}); | |
} | |
uint64_t t1 = getUsec(); | |
mutex.lock(); | |
doStart = true; | |
condVar.notify_all(); | |
mutex.unlock(); | |
for (std::thread& t : threads) | |
t.join(); | |
uint64_t t2 = getUsec(); | |
std::cout << "Took " << (t2 - t1) / 1000.0 << " ms\n" << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment