Skip to content

Instantly share code, notes, and snippets.

@Gerold103
Created October 20, 2023 16:02
Show Gist options
  • Save Gerold103/0199ac4c2facc2b34ef25f8ea44a0601 to your computer and use it in GitHub Desktop.
Save Gerold103/0199ac4c2facc2b34ef25f8ea44a0601 to your computer and use it in GitHub Desktop.
BenchObjectPool
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <ctime>
#include <iostream>
#include <mutex>
#include <new>
#include <random>
#include <thread>
#include <vector>
// Forward declaration: the process-wide pool that thread-local pools exchange
// whole batches ("chains") of free objects with. Defined below.
template <typename T, uint32_t BatchSize>
class ThreadGlobalPool;
// How many objects a thread-local pool keeps in one chain before spilling to
// the global pool.
static constexpr uint32_t theThreadLocalBatchSize = 32;
// Number of worker threads each benchmark run spawns.
static constexpr uint32_t theThreadCount = 5;
// Number of objects each thread keeps alive per iteration.
static constexpr uint32_t theValueCount = 300000;
// Number of allocate-all / free-all rounds each thread performs.
static constexpr uint32_t theIterCount = 200;
//////////////////////////////////////////////////////////////////////////////////////////
// Intrusive singly-linked list with O(1) append and prepend. The link pointer
// lives inside T itself (a member-pointer template argument, defaulting to
// T::myNext), so the list performs no allocations of its own.
template<typename T, T* T::*Link = &T::myNext>
class ForwardList
{
public:
	ForwardList()
		: myHead(nullptr)
		, myTail(nullptr)
	{
	}

	// True when the list holds no items.
	bool IsEmpty() const
	{
		return myHead == nullptr;
	}

	// Detach and return the first item. Precondition: the list is not empty.
	T* PopFirst()
	{
		T* item = myHead;
		myHead = item->*Link;
		if (myHead == nullptr)
			myTail = nullptr;
		return item;
	}

	// Attach aItem at the tail. Its link pointer is overwritten.
	void Append(
		T* aItem)
	{
		if (myHead != nullptr)
			myTail->*Link = aItem;
		else
			myHead = aItem;
		myTail = aItem;
		aItem->*Link = nullptr;
	}

	// Attach aItem at the head. Its link pointer is overwritten.
	void Prepend(
		T* aItem)
	{
		aItem->*Link = myHead;
		myHead = aItem;
		if (myTail == nullptr)
			myTail = aItem;
	}

private:
	T* myHead;
	T* myTail;
};
//////////////////////////////////////////////////////////////////////////////////////////
// One pool slot for a single T. While the slot is free it doubles as an
// intrusive list link; while handed out it holds the object's bytes.
template <typename T>
struct ThreadLocalPoolObject
{
	// Explicit empty special members: the union deletes the defaults when T
	// is non-trivial, and the pool must never run T's ctor/dtor here.
	ThreadLocalPoolObject() {}
	~ThreadLocalPoolObject() {}

	// Union allows to keep proper size and alignment while not to invoke any
	// constructors.
	union
	{
		// Raw storage for the pooled T.
		T myBytes;
		// Link used by ForwardList while the slot is free.
		ThreadLocalPoolObject* myNext;
	};
};
//////////////////////////////////////////////////////////////////////////////////////////
// A batch ("chain") of free pooled objects plus bookkeeping. Chains are the
// unit of exchange between thread-local pools and the global pool.
template <typename T>
struct ThreadLocalPoolObjectChain
{
using Object = ThreadLocalPoolObject<T>;
using ObjectList = ForwardList<Object>;
ThreadLocalPoolObjectChain();
// Free objects currently stored in this chain.
ObjectList myList;
// Count of objects in myList; the chain container itself is not counted.
uint32_t mySize;
// Link used when whole chains are strung together in the global pool's list.
ThreadLocalPoolObjectChain* myNext;
};
//////////////////////////////////////////////////////////////////////////////////////////
// One slot of a freshly allocated batch. A unit is raw storage that is later
// placement-new'ed into either a pooled object or the chain container - see
// ThreadLocalPool::AllocateT().
template <typename T>
struct ThreadLocalPoolUnit
{
using Object = ThreadLocalPoolObject<T>;
using Chain = ThreadLocalPoolObjectChain<T>;
// Explicit empty special members: the union deletes the defaults, and no
// member may be constructed/destroyed here.
ThreadLocalPoolUnit() {}
~ThreadLocalPoolUnit() {}
// Each pooled unit is either an object or a list of objects (head and tail
// pointer). This gives flexibility when a pool is empty and an object is freed -
// no need to allocate any list-containers. The first freed object simply becomes
// the container.
union
{
Object myObject;
Chain myChain;
};
};
//////////////////////////////////////////////////////////////////////////////////////////
// Per-thread object pool. The fast paths of AllocateT()/FreeT() touch only
// thread-local state; the mutex-protected global pool is consulted only when
// the local chain is exhausted or full.
template <typename T, uint32_t BatchSize>
class ThreadLocalPool
{
public:
using Chain = ThreadLocalPoolObjectChain<T>;
using Object = typename Chain::Object;
using ObjectList = typename Chain::ObjectList;
using GlobalPool = ThreadGlobalPool<T, BatchSize>;
using Unit = ThreadLocalPoolUnit<T>;
ThreadLocalPool();
~ThreadLocalPool();
// Returns the calling thread's instance (thread_local singleton).
static ThreadLocalPool& GetInstance();
// Returns uninitialized storage big enough for one T.
void* AllocateT();
// Returns storage previously obtained from AllocateT() back to the pool.
// aObject must not be null.
void FreeT(
void* aObject);
// Current chain of free objects; nullptr when the local pool is empty.
Chain* myChain;
// Set by the destructor; catches use after thread-local teardown.
bool myIsDestroyed;
static_assert(BatchSize > 0, "BatchSize must be > 0");
};
//////////////////////////////////////////////////////////////////////////////////////////
// Process-wide exchange point for chains of free objects, shared by all
// thread-local pools with the same (T, BatchSize).
template <typename T, uint32_t BatchSize>
class ThreadGlobalPool
{
public:
using LocalPool = ThreadLocalPool<T, BatchSize>;
using ChainList = ForwardList<typename LocalPool::Chain>;
// Returns the process-wide singleton.
static ThreadGlobalPool& GetInstance();
// Protects myFreeChains.
std::mutex myMutex;
// Thread-local pools return excess objects to the global pool. It allows to keep
// reusing the objects even if some threads allocate more than free, or vice versa.
ChainList myFreeChains;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Mixin: deriving from ThreadPooled<T, BatchSize> routes T's operator
// new/delete through the thread-local object pool.
template <typename T, size_t BatchSize>
class ThreadPooled
{
public:
using LocalPool = ThreadLocalPool<T, BatchSize>;
void* operator new(
size_t) { return LocalPool::GetInstance().AllocateT(); }
// Guard against null explicitly: the standard leaves it unspecified whether
// a delete-expression on a null pointer invokes the deallocation function,
// and FreeT() placement-news into the given memory, so it must never
// receive nullptr.
void operator delete(
void* aPtr) { if (aPtr != nullptr) LocalPool::GetInstance().FreeT(aPtr); }
};
//////////////////////////////////////////////////////////////////////////////////////////
// Plain benchmark payload allocated with the default heap allocator: an int
// value, the id of the owning thread, and PaddingSize bytes of padding to
// control the object's size.
template<uint32_t PaddingSize>
struct ValueBase
{
	static constexpr uint32_t ourPaddingSize = PaddingSize;
	static constexpr uint32_t ourBatchSize = 0;
	static constexpr bool ourIsPooled = false;

	ValueBase(
		int aValue,
		int aOwner)
		: myValue(aValue)
		, myOwner(aOwner)
	{
	}

	ValueBase(
		int aValue)
		: ValueBase(aValue, 0)
	{
	}

	int myValue;
	int myOwner;
	uint8_t myPadding[PaddingSize];
};
// Pooled benchmark payload: same data layout as ValueBase, but allocation
// and deallocation go through the thread-local pool via ThreadPooled.
template<uint32_t PaddingSize, uint32_t BatchSize>
struct Value
	: public ValueBase<PaddingSize>
	, public ThreadPooled<Value<PaddingSize, BatchSize>, BatchSize>
{
	static constexpr uint32_t ourBatchSize = BatchSize;
	static constexpr bool ourIsPooled = true;

	Value(
		int aValue,
		int aOwner)
		: ValueBase<PaddingSize>(aValue, aOwner)
	{
	}

	Value(
		int aValue)
		: ValueBase<PaddingSize>(aValue)
	{
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Current monotonic time in microseconds.
static uint64_t
getUsec();
// Runs the multi-threaded allocate/free benchmark for one value type and
// prints how long it took.
template<typename ValueT>
static void
run();
//////////////////////////////////////////////////////////////////////////////////////////
int
main()
{
// Print the configuration once, then benchmark each padding size twice:
// plain heap allocation (ValueBase) vs. the thread-local pool (Value).
std::cout << "Thread count: " << theThreadCount
<< ", value count: " << theValueCount
<< '\n' << std::endl;
run<ValueBase<1>>();
run<Value<1, theThreadLocalBatchSize>>();
run<ValueBase<512>>();
run<Value<512, theThreadLocalBatchSize>>();
run<ValueBase<1024>>();
run<Value<1024, theThreadLocalBatchSize>>();
return 0;
}
//////////////////////////////////////////////////////////////////////////////////////////
// A chain starts empty: myList default-constructs to an empty list.
// myNext was previously left uninitialized; ForwardList overwrites it on
// insertion, but zero-initializing keeps the object fully defined at no cost.
template<typename T>
inline
ThreadLocalPoolObjectChain<T>::ThreadLocalPoolObjectChain()
: mySize(0)
, myNext(nullptr)
{
}
//////////////////////////////////////////////////////////////////////////////////////////
// A new thread-local pool starts empty; a chain is fetched or allocated
// lazily on the first AllocateT() call.
template <typename T, uint32_t BatchSize>
inline
ThreadLocalPool<T, BatchSize>::ThreadLocalPool()
: myChain(nullptr)
, myIsDestroyed(false)
{
}
// On thread exit, hand any remaining chain back to the global pool so other
// threads can keep reusing the objects.
template <typename T, uint32_t BatchSize>
ThreadLocalPool<T, BatchSize>::~ThreadLocalPool()
{
	assert(!myIsDestroyed);
	myIsDestroyed = true;
	if (myChain != nullptr)
	{
		GlobalPool& glob = GlobalPool::GetInstance();
		// RAII lock instead of manual lock()/unlock(): releases on any exit
		// path from the scope.
		std::lock_guard<std::mutex> lock(glob.myMutex);
		// Prepend to pop cache-hot data first, when need to allocate next time.
		glob.myFreeChains.Prepend(myChain);
		myChain = nullptr;
	}
}
// One pool instance per (thread, T, BatchSize) combination.
template <typename T, uint32_t BatchSize>
inline ThreadLocalPool<T, BatchSize>&
ThreadLocalPool<T, BatchSize>::GetInstance()
{
static thread_local ThreadLocalPool ourInstance;
// Catch allocation attempts after this thread's pool was torn down.
assert(!ourInstance.myIsDestroyed);
return ourInstance;
}
// Returns uninitialized storage for one T. Refill order: local chain, then a
// chain stolen from the global pool, then a freshly allocated batch.
template <typename T, uint32_t BatchSize>
void*
ThreadLocalPool<T, BatchSize>::AllocateT()
{
	assert(!myIsDestroyed);
	if (myChain == nullptr)
	{
		// First try to steal a ready chain from the global pool. Scoped RAII
		// lock instead of manual lock()/unlock().
		{
			GlobalPool& glob = GlobalPool::GetInstance();
			std::lock_guard<std::mutex> lock(glob.myMutex);
			if (!glob.myFreeChains.IsEmpty())
				myChain = glob.myFreeChains.PopFirst();
		}
		if (myChain == nullptr)
		{
			// Nothing in the global pool - allocate a fresh batch. The array
			// is intentionally never deleted: all its units keep circulating
			// inside the pool for the lifetime of the process.
			Unit* units = new Unit[BatchSize + 1];
			// First unit -> container. Other units - objects.
			Chain* chain = new (&units[0]) Chain();
			for (uint32_t i = 1; i < BatchSize + 1; ++i)
				chain->myList.Append(new (&units[i]) Object());
			chain->mySize = BatchSize;
			myChain = chain;
		}
	}
	ObjectList* list = &myChain->myList;
	if (!list->IsEmpty())
	{
		assert(myChain->mySize > 0);
		--myChain->mySize;
		return (void*)list->PopFirst();
	}
	assert(myChain->mySize == 0);
	// Can use the container itself as an object too.
	void* obj = (void*)myChain;
	myChain = nullptr;
	return obj;
}
// Returns one object's storage to the pool. aObject must come from
// AllocateT() and must not be null (a Chain is placement-new'ed into it).
template <typename T, uint32_t BatchSize>
void
ThreadLocalPool<T, BatchSize>::FreeT(
	void* aObject)
{
	assert(!myIsDestroyed);
	assert(aObject != nullptr);
	if (myChain == nullptr)
	{
		// First object becomes the container for next objects.
		myChain = new (aObject) Chain();
		return;
	}
	if (myChain->mySize == BatchSize)
	{
		// The local chain is full: spill it to the global pool and start a
		// new chain, using the freed object itself as the container.
		GlobalPool& glob = GlobalPool::GetInstance();
		{
			// RAII lock instead of manual lock()/unlock().
			std::lock_guard<std::mutex> lock(glob.myMutex);
			// Prepend to pop cache-hot data first, when need to allocate next time.
			glob.myFreeChains.Prepend(myChain);
		}
		myChain = new (aObject) Chain();
		return;
	}
	// Prepend to pop cache-hot data first, when need to allocate next time.
	myChain->myList.Prepend((Object*)aObject);
	++myChain->mySize;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Process-wide singleton; construction is thread-safe (C++11 static-local
// initialization guarantees).
template <typename T, uint32_t BatchSize>
inline ThreadGlobalPool<T, BatchSize>&
ThreadGlobalPool<T, BatchSize>::GetInstance()
{
static ThreadGlobalPool ourInstance;
return ourInstance;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Current monotonic time in microseconds. Uses CLOCK_MONOTONIC so the
// measurement is immune to wall-clock adjustments.
static uint64_t
getUsec()
{
	timespec t;
	clock_gettime(CLOCK_MONOTONIC, &t);
	// Widen tv_sec before multiplying: time_t may be 32-bit, in which case
	// seconds * 1'000'000 would overflow before the implicit conversion.
	return (uint64_t)t.tv_sec * 1'000'000 + (uint64_t)t.tv_nsec / 1000;
}
// Benchmark one value type: theThreadCount threads each run theIterCount
// rounds of "allocate theValueCount objects, then free them all in random
// order", asserting object integrity along the way. Prints total wall time.
template<typename ValueT>
static void
run()
{
std::cout << "Pooled: " << ValueT::ourIsPooled
<< ", object size: " << sizeof(ValueT)
<< std::endl;
std::vector<std::thread> threads;
threads.reserve(theThreadCount);
// Start barrier: workers finish their setup, then block until main flips
// doStart, so all threads begin the timed work together.
std::condition_variable condVar;
std::mutex mutex;
bool doStart = false;
// 1-based id recorded in each object to verify ownership on free.
std::atomic_int threadIdCounter(1);
for (uint32_t threadI = 0; threadI < theThreadCount; ++threadI)
{
threads.emplace_back([&]() {
std::mt19937 randomGen;
// Seed from a stack address plus time so each thread gets its own stream.
randomGen.seed(((uint64_t)&randomGen + time(NULL)) & UINT32_MAX);
int threadId = threadIdCounter.fetch_add(1, std::memory_order_relaxed);
std::vector<ValueT*> values;
values.resize(theValueCount);
{
std::unique_lock lock(mutex);
while (!doStart)
condVar.wait(lock);
}
for (uint32_t iterI = 0; iterI < theIterCount; ++iterI)
{
// Invariant throughout a round: values[i]->myValue == i for all live slots.
for (uint32_t valI = 0; valI < theValueCount; ++valI)
values[valI] = new ValueT(valI, threadId);
for (uint32_t valI = 0; valI < theValueCount; ++valI)
{
// Live elements occupy [0, lastIdx]; delete a random one of them.
uint32_t lastIdx = theValueCount - valI - 1;
std::uniform_int_distribution<uint32_t> dist(0, lastIdx);
uint32_t idx = dist(randomGen);
ValueT* v = values[idx];
assert(v->myValue == (int)idx);
assert(v->myOwner == threadId);
delete v;
if (idx == lastIdx)
continue;
// Cyclic removal.
// Move the last live element into the freed slot (and update its
// stored index) to keep the live range contiguous.
v = values[lastIdx];
assert(v->myValue == (int)lastIdx);
assert(v->myOwner == threadId);
v->myValue = idx;
values[idx] = v;
}
}
});
}
uint64_t t1 = getUsec();
mutex.lock();
doStart = true;
condVar.notify_all();
mutex.unlock();
for (std::thread& t : threads)
t.join();
uint64_t t2 = getUsec();
std::cout << "Took " << (t2 - t1) / 1000.0 << " ms\n" << std::endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment