Skip to content

Instantly share code, notes, and snippets.

@glampert
Created November 13, 2016 21:15
Show Gist options
  • Save glampert/163cd54e483f29782cc9ea89cd4f1205 to your computer and use it in GitHub Desktop.
Save glampert/163cd54e483f29782cc9ea89cd4f1205 to your computer and use it in GitHub Desktop.
Stack-like linear allocator - uses std::atomic to allow concurrent allocations from different threads.
// ============================================================================
// This code is in the Public Domain - I claim no copyrights over it.
// Use at your own leisure and risk.
//
// Compiled and tested with:
// c++ -std=c++11 -fno-exceptions -Wall -Wextra -pedantic -O3 linear_allocator.cpp
// ============================================================================
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <atomic>
#include <memory>
#include <thread>
#include <utility>
#include <vector>
// ========================================================
inline bool is_power_of_two(const std::size_t size)
{
return size != 0 && (size & (size - 1)) == 0;
}
inline bool is_aligned_ptr(const void * ptr, const std::size_t alignment)
{
// Aligned if the pointer is evenly divisible by the alignment value.
return (reinterpret_cast<std::uintptr_t>(ptr) & (alignment - 1)) == 0;
}
inline std::size_t align_size(const std::size_t size, const std::size_t alignment)
{
// Add the minimum extra needed to the size for pointer alignment.
// This size can then be used to malloc() some memory
// and then have the pointer aligned with align_ptr().
return size + (alignment - 1);
}
template<typename T>
inline T * align_ptr(const T * ptr, const std::size_t alignment)
{
// Cast to integer and align:
const std::uintptr_t uint_ptr = reinterpret_cast<std::uintptr_t>(ptr);
const std::uintptr_t aligned_ptr = (uint_ptr + (alignment - 1)) & ~(alignment - 1);
// Re-cast to void*, validate and return:
T * user_ptr = reinterpret_cast<T *>(aligned_ptr);
assert(is_aligned_ptr(user_ptr, alignment));
return user_ptr;
}
// ========================================================
struct allocator_out_of_mem_abort
{
void operator()(const char * user_msg, std::size_t num_bytes, std::size_t alignment) const
{
std::fprintf(stderr, "[%s]: Failed to allocate %zu bytes with %zub alignment!\n",
user_msg, num_bytes, alignment);
std::abort();
}
};
// ========================================================
template<typename OutOfMemHandler = allocator_out_of_mem_abort>
class linear_allocator final
{
public:
// Not copyable.
linear_allocator(const linear_allocator &) = delete;
linear_allocator & operator = (const linear_allocator &) = delete;
// Allocated the stack memory block.
explicit linear_allocator(std::uint32_t max_stack_size_bytes)
: m_stack{ new std::uint8_t[max_stack_size_bytes] }
, m_size{ max_stack_size_bytes }
, m_top{ 0 }
{
// Should be on any decent architecture.
assert(m_top.is_lock_free());
}
// Allocates a chunk of bytes from the linear stack, bumping the stack top counter
// and calling the user-defined error handler if out-of-memory (stack overflowed).
std::uint8_t * allocate_bytes(std::size_t num_bytes, std::size_t alignment = 16)
{
assert(num_bytes != 0);
assert(alignment != 0);
assert(is_power_of_two(alignment));
num_bytes = align_size(num_bytes, alignment);
assert(num_bytes <= UINT32_MAX);
const std::uint32_t old_top = m_top.fetch_add(static_cast<std::uint32_t>(num_bytes));
if ((old_top + num_bytes) > m_size)
{
OutOfMemHandler out_of_mem{};
out_of_mem("linear_allocator", num_bytes, alignment);
return nullptr;
}
return align_ptr(&m_stack[old_top], alignment);
}
// Allocate with cast to user-defined type T.
// The returned element is *not* constructed, so the caller is
// responsible for calling placement new on the pointer to run
// class constructors.
template<typename T>
T * allocate(std::size_t count, std::size_t alignment = 16)
{
return reinterpret_cast<T *>(allocate_bytes(count * sizeof(T), alignment));
}
// Does nothing - only provided for consistency.
// The only way to deallocate memory is by rolling back the stack top counter.
void free(const void *) { }
// Rests the stack top counter to a given position (zero by default),
// effectively marking any memory beyond that position as free for
// new allocations.
void reset(std::uint32_t position = 0)
{
m_top = position;
}
// Accessors:
std::uint8_t * get_base() const { return m_stack; }
std::uint32_t get_size() const { return m_size; }
std::uint32_t get_top() const { return m_top; }
private:
// The linear_allocator is a simple block of memory where
// allocations always move forward in the block - i.e.: it
// is a stack-like allocator. Total size of the stack mem
// block never changes after initialization.
std::unique_ptr<std::uint8_t[]> m_stack;
const std::uint32_t m_size;
// The stack top counter is atomic, so it is safe to allocate
// from the same linear_allocator across different threads.
std::atomic<std::uint32_t> m_top;
};
// ========================================================
template<typename T>
T * construct(T * obj, const T & other) // Copy constructor
{
return ::new(obj) T(other);
}
template<typename T, typename... Args>
T * construct(T * obj, Args&&... args) // Parameterized or default constructor
{
return ::new(obj) T(std::forward<Args>(args)...);
}
template<typename T>
void destroy(T * obj)
{
if (obj != nullptr)
{
obj->~T();
}
}
// ========================================================
void thread_func(std::uint8_t thread_idx, std::uint64_t * out, linear_allocator<> * shared_allocator)
{
const int MaxThreadAlloc = 2048;
const int N = std::max(1, std::rand() % 128);
std::printf("Thread %u running %i iterations...\n", static_cast<unsigned>(thread_idx), N);
std::vector<std::pair<std::uint8_t *, int>> blocks;
blocks.reserve(N);
// Do a bunch of small/medium allocation every other iteration of the loop:
for (int i = 0; i < N; ++i)
{
if (i & 1)
{
const int size = std::max(1, std::rand() % MaxThreadAlloc);
std::uint8_t * ptr = shared_allocator->allocate_bytes(size);
std::memset(ptr, thread_idx, size);
blocks.emplace_back(ptr, size);
}
}
// Makes sure we have a visible side-effect so the compiler
// doesn't strip out the whole loop; this value is ignored.
std::uint64_t sum = 0;
// Assert that our memory was not overwritten by other threads:
for (const auto & block_and_size : blocks)
{
const std::uint8_t * ptr = block_and_size.first;
const int size = block_and_size.second;
for (int i = 0; i < size; ++i)
{
assert(ptr[i] == thread_idx);
sum += ptr[i];
}
}
(*out) += sum;
}
// ========================================================
int main()
{
linear_allocator<> shared_allocator{ 2048 * 1024 }; // 2MB
std::uint64_t ignored = 0;
std::vector<std::thread> threads;
// 10 runs with 10 thread each
for (int i = 0; i < 10; ++i)
{
for (int t = 0; t < 10; ++t)
{
threads.emplace_back(thread_func, static_cast<std::uint8_t>(t), &ignored, &shared_allocator);
}
for (auto & t : threads)
{
t.join();
}
threads.clear();
std::printf("Mem used: %ub\n", shared_allocator.get_top());
shared_allocator.reset();
}
return static_cast<int>(ignored);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment