Created
November 13, 2016 21:15
-
-
Save glampert/163cd54e483f29782cc9ea89cd4f1205 to your computer and use it in GitHub Desktop.
Stack-like linear allocator - uses std::atomic to allow concurrent allocations from different threads.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ============================================================================ | |
// This code is in the Public Domain - I claim no copyrights over it. | |
// Use at your own leisure and risk. | |
// | |
// Compiled and tested with: | |
// c++ -std=c++11 -fno-exceptions -Wall -Wextra -pedantic -O3 linear_allocator.cpp | |
// ============================================================================ | |
#include <cassert> | |
#include <cstdint> | |
#include <cstdio> | |
#include <cstdlib> | |
#include <cstring> | |
#include <atomic> | |
#include <memory> | |
#include <thread> | |
#include <utility> | |
#include <vector> | |
// ======================================================== | |
inline bool is_power_of_two(const std::size_t size) | |
{ | |
return size != 0 && (size & (size - 1)) == 0; | |
} | |
inline bool is_aligned_ptr(const void * ptr, const std::size_t alignment) | |
{ | |
// Aligned if the pointer is evenly divisible by the alignment value. | |
return (reinterpret_cast<std::uintptr_t>(ptr) & (alignment - 1)) == 0; | |
} | |
inline std::size_t align_size(const std::size_t size, const std::size_t alignment) | |
{ | |
// Add the minimum extra needed to the size for pointer alignment. | |
// This size can then be used to malloc() some memory | |
// and then have the pointer aligned with align_ptr(). | |
return size + (alignment - 1); | |
} | |
template<typename T> | |
inline T * align_ptr(const T * ptr, const std::size_t alignment) | |
{ | |
// Cast to integer and align: | |
const std::uintptr_t uint_ptr = reinterpret_cast<std::uintptr_t>(ptr); | |
const std::uintptr_t aligned_ptr = (uint_ptr + (alignment - 1)) & ~(alignment - 1); | |
// Re-cast to void*, validate and return: | |
T * user_ptr = reinterpret_cast<T *>(aligned_ptr); | |
assert(is_aligned_ptr(user_ptr, alignment)); | |
return user_ptr; | |
} | |
// ======================================================== | |
struct allocator_out_of_mem_abort | |
{ | |
void operator()(const char * user_msg, std::size_t num_bytes, std::size_t alignment) const | |
{ | |
std::fprintf(stderr, "[%s]: Failed to allocate %zu bytes with %zub alignment!\n", | |
user_msg, num_bytes, alignment); | |
std::abort(); | |
} | |
}; | |
// ======================================================== | |
template<typename OutOfMemHandler = allocator_out_of_mem_abort> | |
class linear_allocator final | |
{ | |
public: | |
// Not copyable. | |
linear_allocator(const linear_allocator &) = delete; | |
linear_allocator & operator = (const linear_allocator &) = delete; | |
// Allocated the stack memory block. | |
explicit linear_allocator(std::uint32_t max_stack_size_bytes) | |
: m_stack{ new std::uint8_t[max_stack_size_bytes] } | |
, m_size{ max_stack_size_bytes } | |
, m_top{ 0 } | |
{ | |
// Should be on any decent architecture. | |
assert(m_top.is_lock_free()); | |
} | |
// Allocates a chunk of bytes from the linear stack, bumping the stack top counter | |
// and calling the user-defined error handler if out-of-memory (stack overflowed). | |
std::uint8_t * allocate_bytes(std::size_t num_bytes, std::size_t alignment = 16) | |
{ | |
assert(num_bytes != 0); | |
assert(alignment != 0); | |
assert(is_power_of_two(alignment)); | |
num_bytes = align_size(num_bytes, alignment); | |
assert(num_bytes <= UINT32_MAX); | |
const std::uint32_t old_top = m_top.fetch_add(static_cast<std::uint32_t>(num_bytes)); | |
if ((old_top + num_bytes) > m_size) | |
{ | |
OutOfMemHandler out_of_mem{}; | |
out_of_mem("linear_allocator", num_bytes, alignment); | |
return nullptr; | |
} | |
return align_ptr(&m_stack[old_top], alignment); | |
} | |
// Allocate with cast to user-defined type T. | |
// The returned element is *not* constructed, so the caller is | |
// responsible for calling placement new on the pointer to run | |
// class constructors. | |
template<typename T> | |
T * allocate(std::size_t count, std::size_t alignment = 16) | |
{ | |
return reinterpret_cast<T *>(allocate_bytes(count * sizeof(T), alignment)); | |
} | |
// Does nothing - only provided for consistency. | |
// The only way to deallocate memory is by rolling back the stack top counter. | |
void free(const void *) { } | |
// Rests the stack top counter to a given position (zero by default), | |
// effectively marking any memory beyond that position as free for | |
// new allocations. | |
void reset(std::uint32_t position = 0) | |
{ | |
m_top = position; | |
} | |
// Accessors: | |
std::uint8_t * get_base() const { return m_stack; } | |
std::uint32_t get_size() const { return m_size; } | |
std::uint32_t get_top() const { return m_top; } | |
private: | |
// The linear_allocator is a simple block of memory where | |
// allocations always move forward in the block - i.e.: it | |
// is a stack-like allocator. Total size of the stack mem | |
// block never changes after initialization. | |
std::unique_ptr<std::uint8_t[]> m_stack; | |
const std::uint32_t m_size; | |
// The stack top counter is atomic, so it is safe to allocate | |
// from the same linear_allocator across different threads. | |
std::atomic<std::uint32_t> m_top; | |
}; | |
// ======================================================== | |
template<typename T> | |
T * construct(T * obj, const T & other) // Copy constructor | |
{ | |
return ::new(obj) T(other); | |
} | |
template<typename T, typename... Args> | |
T * construct(T * obj, Args&&... args) // Parameterized or default constructor | |
{ | |
return ::new(obj) T(std::forward<Args>(args)...); | |
} | |
template<typename T> | |
void destroy(T * obj) | |
{ | |
if (obj != nullptr) | |
{ | |
obj->~T(); | |
} | |
} | |
// ======================================================== | |
void thread_func(std::uint8_t thread_idx, std::uint64_t * out, linear_allocator<> * shared_allocator) | |
{ | |
const int MaxThreadAlloc = 2048; | |
const int N = std::max(1, std::rand() % 128); | |
std::printf("Thread %u running %i iterations...\n", static_cast<unsigned>(thread_idx), N); | |
std::vector<std::pair<std::uint8_t *, int>> blocks; | |
blocks.reserve(N); | |
// Do a bunch of small/medium allocation every other iteration of the loop: | |
for (int i = 0; i < N; ++i) | |
{ | |
if (i & 1) | |
{ | |
const int size = std::max(1, std::rand() % MaxThreadAlloc); | |
std::uint8_t * ptr = shared_allocator->allocate_bytes(size); | |
std::memset(ptr, thread_idx, size); | |
blocks.emplace_back(ptr, size); | |
} | |
} | |
// Makes sure we have a visible side-effect so the compiler | |
// doesn't strip out the whole loop; this value is ignored. | |
std::uint64_t sum = 0; | |
// Assert that our memory was not overwritten by other threads: | |
for (const auto & block_and_size : blocks) | |
{ | |
const std::uint8_t * ptr = block_and_size.first; | |
const int size = block_and_size.second; | |
for (int i = 0; i < size; ++i) | |
{ | |
assert(ptr[i] == thread_idx); | |
sum += ptr[i]; | |
} | |
} | |
(*out) += sum; | |
} | |
// ======================================================== | |
int main() | |
{ | |
linear_allocator<> shared_allocator{ 2048 * 1024 }; // 2MB | |
std::uint64_t ignored = 0; | |
std::vector<std::thread> threads; | |
// 10 runs with 10 thread each | |
for (int i = 0; i < 10; ++i) | |
{ | |
for (int t = 0; t < 10; ++t) | |
{ | |
threads.emplace_back(thread_func, static_cast<std::uint8_t>(t), &ignored, &shared_allocator); | |
} | |
for (auto & t : threads) | |
{ | |
t.join(); | |
} | |
threads.clear(); | |
std::printf("Mem used: %ub\n", shared_allocator.get_top()); | |
shared_allocator.reset(); | |
} | |
return static_cast<int>(ignored); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment