Last active
April 26, 2021 07:06
-
-
Save martinus/a5d1c5114cfd1930eec9b620a7803f7a to your computer and use it in GitHub Desktop.
stable_vector prototype, see https://www.reddit.com/r/cpp/comments/mwi9dk/new_paper_random_access_of_elements_in_data/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstddef> | |
#include <cstdint> // UINTPTR_MAX | |
#include <cstring> // memcpy | |
#include <memory> // allocator | |
#include <type_traits> | |
#include <utility> | |
#if _MSC_VER | |
# include <intrin.h> // required for CLZ builtin | |
#endif | |
namespace ankerl { | |
// This random-access container behaves very similar to std::vector, except it never invalidates references and never | |
// reallocates memory. It, it doubles its memory when it gets full. Random access is O(1). It implements the bit trick | |
// described in [1] (see discussion in [2]) It also seems to have similar properties as stationary_vector [3] and is highly | |
// similar to cphstl::pile [4]. | |
// | |
// WARNING: This is just a prototype that is not (yet) well tested. | |
// | |
// Features are: | |
// | |
// * Small-Vector-Optimization: First bucket is used for SVO, no allocations are necessary. | |
// * Fast O(1) random access. In my benchmark access is ~67% slower than for a std::vector. | |
// * No memory relocations. References will always stay valid through the lifetime of the container. | |
// | |
// | |
// Benchmarks | |
// | |
// * std::sort, runtime in ns per n*log2(n) | |
// | |
// n | vector | stable_vector | how much slower? | |
// -----+--------+---------------+------------------ | |
// 2^10 | 4.02 | 7.03 | 75% slower | |
// 2^15 | 4.02 | 6.97 | 73% slower | |
// 2^20 | 3.98 | 6.71 | 69% slower | |
// 2^25 | 3.98 | 6.65 | 67% slower | |
// | |
// * std::stable_sort, runtime in ns per n*log2(n) | |
// | |
// n | vector | stable_vector | how much slower? | |
// -----+--------+---------------+------------------ | |
// 2^10 | 4.58 | 7.77 | 70% slower | |
// 2^15 | 4.54 | 7.40 | 63% slower | |
// 2^20 | 4.67 | 7.46 | 60% slower | |
// 2^25 | 4.83 | 7.52 | 56% slower | |
// | |
// On average stable_vector is 67% slower. That's much better than pile from [4], which averages 162% slower. | |
// | |
// References | |
// [1] https://plflib.org/matt_bentley_-_pot_blocks_bit_tricks.pdf | |
// [2] https://www.reddit.com/r/cpp/comments/mwi9dk/new_paper_random_access_of_elements_in_data/ | |
// [3] https://github.com/mehrdadn/libvital/blob/master/doc/vital/container/stationary_vector.md | |
// [4] Worst-Case-Efficient Dynamic Arrays in Practice http://www.cphstl.dk/Paper/Dynamic-arrays/sea16.pdf | |
// [5] Experiences with the design and implementationof space-efficient deques | |
// http://www.cphstl.dk/Report/Deque/cphstl-report-2001-7.pdf | |
template <typename T, class Allocator = std::allocator<T>, std::size_t StartSize = 128 / sizeof(T) < 8U ? 8U : 64 / sizeof(T)> | |
class stable_vector : private Allocator { | |
// constexpr version to find the index of the most significant bit. E.g. for 33 (100001b) this returns 5. | |
static constexpr auto most_significant_active_bit_cx(std::size_t num) noexcept -> std::size_t { | |
auto result = std::size_t(); | |
while ((num >>= 1U) != 0U) { | |
++result; | |
} | |
return result; | |
} | |
public: | |
using value_type = T; | |
template <bool is_const> | |
class iter { | |
using container_t = typename std::conditional_t<is_const, stable_vector const, stable_vector>; | |
container_t* m_sv{}; | |
std::size_t m_idx{}; | |
public: | |
using iterator_category = std::random_access_iterator_tag; | |
using value_type = T; | |
using difference_type = std::ptrdiff_t; | |
using pointer = std::conditional_t<is_const, T const*, T*>; | |
using reference = std::conditional_t<is_const, T const&, T&>; | |
iter() = default; | |
iter(container_t* sv, std::size_t idx) noexcept | |
: m_sv(sv) | |
, m_idx(idx) {} | |
auto operator++() noexcept -> iter& { | |
++m_idx; | |
return *this; | |
} | |
auto operator--() noexcept -> iter& { | |
--m_idx; | |
return *this; | |
} | |
auto operator++(int) noexcept -> iter { | |
return iter(m_idx++); | |
} | |
auto operator--(int) noexcept -> iter { | |
return iter(m_idx--); | |
} | |
auto operator-(iter const& other) const noexcept -> difference_type { | |
return static_cast<difference_type>(m_idx - other.m_idx); | |
} | |
auto operator-(difference_type d) const noexcept -> iter { | |
return iter(m_sv, m_idx - d); | |
} | |
auto operator+(difference_type d) const noexcept -> iter { | |
return iter(m_sv, m_idx + d); | |
} | |
auto operator+=(difference_type d) noexcept -> iter& { | |
m_idx += d; | |
return *this; | |
} | |
auto operator-=(difference_type d) noexcept -> iter& { | |
m_idx -= d; | |
return *this; | |
} | |
auto operator*() const noexcept -> reference { | |
return (*m_sv)[m_idx]; | |
} | |
auto operator->() const noexcept -> pointer { | |
return &(*m_sv)[m_idx]; | |
} | |
template <bool O> | |
auto operator==(iter<O> const& other) const noexcept -> bool { | |
return m_idx == other.m_idx; | |
} | |
template <bool O> | |
auto operator!=(iter<O> const& other) const noexcept -> bool { | |
return m_idx != other.m_idx; | |
} | |
template <bool O> | |
auto operator<(iter<O> const& other) const noexcept -> bool { | |
return m_idx < other.m_idx; | |
} | |
template <bool O> | |
auto operator>(iter<O> const& other) const noexcept -> bool { | |
return m_idx > other.m_idx; | |
} | |
template <bool O> | |
auto operator<=(iter<O> const& other) const noexcept -> bool { | |
return m_idx <= other.m_idx; | |
} | |
template <bool O> | |
auto operator>=(iter<O> const& other) const noexcept -> bool { | |
return m_idx >= other.m_idx; | |
} | |
}; | |
using iterator = iter<false>; | |
using const_iterator = iter<true>; | |
auto begin() -> iterator { | |
return iterator(this, 0U); | |
} | |
auto end() -> iterator { | |
return iterator(this, m_size); | |
} | |
auto begin() const -> const_iterator { | |
return cbegin(); | |
} | |
auto end() const -> const_iterator { | |
return cend(); | |
} | |
auto cbegin() const -> const_iterator { | |
return const_iterator(this, 0U); | |
} | |
auto cend() const -> const_iterator { | |
return const_iterator(this, m_size); | |
} | |
explicit stable_vector() = default; | |
stable_vector(stable_vector const& other) { | |
copy(other); | |
} | |
auto operator=(stable_vector const& other) -> stable_vector& { | |
if (this != &other) { | |
copy(other); | |
} | |
return *this; | |
} | |
stable_vector(stable_vector&& other) noexcept { | |
move_into_uninitialized(other); | |
} | |
auto operator=(stable_vector&& other) noexcept -> stable_vector& { | |
clear(); | |
deallocate_uninitialized_memory(); | |
move_into_uninitialized(std::move(other)); | |
return *this; | |
} | |
[[nodiscard]] auto size() const -> std::size_t { | |
return m_size; | |
} | |
template <typename... Args> | |
auto emplace_back(Args&&... args) -> T& { | |
if (m_size == m_capacity) { | |
grow(); | |
} | |
auto* location = &operator[](m_size); | |
auto* ret = ::new (static_cast<void*>(location)) T(std::forward<Args>(args)...); | |
++m_size; | |
return *ret; | |
} | |
void push_back(T const& obj) { | |
emplace_back(obj); | |
} | |
void push_back(T&& obj) { | |
emplace_back(std::move(obj)); | |
} | |
void pop_back() noexcept { | |
if constexpr (std::is_trivially_destructible_v<T>) { | |
--m_size; | |
} else { | |
--m_size; | |
auto* ptr = &operator[](m_size); | |
ptr->~T(); | |
} | |
} | |
void clear() noexcept { | |
if constexpr (std::is_trivially_destructible_v<T>) { | |
m_size = 0; | |
} else { | |
while (!empty()) { | |
pop_back(); | |
} | |
} | |
} | |
constexpr auto empty() noexcept -> bool { | |
return m_size == 0U; | |
} | |
// correctly access m_svo's memory through std::launder | |
constexpr auto svo() noexcept -> T* { | |
return std::launder(reinterpret_cast<T*>(m_svo)); | |
} | |
constexpr auto svo() const noexcept -> T const* { | |
return std::launder(reinterpret_cast<T const*>(m_svo)); | |
} | |
auto front() const noexcept -> T const& { | |
return svo()[0]; | |
} | |
auto front() noexcept -> T& { | |
return svo()[0]; | |
} | |
auto back() const noexcept -> T const& { | |
return operator[](m_size - 1); | |
} | |
auto back() noexcept -> T& { | |
return operator[](m_size - 1); | |
} | |
constexpr auto operator[](std::size_t i) const noexcept -> T const& { | |
if (i < StartSize) { | |
return svo()[i]; | |
} | |
auto msab = most_significant_active_bit(i); | |
// block index is number of bits in i, minus number of bits already covered by the first block (the svo). | |
auto* block = m_blocks[msab - num_bits_svo]; | |
// clear the highest bit to get to the index within the block | |
return block[i ^ (1U << msab)]; | |
} | |
auto operator[](std::size_t i) noexcept -> T& { | |
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) | |
return const_cast<T&>(std::as_const(*this)[i]); | |
} | |
~stable_vector() noexcept { | |
clear(); | |
deallocate_uninitialized_memory(); | |
} | |
[[nodiscard]] auto capacity() const noexcept -> std::size_t { | |
return m_capacity; | |
} | |
private: | |
// Uses intrinsics to find the most significant bit. E.g. for 33 (100001b) this returns 5. | |
// On Windows this can't be constexpr, so we have a separate function for that. | |
static inline auto most_significant_active_bit(std::size_t num) noexcept -> std::size_t { | |
#ifdef _MSC_VER | |
# if UINTPTR_MAX == UINT32_MAX | |
return 31U - __lzcnt(num); | |
# else | |
return 63U - __lzcnt64(num); | |
# endif | |
#else | |
# if UINTPTR_MAX == UINT32_MAX | |
return 31U - __builtin_clz(num); | |
# else | |
return 63U - __builtin_clzll(num); | |
# endif | |
#endif | |
} | |
// allocates a new block, doubling the container's capacity. Returns index to the newly allocated block | |
void grow() { | |
if (m_blocks == nullptr) { | |
m_blocks = ::new T*[max_num_blocks]; | |
} | |
// we always double the capacity | |
m_blocks[calc_block_idx(m_capacity)] = Allocator::allocate(m_capacity); | |
m_capacity *= 2; | |
} | |
void reserve(std::size_t s) { | |
while (m_capacity < s) { | |
grow(); | |
} | |
} | |
void deallocate_uninitialized_memory() noexcept { | |
clear(); | |
if (m_blocks != nullptr) { | |
auto num_blocks = calc_block_idx(m_capacity); | |
auto block_size = StartSize; | |
for (std::size_t i = 0; i < num_blocks; ++i) { | |
Allocator::deallocate(m_blocks[i], block_size); | |
block_size *= 2; | |
} | |
delete[] m_blocks; | |
m_blocks = nullptr; | |
} | |
} | |
void copy(stable_vector const& other) { | |
clear(); | |
reserve(other.size()); | |
auto current_block_idx = std::size_t(-1); | |
auto block_size = StartSize; | |
auto num_left_to_copy = other.size(); | |
while (num_left_to_copy != 0) { | |
auto num_copy = num_left_to_copy < block_size ? num_left_to_copy : block_size; | |
if (current_block_idx == std::size_t(-1)) { | |
std::uninitialized_copy_n(other.svo(), num_copy, svo()); | |
} else { | |
std::uninitialized_copy_n(other.m_blocks[current_block_idx], num_copy, m_blocks[current_block_idx]); | |
} | |
num_left_to_copy -= num_copy; | |
++current_block_idx; | |
block_size = StartSize << current_block_idx; | |
} | |
m_size = other.size(); | |
} | |
void move_into_uninitialized(stable_vector&& other) { | |
m_size = std::exchange(other.m_size, 0U); | |
m_capacity = std::exchange(other.m_capacity, StartSize); | |
m_blocks = std::exchange(other.m_blocks, nullptr); | |
// only the SVO (or parts of it) needs to be copied | |
auto num_copy = m_size < StartSize ? m_size : StartSize; | |
if (num_copy > 0) { | |
std::uninitialized_move_n(other.svo(), num_copy, svo()); | |
} | |
} | |
static auto calc_block_idx(std::size_t i) noexcept -> std::size_t { | |
return most_significant_active_bit(i) - num_bits_svo; | |
} | |
static constexpr auto num_bits_svo = most_significant_active_bit_cx(StartSize); | |
static constexpr auto max_num_blocks = sizeof(std::size_t) * 8U - most_significant_active_bit_cx(StartSize); | |
std::size_t m_size{}; // current number of elements | |
std::size_t m_capacity{StartSize}; // currently allocated capacity | |
T** m_blocks{}; // array of all the blocks | |
// uninitialized memory for small-vector optimization (SVO) | |
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) | |
typename std::aligned_storage<sizeof(T), alignof(T)>::type m_svo[StartSize]; | |
}; | |
} // namespace ankerl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment