Skip to content

Instantly share code, notes, and snippets.

@martinus
Last active April 26, 2021 07:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martinus/a5d1c5114cfd1930eec9b620a7803f7a to your computer and use it in GitHub Desktop.
Save martinus/a5d1c5114cfd1930eec9b620a7803f7a to your computer and use it in GitHub Desktop.
#include <cstddef>
#include <cstdint> // UINTPTR_MAX
#include <cstring> // memcpy
#include <memory> // allocator, allocator_traits, uninitialized_*, destroy_n
#include <new> // launder
#include <type_traits>
#include <utility>
#if _MSC_VER
# include <intrin.h> // required for bit-scan intrinsics
#endif
namespace ankerl {
// This random-access container behaves very similar to std::vector, except it never invalidates references and never
// reallocates memory. Instead, it doubles its memory when it gets full. Random access is O(1). It implements the bit trick
// described in [1] (see discussion in [2]). It also seems to have similar properties as stationary_vector [3] and is highly
// similar to cphstl::pile [4].
//
// WARNING: This is just a prototype that is not (yet) well tested.
//
// Features are:
//
// * Small-Vector-Optimization: First bucket is used for SVO, no allocations are necessary.
// * Fast O(1) random access. In my benchmark access is ~67% slower than for a std::vector.
// * No memory relocations. References will always stay valid through the lifetime of the container.
//
//
// Benchmarks
//
// * std::sort, runtime in ns per n*log2(n)
//
// n | vector | stable_vector | how much slower?
// -----+--------+---------------+------------------
// 2^10 | 4.02 | 7.03 | 75% slower
// 2^15 | 4.02 | 6.97 | 73% slower
// 2^20 | 3.98 | 6.71 | 69% slower
// 2^25 | 3.98 | 6.65 | 67% slower
//
// * std::stable_sort, runtime in ns per n*log2(n)
//
// n | vector | stable_vector | how much slower?
// -----+--------+---------------+------------------
// 2^10 | 4.58 | 7.77 | 70% slower
// 2^15 | 4.54 | 7.40 | 63% slower
// 2^20 | 4.67 | 7.46 | 60% slower
// 2^25 | 4.83 | 7.52 | 56% slower
//
// On average stable_vector is 67% slower. That's much better than pile from [4], which averages 162% slower.
//
// References
// [1] https://plflib.org/matt_bentley_-_pot_blocks_bit_tricks.pdf
// [2] https://www.reddit.com/r/cpp/comments/mwi9dk/new_paper_random_access_of_elements_in_data/
// [3] https://github.com/mehrdadn/libvital/blob/master/doc/vital/container/stationary_vector.md
// [4] Worst-Case-Efficient Dynamic Arrays in Practice http://www.cphstl.dk/Paper/Dynamic-arrays/sea16.pdf
// [5] Experiences with the design and implementation of space-efficient deques
// http://www.cphstl.dk/Report/Deque/cphstl-report-2001-7.pdf
template <typename T, class Allocator = std::allocator<T>, std::size_t StartSize = 128 / sizeof(T) < 8U ? 8U : 64 / sizeof(T)>
class stable_vector : private Allocator {
    // The bit-trick addressing in operator[] requires every block size (and therefore
    // StartSize) to be a power of two; otherwise elements would be silently mapped to
    // the wrong slots. The default StartSize expression can produce a non-power-of-two
    // for unusual sizeof(T) values, so enforce it here at compile time.
    static_assert(StartSize != 0U && (StartSize & (StartSize - 1U)) == 0U,
                  "StartSize must be a nonzero power of two");

    // constexpr version to find the index of the most significant set bit.
    // E.g. for 33 (100001b) this returns 5. Needed at compile time because the
    // intrinsic-based version below can't be constexpr on MSVC.
    static constexpr auto most_significant_active_bit_cx(std::size_t num) noexcept -> std::size_t {
        auto result = std::size_t();
        while ((num >>= 1U) != 0U) {
            ++result;
        }
        return result;
    }

public:
    using value_type = T;

    // Random-access iterator over stable_vector. It stores the container pointer plus a
    // plain element index; all navigation is index arithmetic, and element access is
    // delegated to stable_vector::operator[].
    template <bool is_const>
    class iter {
        using container_t = typename std::conditional_t<is_const, stable_vector const, stable_vector>;
        container_t* m_sv{};
        std::size_t m_idx{};

        // iterator and const_iterator must access each other's members for the
        // converting constructor and for mixed (iterator vs const_iterator) comparisons.
        friend class iter<!is_const>;

    public:
        using iterator_category = std::random_access_iterator_tag;
        using value_type = T;
        using difference_type = std::ptrdiff_t;
        using pointer = std::conditional_t<is_const, T const*, T*>;
        using reference = std::conditional_t<is_const, T const&, T&>;

        iter() = default;

        iter(container_t* sv, std::size_t idx) noexcept
            : m_sv(sv)
            , m_idx(idx) {}

        // implicit iterator -> const_iterator conversion (the reverse is disabled via SFINAE)
        // NOLINTNEXTLINE(hicpp-explicit-conversions)
        template <bool O, typename = std::enable_if_t<is_const && !O>>
        iter(iter<O> const& other) noexcept
            : m_sv(other.m_sv)
            , m_idx(other.m_idx) {}

        auto operator++() noexcept -> iter& {
            ++m_idx;
            return *this;
        }
        auto operator--() noexcept -> iter& {
            --m_idx;
            return *this;
        }
        // Post-increment/-decrement return the iterator's *previous* position.
        // (The previous implementation called a non-existent iter(std::size_t)
        // constructor and dropped m_sv.)
        auto operator++(int) noexcept -> iter {
            auto before = *this;
            ++m_idx;
            return before;
        }
        auto operator--(int) noexcept -> iter {
            auto before = *this;
            --m_idx;
            return before;
        }
        auto operator-(iter const& other) const noexcept -> difference_type {
            return static_cast<difference_type>(m_idx - other.m_idx);
        }
        auto operator-(difference_type d) const noexcept -> iter {
            return iter(m_sv, m_idx - d);
        }
        auto operator+(difference_type d) const noexcept -> iter {
            return iter(m_sv, m_idx + d);
        }
        // n + it form, required of a random access iterator
        friend auto operator+(difference_type d, iter const& it) noexcept -> iter {
            return it + d;
        }
        auto operator+=(difference_type d) noexcept -> iter& {
            m_idx += d;
            return *this;
        }
        auto operator-=(difference_type d) noexcept -> iter& {
            m_idx -= d;
            return *this;
        }
        auto operator*() const noexcept -> reference {
            return (*m_sv)[m_idx];
        }
        auto operator->() const noexcept -> pointer {
            return &(*m_sv)[m_idx];
        }
        // it[n], required of a random access iterator
        auto operator[](difference_type d) const noexcept -> reference {
            return (*m_sv)[m_idx + d];
        }
        template <bool O>
        auto operator==(iter<O> const& other) const noexcept -> bool {
            return m_idx == other.m_idx;
        }
        template <bool O>
        auto operator!=(iter<O> const& other) const noexcept -> bool {
            return m_idx != other.m_idx;
        }
        template <bool O>
        auto operator<(iter<O> const& other) const noexcept -> bool {
            return m_idx < other.m_idx;
        }
        template <bool O>
        auto operator>(iter<O> const& other) const noexcept -> bool {
            return m_idx > other.m_idx;
        }
        template <bool O>
        auto operator<=(iter<O> const& other) const noexcept -> bool {
            return m_idx <= other.m_idx;
        }
        template <bool O>
        auto operator>=(iter<O> const& other) const noexcept -> bool {
            return m_idx >= other.m_idx;
        }
    };

    using iterator = iter<false>;
    using const_iterator = iter<true>;

    auto begin() -> iterator {
        return iterator(this, 0U);
    }
    auto end() -> iterator {
        return iterator(this, m_size);
    }
    auto begin() const -> const_iterator {
        return cbegin();
    }
    auto end() const -> const_iterator {
        return cend();
    }
    auto cbegin() const -> const_iterator {
        return const_iterator(this, 0U);
    }
    auto cend() const -> const_iterator {
        return const_iterator(this, m_size);
    }

    explicit stable_vector() = default;

    stable_vector(stable_vector const& other) {
        copy(other);
    }

    auto operator=(stable_vector const& other) -> stable_vector& {
        if (this != &other) {
            copy(other);
        }
        return *this;
    }

    // Moving steals the block pointer array; only elements that live in the SVO
    // buffer are moved individually. NOTE: declared noexcept, so T's move
    // constructor must not throw for SVO-resident elements.
    stable_vector(stable_vector&& other) noexcept {
        // std::move is required here: move_into_uninitialized takes an rvalue reference.
        move_into_uninitialized(std::move(other));
    }

    auto operator=(stable_vector&& other) noexcept -> stable_vector& {
        // self-move guard: without it, a self-move deallocates our blocks but leaves
        // a stale m_capacity behind, corrupting all later growth and indexing.
        if (this != &other) {
            clear();
            deallocate_uninitialized_memory();
            move_into_uninitialized(std::move(other));
        }
        return *this;
    }

    [[nodiscard]] auto size() const -> std::size_t {
        return m_size;
    }

    // Constructs a new element in place at the end, growing capacity first if needed.
    // Returns a reference to the new element. References to existing elements are
    // never invalidated by growth.
    template <typename... Args>
    auto emplace_back(Args&&... args) -> T& {
        if (m_size == m_capacity) {
            grow();
        }
        // operator[] only computes an address here; the slot m_size is raw storage.
        auto* location = &operator[](m_size);
        auto* ret = ::new (static_cast<void*>(location)) T(std::forward<Args>(args)...);
        ++m_size;
        return *ret;
    }

    void push_back(T const& obj) {
        emplace_back(obj);
    }
    void push_back(T&& obj) {
        emplace_back(std::move(obj));
    }

    // Destroys the last element. Precondition: !empty().
    void pop_back() noexcept {
        --m_size;
        if constexpr (!std::is_trivially_destructible_v<T>) {
            operator[](m_size).~T();
        }
    }

    // Destroys all elements. Allocated blocks are kept for reuse.
    void clear() noexcept {
        if constexpr (std::is_trivially_destructible_v<T>) {
            m_size = 0;
        } else {
            while (!empty()) {
                pop_back();
            }
        }
    }

    // const so that it is callable on const containers (it does not mutate).
    [[nodiscard]] constexpr auto empty() const noexcept -> bool {
        return m_size == 0U;
    }

    // Correctly access m_svo's memory through std::launder: the T objects are
    // placement-new'ed into raw aligned storage.
    constexpr auto svo() noexcept -> T* {
        return std::launder(reinterpret_cast<T*>(m_svo));
    }
    constexpr auto svo() const noexcept -> T const* {
        return std::launder(reinterpret_cast<T const*>(m_svo));
    }

    // Precondition for front()/back(): !empty(). The first element always lives in the SVO.
    auto front() const noexcept -> T const& {
        return svo()[0];
    }
    auto front() noexcept -> T& {
        return svo()[0];
    }
    auto back() const noexcept -> T const& {
        return operator[](m_size - 1);
    }
    auto back() noexcept -> T& {
        return operator[](m_size - 1);
    }

    // O(1) random access via the bit trick from [1]: an element with index
    // i >= StartSize lives in block (msb(i) - msb(StartSize)), at offset i with
    // its most significant bit cleared.
    constexpr auto operator[](std::size_t i) const noexcept -> T const& {
        if (i < StartSize) {
            return svo()[i];
        }
        auto msab = most_significant_active_bit(i);
        // block index is number of bits in i, minus number of bits already covered by the first block (the svo).
        auto* block = m_blocks[msab - num_bits_svo];
        // clear the highest bit to get to the index within the block.
        // std::size_t{1} (not 1U) so the shift is well-defined for msab >= 32 on 64-bit targets.
        return block[i ^ (std::size_t{1} << msab)];
    }

    auto operator[](std::size_t i) noexcept -> T& {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
        return const_cast<T&>(std::as_const(*this)[i]);
    }

    ~stable_vector() noexcept {
        clear();
        deallocate_uninitialized_memory();
    }

    [[nodiscard]] auto capacity() const noexcept -> std::size_t {
        return m_capacity;
    }

private:
    // Uses intrinsics to find the most significant set bit. E.g. for 33 (100001b) this returns 5.
    // On MSVC we use _BitScanReverse instead of __lzcnt: __lzcnt silently computes the
    // wrong result on CPUs without the LZCNT instruction, while BSR is always available.
    // This can't be constexpr on MSVC, hence the separate constexpr version above.
    static inline auto most_significant_active_bit(std::size_t num) noexcept -> std::size_t {
#ifdef _MSC_VER
        unsigned long index = 0;
# if UINTPTR_MAX == UINT32_MAX
        _BitScanReverse(&index, num);
# else
        _BitScanReverse64(&index, num);
# endif
        return index;
#else
# if UINTPTR_MAX == UINT32_MAX
        return 31U - __builtin_clz(num);
# else
        return 63U - __builtin_clzll(num);
# endif
#endif
    }

    // Allocates a new block, doubling the container's capacity. The new block's size
    // equals the current capacity, so block k holds StartSize << k elements.
    void grow() {
        if (m_blocks == nullptr) {
            m_blocks = ::new T*[max_num_blocks];
        }
        // go through allocator_traits so non-std allocators with defaulted members work too
        m_blocks[calc_block_idx(m_capacity)] = std::allocator_traits<Allocator>::allocate(*this, m_capacity);
        m_capacity *= 2;
    }

    // Grows (doubling each step) until at least s elements fit.
    void reserve(std::size_t s) {
        while (m_capacity < s) {
            grow();
        }
    }

    // Frees all heap blocks and the block pointer array. Elements must already be
    // destroyed (clear() is called defensively first).
    void deallocate_uninitialized_memory() noexcept {
        clear();
        if (m_blocks != nullptr) {
            auto num_blocks = calc_block_idx(m_capacity);
            auto block_size = StartSize;
            for (std::size_t i = 0; i < num_blocks; ++i) {
                std::allocator_traits<Allocator>::deallocate(*this, m_blocks[i], block_size);
                block_size *= 2;
            }
            delete[] m_blocks;
            m_blocks = nullptr;
        }
    }

    // Destroys the current contents and copy-constructs all of other's elements into
    // this container, growing as needed. No strong exception guarantee (prototype).
    void copy(stable_vector const& other) {
        clear();
        reserve(other.size());
        // Walk the source bucket by bucket: std::size_t(-1) denotes the SVO bucket,
        // 0..n the heap blocks, whose sizes double with each step.
        auto current_block_idx = std::size_t(-1);
        auto block_size = StartSize;
        auto num_left_to_copy = other.size();
        while (num_left_to_copy != 0) {
            auto num_copy = num_left_to_copy < block_size ? num_left_to_copy : block_size;
            if (current_block_idx == std::size_t(-1)) {
                std::uninitialized_copy_n(other.svo(), num_copy, svo());
            } else {
                std::uninitialized_copy_n(other.m_blocks[current_block_idx], num_copy, m_blocks[current_block_idx]);
            }
            num_left_to_copy -= num_copy;
            ++current_block_idx;
            block_size = StartSize << current_block_idx;
        }
        m_size = other.size();
    }

    // Takes over other's state. Heap blocks are stolen via the pointer array; only the
    // SVO part (or whatever fraction of it is in use) is moved element by element.
    // Precondition: *this holds no elements and owns no blocks.
    void move_into_uninitialized(stable_vector&& other) {
        m_size = std::exchange(other.m_size, 0U);
        m_capacity = std::exchange(other.m_capacity, StartSize);
        m_blocks = std::exchange(other.m_blocks, nullptr);
        // only the SVO (or parts of it) needs to be moved
        auto num_move = m_size < StartSize ? m_size : StartSize;
        if (num_move > 0) {
            std::uninitialized_move_n(other.svo(), num_move, svo());
            // other.m_size is already 0, so other's destructor will not destroy the
            // moved-from elements; destroy them here to avoid leaking resources.
            std::destroy_n(other.svo(), num_move);
        }
    }

    // Index into m_blocks for the block that starts at element index i
    // (i is always StartSize << k for some k >= 0 when called).
    static auto calc_block_idx(std::size_t i) noexcept -> std::size_t {
        return most_significant_active_bit(i) - num_bits_svo;
    }

    static constexpr auto num_bits_svo = most_significant_active_bit_cx(StartSize);
    // Each block doubles capacity, so there can never be more blocks than bits in
    // std::size_t minus the bits already covered by the SVO.
    static constexpr auto max_num_blocks = sizeof(std::size_t) * 8U - most_significant_active_bit_cx(StartSize);

    std::size_t m_size{}; // current number of elements
    std::size_t m_capacity{StartSize}; // currently allocated capacity
    T** m_blocks{}; // array of pointers to all heap blocks; nullptr until the first grow()
    // uninitialized memory for small-vector optimization (SVO)
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
    typename std::aligned_storage<sizeof(T), alignof(T)>::type m_svo[StartSize];
};
} // namespace ankerl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment