/* Fast open-addressing hash table. | |
* | |
* Copyright 2022 Joaquin M Lopez Munoz. | |
* Distributed under the Boost Software License, Version 1.0. | |
* (See accompanying file LICENSE_1_0.txt or copy at | |
* http://www.boost.org/LICENSE_1_0.txt) | |
* | |
* See https://www.boost.org/libs/unordered for library home page. | |
*/ | |
#ifndef BOOST_UNORDERED_DETAIL_CFOA_HPP | |
#define BOOST_UNORDERED_DETAIL_CFOA_HPP | |
#include <atomic> | |
#include <boost/assert.hpp> | |
#include <boost/config.hpp> | |
#include <boost/config/workaround.hpp> | |
#include <boost/core/allocator_traits.hpp> | |
#include <boost/core/bit.hpp> | |
#include <boost/core/empty_value.hpp> | |
#include <boost/core/no_exceptions_support.hpp> | |
#include <boost/core/pointer_traits.hpp> | |
#include <boost/cstdint.hpp> | |
#include <boost/predef.h> | |
#include <boost/type_traits/has_trivial_copy.hpp> | |
#include <boost/type_traits/is_nothrow_swappable.hpp> | |
#include <boost/unordered/detail/narrow_cast.hpp> | |
#include <boost/unordered/detail/xmx.hpp> | |
#include <boost/unordered/hash_traits.hpp> | |
#include <climits> | |
#include <cmath> | |
#include <cstddef> | |
#include <cstring> | |
#include <iterator> | |
#include <limits> | |
#include <memory> | |
#include <mutex> | |
#include <tuple> | |
#include <type_traits> | |
#include <utility> | |
#if defined(__SSE2__)||\ | |
defined(_M_X64)||(defined(_M_IX86_FP)&&_M_IX86_FP>=2) | |
#define BOOST_UNORDERED_SSE2 | |
#include <emmintrin.h> | |
#elif defined(__ARM_NEON)&&!defined(__ARM_BIG_ENDIAN) | |
#define BOOST_UNORDERED_LITTLE_ENDIAN_NEON | |
#include <arm_neon.h> | |
#endif | |
#ifdef __has_builtin | |
#define BOOST_UNORDERED_HAS_BUILTIN(x) __has_builtin(x) | |
#else | |
#define BOOST_UNORDERED_HAS_BUILTIN(x) 0 | |
#endif | |
#if !defined(NDEBUG) | |
#define BOOST_UNORDERED_ASSUME(cond) BOOST_ASSERT(cond) | |
#elif BOOST_UNORDERED_HAS_BUILTIN(__builtin_assume) | |
#define BOOST_UNORDERED_ASSUME(cond) __builtin_assume(cond) | |
#elif defined(__GNUC__) || BOOST_UNORDERED_HAS_BUILTIN(__builtin_unreachable) | |
#define BOOST_UNORDERED_ASSUME(cond) \ | |
do{ \ | |
if(!(cond))__builtin_unreachable(); \ | |
}while(0) | |
#elif defined(_MSC_VER) | |
#define BOOST_UNORDERED_ASSUME(cond) __assume(cond) | |
#else | |
#define BOOST_UNORDERED_ASSUME(cond) \ | |
do{ \ | |
static_cast<void>(false&&(cond)); \ | |
}while(0) | |
#endif | |
#define BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED(Hash, Pred) \ | |
static_assert(boost::is_nothrow_swappable<Hash>::value, \ | |
"Template parameter Hash is required to be nothrow Swappable."); \ | |
static_assert(boost::is_nothrow_swappable<Pred>::value, \ | |
"Template parameter Pred is required to be nothrow Swappable"); | |
namespace boost{ | |
namespace unordered{ | |
namespace detail{ | |
namespace cfoa{ | |
static const std::size_t default_bucket_count = 0; | |
/* foa::table is an open-addressing hash table serving as the foundational core | |
* of boost::unordered_[flat|node]_[map|set]. Its main internal design aspects | |
* are: | |
* | |
* - Element slots are logically split into groups of size N=15. The number | |
* of groups is always a power of two, so the number of allocated slots | |
* is of the form (N*2^n)-1 (final slot reserved for a sentinel mark).
* - Positioning is done at the group level rather than the slot level, that | |
* is, for any given element its hash value is used to locate a group and | |
* insertion is performed on the first available element of that group; | |
* if the group is full (overflow), further groups are tried using | |
* quadratic probing. | |
* - Each group has an associated 16B metadata word holding reduced hash | |
* values and overflow information. Reduced hash values are used to | |
* accelerate lookup within the group by using 128-bit SIMD or 64-bit word | |
* operations. | |
*/ | |
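/* For illustration (not part of the implementation): with n=3 the table has
 * 2^3=8 groups, i.e. 8*15=120 slots, of which 119 are usable and the last one
 * holds the sentinel. A lookup hashes the key, picks one of the 8 groups from
 * the high bits of the hash, scans that group's 15 slots via its metadata
 * word, and only moves on to another group if the group is marked as
 * overflowed for that hash.
 */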
/* copied from https://rigtorp.se/spinlock/ */ | |
template<typename Atomic,typename Atomic::value_type Locked=1> | |
class lock | |
{ | |
public: | |
using value_type=typename Atomic::value_type; | |
lock(Atomic& a_):a{a_} | |
{ | |
for (;;) { | |
if((x=a.exchange(Locked,std::memory_order_acquire))!=Locked)return; | |
while(a.load(std::memory_order_relaxed)==Locked){ | |
_mm_pause(); | |
} | |
} | |
} | |
~lock(){a.store(x,std::memory_order_release);} | |
operator const value_type&()const{return x;} | |
lock& operator=(const value_type& x_){x=x_;return *this;} | |
private: | |
Atomic& a; | |
value_type x; | |
}; | |
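/* Illustrative sketch only (not used by the implementation): how the spinlock
 * above can guard a single metadata byte. Whatever value is assigned to the
 * lock object while it is held is what gets stored back on destruction.
 */
#if 0
std::atomic<unsigned char> byte{0};
{
lock<std::atomic<unsigned char>,1> guard{byte}; /* spins until the previous value isn't 1, then stores 1 */
/* ... critical section: other threads read the byte as 1 (locked) ... */
guard=42; /* change the value to be published on unlock */
} /* destructor stores 42 with memory_order_release */
#endif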
/* group15 controls metadata information of a group of N=15 element slots. | |
* The 16B metadata word is organized as follows (LSB depicted rightmost): | |
* | |
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
* |ofw|h14|h13|h12|h11|h10|h09|h08|h07|h06|h05|h04|h03|h02|h01|h00|
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
* | |
* hi is 0 if the i-th element slot is available, 1 to mark a sentinel and,
* when the slot is occupied, a value in the range [2,255] obtained from the | |
* element's original hash value. | |
* ofw is the so-called overflow byte. If insertion of an element with hash | |
* value h is tried on a full group, then the (h%8)-th bit of the overflow | |
* byte is set to 1 and a further group is probed. Having an overflow byte | |
* brings two advantages: | |
* | |
* - There's no need to reserve a special value of hi to mark tombstone | |
* slots; each reduced hash value then keeps log2(254)=7.99 bits of the
* original hash (alternative approaches reserve one full bit to mark | |
* if the slot is available/deleted, so their reduced hash values are 7 bit | |
* strong only). | |
* - When doing an unsuccessful lookup (i.e. the element is not present in | |
* the table), probing stops at the first non-overflowed group. Having 8 | |
* bits for signalling overflow makes it very likely that we stop at the | |
* current group (this happens when no element with the same (h%8) value | |
* has overflowed in the group), saving us an additional group check even | |
* under high-load/high-erase conditions. It is critical that hash | |
* reduction is invariant under modulo 8 (see maybe_caused_overflow). | |
* | |
* When looking for an element with hash value h, match(h) returns a bitmask | |
* signalling which slots have the same reduced hash value. If available, | |
* match uses SSE2 or (little endian) Neon 128-bit SIMD operations. On non-SIMD | |
* scenarios, the logical layout described above is physically mapped to two | |
* 64-bit words with *bit interleaving*, i.e. the least significant 16 bits of | |
* the first 64-bit word contain the least significant bits of each byte in the | |
* "logical" 128-bit word, and so forth. With this layout, match can be | |
* implemented with 4 ANDs, 3 shifts, 2 XORs, 1 OR and 1 NOT. | |
* | |
* group15 has no user-defined ctor so that it's a trivial type and can be | |
* initialized via memset etc. Where needed, group15::initialize sets the | |
* metadata to all zeros. | |
*/ | |
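/* Worked example (illustrative): reduced_hash(h) is the low byte of
 * match_word(h). The lookup table below maps bytes 0x00->0x08 and 0x01->0x09
 * and leaves every other byte unchanged, so a reduced hash can never collide
 * with the reserved metadata values 0 and 1, while h%8 is preserved (0x00 and
 * 0x08, 0x01 and 0x09 are congruent mod 8), which keeps overflow checking
 * consistent. E.g. a hash whose low byte is 0x01 is stored as 0x09, and the
 * overflow bit it would set on a full group is bit 1 of the overflow byte.
 */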
#if defined(BOOST_UNORDERED_SSE2) | |
static_assert(sizeof(std::atomic<unsigned char>)==1); | |
struct group15 | |
{ | |
static constexpr int N=15; | |
struct dummy_group_type | |
{ | |
alignas(16) std::atomic<unsigned char> storage[N+1]= | |
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
}; | |
inline void initialize() | |
{ | |
_mm_store_si128( | |
reinterpret_cast<__m128i*>(m),_mm_setzero_si128()); | |
} | |
inline auto acquire(std::size_t pos) | |
{ | |
BOOST_ASSERT(pos<N); | |
return lock<std::atomic<unsigned char>,locked_>(at(pos)); | |
} | |
static inline unsigned char get_reduced_hash(std::size_t hash) | |
{ | |
return reduced_hash(hash); | |
} | |
#if 0 | |
inline void set(std::size_t pos,std::size_t hash) | |
{ | |
BOOST_ASSERT(pos<N); | |
at(pos)=reduced_hash(hash); | |
} | |
#endif | |
inline void reset(std::size_t pos) | |
{ | |
BOOST_ASSERT(pos<N); | |
at(pos)=available_; | |
} | |
static inline void reset(unsigned char* pc) | |
{ | |
*pc=available_; | |
} | |
inline int match(std::size_t hash)const | |
{ | |
auto w=_mm_load_si128(reinterpret_cast<const __m128i*>(m)); | |
_mm_mfence(); // TODO WHY O WHY | |
return | |
(_mm_movemask_epi8(_mm_cmpeq_epi8(w,_mm_set1_epi32(match_word(hash))))| | |
_mm_movemask_epi8(_mm_cmpeq_epi8(w,_mm_set1_epi8(locked_))))&0x7FFF; | |
} | |
inline bool is_not_overflowed(std::size_t hash)const | |
{ | |
static constexpr unsigned char shift[]={1,2,4,8,16,32,64,128}; | |
return !(overflow()&shift[hash%8]); | |
} | |
inline void mark_overflow(std::size_t hash) | |
{ | |
overflow()|=static_cast<unsigned char>(1<<(hash%8)); | |
} | |
static inline bool maybe_caused_overflow(unsigned char* pc) | |
{ | |
std::size_t pos=reinterpret_cast<uintptr_t>(pc)%sizeof(group15); | |
group15 *pg=reinterpret_cast<group15*>(pc-pos); | |
return !pg->is_not_overflowed(*pc); | |
}
inline int match_available()const | |
{ | |
return _mm_movemask_epi8( | |
_mm_cmpeq_epi8( | |
_mm_load_si128(reinterpret_cast<const __m128i*>(m)), | |
_mm_setzero_si128()))&0x7FFF; | |
} | |
inline int match_occupied()const | |
{ | |
return (~match_available())&0x7FFF; | |
} | |
private: | |
static constexpr unsigned char available_=0, | |
locked_=1; | |
inline static int match_word(std::size_t hash) | |
{ | |
static constexpr boost::uint32_t word[]= | |
{ | |
0x08080808u,0x09090909u,0x02020202u,0x03030303u,0x04040404u,0x05050505u,0x06060606u,0x07070707u, | |
0x08080808u,0x09090909u,0x0A0A0A0Au,0x0B0B0B0Bu,0x0C0C0C0Cu,0x0D0D0D0Du,0x0E0E0E0Eu,0x0F0F0F0Fu, | |
0x10101010u,0x11111111u,0x12121212u,0x13131313u,0x14141414u,0x15151515u,0x16161616u,0x17171717u, | |
0x18181818u,0x19191919u,0x1A1A1A1Au,0x1B1B1B1Bu,0x1C1C1C1Cu,0x1D1D1D1Du,0x1E1E1E1Eu,0x1F1F1F1Fu, | |
0x20202020u,0x21212121u,0x22222222u,0x23232323u,0x24242424u,0x25252525u,0x26262626u,0x27272727u, | |
0x28282828u,0x29292929u,0x2A2A2A2Au,0x2B2B2B2Bu,0x2C2C2C2Cu,0x2D2D2D2Du,0x2E2E2E2Eu,0x2F2F2F2Fu, | |
0x30303030u,0x31313131u,0x32323232u,0x33333333u,0x34343434u,0x35353535u,0x36363636u,0x37373737u, | |
0x38383838u,0x39393939u,0x3A3A3A3Au,0x3B3B3B3Bu,0x3C3C3C3Cu,0x3D3D3D3Du,0x3E3E3E3Eu,0x3F3F3F3Fu, | |
0x40404040u,0x41414141u,0x42424242u,0x43434343u,0x44444444u,0x45454545u,0x46464646u,0x47474747u, | |
0x48484848u,0x49494949u,0x4A4A4A4Au,0x4B4B4B4Bu,0x4C4C4C4Cu,0x4D4D4D4Du,0x4E4E4E4Eu,0x4F4F4F4Fu, | |
0x50505050u,0x51515151u,0x52525252u,0x53535353u,0x54545454u,0x55555555u,0x56565656u,0x57575757u, | |
0x58585858u,0x59595959u,0x5A5A5A5Au,0x5B5B5B5Bu,0x5C5C5C5Cu,0x5D5D5D5Du,0x5E5E5E5Eu,0x5F5F5F5Fu, | |
0x60606060u,0x61616161u,0x62626262u,0x63636363u,0x64646464u,0x65656565u,0x66666666u,0x67676767u, | |
0x68686868u,0x69696969u,0x6A6A6A6Au,0x6B6B6B6Bu,0x6C6C6C6Cu,0x6D6D6D6Du,0x6E6E6E6Eu,0x6F6F6F6Fu, | |
0x70707070u,0x71717171u,0x72727272u,0x73737373u,0x74747474u,0x75757575u,0x76767676u,0x77777777u, | |
0x78787878u,0x79797979u,0x7A7A7A7Au,0x7B7B7B7Bu,0x7C7C7C7Cu,0x7D7D7D7Du,0x7E7E7E7Eu,0x7F7F7F7Fu, | |
0x80808080u,0x81818181u,0x82828282u,0x83838383u,0x84848484u,0x85858585u,0x86868686u,0x87878787u, | |
0x88888888u,0x89898989u,0x8A8A8A8Au,0x8B8B8B8Bu,0x8C8C8C8Cu,0x8D8D8D8Du,0x8E8E8E8Eu,0x8F8F8F8Fu, | |
0x90909090u,0x91919191u,0x92929292u,0x93939393u,0x94949494u,0x95959595u,0x96969696u,0x97979797u, | |
0x98989898u,0x99999999u,0x9A9A9A9Au,0x9B9B9B9Bu,0x9C9C9C9Cu,0x9D9D9D9Du,0x9E9E9E9Eu,0x9F9F9F9Fu, | |
0xA0A0A0A0u,0xA1A1A1A1u,0xA2A2A2A2u,0xA3A3A3A3u,0xA4A4A4A4u,0xA5A5A5A5u,0xA6A6A6A6u,0xA7A7A7A7u, | |
0xA8A8A8A8u,0xA9A9A9A9u,0xAAAAAAAAu,0xABABABABu,0xACACACACu,0xADADADADu,0xAEAEAEAEu,0xAFAFAFAFu, | |
0xB0B0B0B0u,0xB1B1B1B1u,0xB2B2B2B2u,0xB3B3B3B3u,0xB4B4B4B4u,0xB5B5B5B5u,0xB6B6B6B6u,0xB7B7B7B7u, | |
0xB8B8B8B8u,0xB9B9B9B9u,0xBABABABAu,0xBBBBBBBBu,0xBCBCBCBCu,0xBDBDBDBDu,0xBEBEBEBEu,0xBFBFBFBFu, | |
0xC0C0C0C0u,0xC1C1C1C1u,0xC2C2C2C2u,0xC3C3C3C3u,0xC4C4C4C4u,0xC5C5C5C5u,0xC6C6C6C6u,0xC7C7C7C7u, | |
0xC8C8C8C8u,0xC9C9C9C9u,0xCACACACAu,0xCBCBCBCBu,0xCCCCCCCCu,0xCDCDCDCDu,0xCECECECEu,0xCFCFCFCFu, | |
0xD0D0D0D0u,0xD1D1D1D1u,0xD2D2D2D2u,0xD3D3D3D3u,0xD4D4D4D4u,0xD5D5D5D5u,0xD6D6D6D6u,0xD7D7D7D7u, | |
0xD8D8D8D8u,0xD9D9D9D9u,0xDADADADAu,0xDBDBDBDBu,0xDCDCDCDCu,0xDDDDDDDDu,0xDEDEDEDEu,0xDFDFDFDFu, | |
0xE0E0E0E0u,0xE1E1E1E1u,0xE2E2E2E2u,0xE3E3E3E3u,0xE4E4E4E4u,0xE5E5E5E5u,0xE6E6E6E6u,0xE7E7E7E7u, | |
0xE8E8E8E8u,0xE9E9E9E9u,0xEAEAEAEAu,0xEBEBEBEBu,0xECECECECu,0xEDEDEDEDu,0xEEEEEEEEu,0xEFEFEFEFu, | |
0xF0F0F0F0u,0xF1F1F1F1u,0xF2F2F2F2u,0xF3F3F3F3u,0xF4F4F4F4u,0xF5F5F5F5u,0xF6F6F6F6u,0xF7F7F7F7u, | |
0xF8F8F8F8u,0xF9F9F9F9u,0xFAFAFAFAu,0xFBFBFBFBu,0xFCFCFCFCu,0xFDFDFDFDu,0xFEFEFEFEu,0xFFFFFFFFu, | |
}; | |
return (int)word[narrow_cast<unsigned char>(hash)]; | |
} | |
inline static unsigned char reduced_hash(std::size_t hash) | |
{ | |
return narrow_cast<unsigned char>(match_word(hash)); | |
} | |
inline std::atomic<unsigned char>& at(std::size_t pos) | |
{ | |
return m[pos]; | |
} | |
inline const std::atomic<unsigned char>& at(std::size_t pos)const | |
{ | |
return m[pos]; | |
} | |
inline std::atomic<unsigned char>& overflow() | |
{ | |
return at(N); | |
} | |
inline const std::atomic<unsigned char>& overflow()const | |
{ | |
return at(N); | |
} | |
alignas(16) std::atomic<unsigned char> m[16]; | |
}; | |
#elif defined(BOOST_UNORDERED_LITTLE_ENDIAN_NEON) | |
#error Neon not supported yet | |
#else /* non-SIMD */ | |
#error Non-SIMD not supported yet | |
#endif | |
/* foa::table uses a size policy to obtain the permissible sizes of the group | |
* array (and, by implication, the element array) and to do the hash->group | |
* mapping. | |
* | |
* - size_index(n) returns an unspecified "index" number used in other policy | |
* operations. | |
* - size(size_index_) returns the number of groups for the given index. It is | |
* guaranteed that size(size_index(n)) >= n. | |
* - min_size() is the minimum number of groups permissible, i.e. | |
* size(size_index(0)). | |
* - position(hash,size_index_) maps hash to a position in the range | |
* [0,size(size_index_)). | |
* | |
* The reason we're introducing the intermediate index value for calculating | |
* sizes and positions is that it allows us to optimize the implementation of | |
* position, which is in the hot path of lookup and insertion operations: | |
* pow2_size_policy, the actual size policy used by foa::table, returns 2^n | |
* (n>0) as permissible sizes and returns the n most significant bits | |
* of the hash value as the position in the group array; using a size index | |
* defined as i = (bits in std::size_t) - n, we have an unbeatable | |
* implementation of position(hash) as hash>>i. | |
* There's a twofold reason for choosing the high bits of hash for positioning: | |
* - Multiplication-based mixing tends to yield better entropy in the high | |
* part of its result. | |
* - group15 reduced-hash values take the *low* bits of hash, and we want | |
* these values and positioning to be as uncorrelated as possible. | |
*/ | |
struct pow2_size_policy | |
{ | |
static inline std::size_t size_index(std::size_t n) | |
{ | |
// TODO: min size is 2, see if we can bring it down to 1 without loss | |
// of performance | |
return sizeof(std::size_t)*CHAR_BIT- | |
(n<=2?1:((std::size_t)(boost::core::bit_width(n-1)))); | |
} | |
static inline std::size_t size(std::size_t size_index_) | |
{ | |
return std::size_t(1)<<(sizeof(std::size_t)*CHAR_BIT-size_index_); | |
} | |
static constexpr std::size_t min_size(){return 2;} | |
static inline std::size_t position(std::size_t hash,std::size_t size_index_) | |
{ | |
return hash>>size_index_; | |
} | |
}; | |
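/* Worked example (assuming 64-bit std::size_t): size_index(1000) =
 * 64-bit_width(999) = 64-10 = 54; size(54) = 1<<(64-54) = 1024 groups, and
 * indeed 1024 >= 1000. position(hash,54) = hash>>54, a value in [0,1024)
 * taken from the 10 most significant bits of the hash. For n<=2 the index is
 * 63 and the size is the minimum of 2 groups.
 */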
/* size index of a group array for a given *element* capacity */ | |
template<typename Group,typename SizePolicy> | |
static inline std::size_t size_index_for(std::size_t n) | |
{ | |
/* n/N+1 == ceil((n+1)/N) (extra +1 for the sentinel) */ | |
return SizePolicy::size_index(n/Group::N+1); | |
} | |
/* Quadratic prober over a power-of-two range using triangular numbers. | |
* mask in next(mask) must be the range size minus one (and since size is 2^n, | |
* mask has exactly its n first bits set to 1). | |
*/ | |
struct pow2_quadratic_prober | |
{ | |
pow2_quadratic_prober(std::size_t pos_):pos{pos_}{} | |
inline std::size_t get()const{return pos;} | |
/* next returns false when the whole array has been traversed, which ends | |
* probing (in practice, full-table probing will only happen with very small | |
* arrays). | |
*/ | |
inline bool next(std::size_t mask) | |
{ | |
step+=1; | |
pos=(pos+step)&mask; | |
return step<=mask; | |
} | |
private: | |
std::size_t pos,step=0; | |
}; | |
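/* Worked example (illustrative): with mask=7 (8 groups) and starting position
 * 5, successive next() calls visit 6,0,3,7,4,2,1 and then return false once
 * step exceeds mask; together with the initial position this covers all 8
 * groups exactly once, a well-known property of triangular-number probing
 * over a power-of-two range.
 */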
/* Mixing policies: no_mix is the identity function and xmx_mix uses the | |
* xmx function defined in <boost/unordered/detail/xmx.hpp>. | |
* foa::table mixes hash results with xmx_mix unless the hash is marked as | |
* avalanching, i.e. of good quality (see <boost/unordered/hash_traits.hpp>). | |
*/ | |
struct no_mix | |
{ | |
template<typename Hash,typename T> | |
static inline std::size_t mix(const Hash& h,const T& x) | |
{ | |
return h(x); | |
} | |
}; | |
struct xmx_mix | |
{ | |
template<typename Hash,typename T> | |
static inline std::size_t mix(const Hash& h,const T& x) | |
{ | |
return xmx(h(x)); | |
} | |
}; | |
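/* Illustrative sketch (hypothetical type, not part of this file): a hash
 * function can opt out of the extra xmx mixing by advertising itself as
 * avalanching, which hash_is_avalanching in <boost/unordered/hash_traits.hpp>
 * detects through a nested is_avalanching type.
 */
#if 0
struct my_well_mixed_hash
{
using is_avalanching=void; /* opt-in: output bits are claimed to be well mixed */
std::size_t operator()(const std::string& s)const noexcept;
};
/* table<...,my_well_mixed_hash,...> then selects no_mix rather than xmx_mix. */
#endif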
/* boost::core::countr_zero has a potentially costly check for | |
* the case x==0. | |
*/ | |
inline unsigned int unchecked_countr_zero(int x) | |
{ | |
#if defined(BOOST_MSVC) | |
unsigned long r; | |
_BitScanForward(&r,(unsigned long)x); | |
return (unsigned int)r; | |
#else | |
BOOST_UNORDERED_ASSUME(x!=0); | |
return (unsigned int)boost::core::countr_zero((unsigned int)x); | |
#endif | |
} | |
template<typename,typename,typename,typename> | |
class table; | |
/* table_iterator keeps two pointers: | |
* | |
* - A pointer p to the element slot. | |
* - A pointer pc to the n-th byte of the associated group metadata, where n | |
* is the position of the element in the group. | |
* | |
* A simpler solution would have been to keep a pointer p to the element, a | |
* pointer pg to the group, and the position n, but that would increase | |
* sizeof(table_iterator) by 4/8 bytes. In order to make this compact | |
* representation feasible, it is required that group objects are aligned | |
* to their size, so that we can recover pg and n as | |
* | |
* - n = pc%sizeof(group) | |
* - pg = pc-n | |
* | |
* (for explanatory purposes pg and pc are treated above as if they were memory | |
* addresses rather than pointers). The main drawback of this two-pointer
* representation is that iterator increment is relatively slow. | |
* | |
* p = nullptr is conventionally used to mark end() iterators. | |
* | |
* TypePolicy encodes the element type and the actual value_type, which are | |
* different for node containers. See below for a full explanation of type | |
* policies. | |
*/ | |
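/* For illustration: with sizeof(group15)==16 and the group array aligned to
 * 16, an iterator for the element at position n=6 of a group located at
 * address A has pc==A+6, so n=pc%16=6 and pg=pc-6=A; rebase() below performs
 * exactly this computation.
 */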
/* internal conversion from const_iterator to iterator */ | |
class const_iterator_cast_tag {}; | |
template<typename TypePolicy,typename Group,bool Const> | |
class table_iterator | |
{ | |
using type_policy=TypePolicy; | |
/* "element_type" is taken by the homonym pointer trait */ | |
using table_element_type=typename type_policy::element_type; | |
public: | |
using difference_type=std::ptrdiff_t; | |
using value_type=typename type_policy::value_type; | |
using pointer= | |
typename std::conditional<Const,value_type const*,value_type*>::type; | |
using reference= | |
typename std::conditional<Const,value_type const&,value_type&>::type; | |
using iterator_category=std::forward_iterator_tag; | |
using element_type= | |
typename std::conditional<Const,value_type const,value_type>::type; | |
table_iterator()=default; | |
template<bool Const2,typename std::enable_if<!Const2>::type* =nullptr> | |
table_iterator(const table_iterator<TypePolicy,Group,Const2>& x): | |
pc{x.pc},p{x.p}{} | |
table_iterator( | |
const_iterator_cast_tag, const table_iterator<TypePolicy,Group,true>& x): | |
pc{x.pc},p{x.p}{} | |
inline reference operator*()const noexcept | |
{return type_policy::value_from(*p);} | |
inline pointer operator->()const noexcept | |
{return std::addressof(type_policy::value_from(*p));} | |
inline table_iterator& operator++()noexcept{increment();return *this;} | |
inline table_iterator operator++(int)noexcept | |
{auto x=*this;increment();return x;} | |
friend inline bool operator==( | |
const table_iterator& x,const table_iterator& y) | |
{return x.p==y.p;} | |
friend inline bool operator!=( | |
const table_iterator& x,const table_iterator& y) | |
{return !(x==y);} | |
private: | |
template<typename,typename,bool> friend class table_iterator; | |
template<typename,typename,typename,typename> friend class table; | |
table_iterator(Group* pg,std::size_t n,const table_element_type* p_): | |
pc{reinterpret_cast<unsigned char*>(const_cast<Group*>(pg))+n}, | |
p{const_cast<table_element_type*>(p_)} | |
{} | |
inline std::size_t rebase() noexcept | |
{ | |
std::size_t off=reinterpret_cast<uintptr_t>(pc)%sizeof(Group); | |
pc-=off; | |
return off; | |
} | |
inline void increment()noexcept | |
{ | |
std::size_t n0=rebase(); | |
int mask=(reinterpret_cast<Group*>(pc)->match_occupied()>>(n0+1))<<(n0+1); | |
if(!mask){ | |
do{ | |
pc+=sizeof(Group); | |
p+=Group::N; | |
} | |
while((mask=reinterpret_cast<Group*>(pc)->match_occupied())==0); | |
} | |
auto n=unchecked_countr_zero(mask); | |
if(BOOST_UNLIKELY(reinterpret_cast<Group*>(pc)->is_sentinel(n))){ | |
p=nullptr; | |
} | |
else{ | |
pc+=n; | |
p-=n0; | |
p+=n; | |
} | |
} | |
unsigned char *pc=nullptr; | |
table_element_type *p=nullptr; | |
}; | |
/* table_arrays controls allocation, initialization and deallocation of | |
* paired arrays of groups and element slots. Only one chunk of memory is | |
* allocated to place both arrays: this is not done for efficiency reasons, | |
* but in order to be able to properly align the group array without storing | |
* additional offset information --the alignment required (16B) is usually | |
* greater than alignof(std::max_align_t) and thus not guaranteed by | |
* allocators. | |
*/ | |
template<typename Group,std::size_t Size> | |
Group* dummy_groups() | |
{ | |
/* Dummy storage initialized as if in an empty container (actually, each | |
* of its groups is initialized like a separate empty container). | |
* We make table_arrays::groups point to this when capacity()==0, so that | |
* we are not allocating any dynamic memory and yet lookup can be implemented | |
* without checking for groups==nullptr. This space won't ever be used for | |
* insertion as the container's capacity is precisely zero. | |
*/ | |
static constexpr typename Group::dummy_group_type | |
storage[Size]={typename Group::dummy_group_type(),}; | |
return reinterpret_cast<Group*>( | |
const_cast<typename Group::dummy_group_type*>(storage)); | |
} | |
template<std::size_t Size> | |
std::atomic<bool>* dummy_group_mutexes() | |
{ | |
static std::atomic<bool> mutexes[Size]; | |
return mutexes; | |
} | |
template<typename Element,typename Group,typename SizePolicy> | |
struct table_arrays | |
{ | |
using element_type=Element; | |
using group_type=Group; | |
static constexpr auto N=group_type::N; | |
using size_policy=SizePolicy; | |
template<typename Allocator> | |
static table_arrays new_(Allocator& al,std::size_t n) | |
{ | |
using alloc_traits=boost::allocator_traits<Allocator>; | |
auto groups_size_index=size_index_for<group_type,size_policy>(n); | |
auto groups_size=size_policy::size(groups_size_index); | |
table_arrays arrays{groups_size_index,groups_size-1,nullptr,nullptr,nullptr}; | |
if(!n){ | |
arrays.groups=dummy_groups<group_type,size_policy::min_size()>(); | |
arrays.group_mutexes=dummy_group_mutexes<size_policy::min_size()>(); | |
} | |
else{ | |
arrays.elements= | |
boost::to_address(alloc_traits::allocate(al,buffer_size(groups_size))); | |
/* Align arrays.groups to sizeof(group_type). table_iterator critically | |
* depends on such alignment for its increment operation. | |
*/ | |
auto p=reinterpret_cast<unsigned char*>(arrays.elements+groups_size*N-1); | |
p+=(uintptr_t(sizeof(group_type))- | |
reinterpret_cast<uintptr_t>(p))%sizeof(group_type); | |
arrays.groups=reinterpret_cast<group_type*>(p); | |
/* memset is faster/not slower than initializing groups individually. | |
* This assumes all zeros is group_type's default layout. | |
*/ | |
std::memset(arrays.groups,0,sizeof(group_type)*groups_size); | |
using mutex_allocator_type=allocator_rebind_t<Allocator,std::atomic<bool>>; | |
mutex_allocator_type mal=al; | |
arrays.group_mutexes= | |
boost::allocator_traits<mutex_allocator_type>::allocate(mal,groups_size); | |
for(std::size_t n=0;n<groups_size;++n){ | |
boost::allocator_traits<mutex_allocator_type>::construct(mal,arrays.group_mutexes+n); | |
} | |
} | |
return arrays; | |
} | |
template<typename Allocator> | |
static void delete_(Allocator& al,table_arrays& arrays)noexcept | |
{ | |
using alloc_traits=boost::allocator_traits<Allocator>; | |
using pointer=typename alloc_traits::pointer; | |
using pointer_traits=boost::pointer_traits<pointer>; | |
if(arrays.elements){ | |
alloc_traits::deallocate( | |
al,pointer_traits::pointer_to(*arrays.elements), | |
buffer_size(arrays.groups_size_mask+1)); | |
using mutex_allocator_type=allocator_rebind_t<Allocator,std::atomic<bool>>; | |
mutex_allocator_type mal=al; | |
for(std::size_t n=0;n<arrays.groups_size_mask+1;++n){ | |
boost::allocator_traits<mutex_allocator_type>::destroy(mal,arrays.group_mutexes+n); | |
} | |
boost::allocator_traits<mutex_allocator_type>::deallocate( | |
mal,arrays.group_mutexes,arrays.groups_size_mask+1); | |
} | |
} | |
/* Combined space for elements and groups measured in | |
* sizeof(element_type)s. | |
*/ | |
static std::size_t buffer_size(std::size_t groups_size) | |
{ | |
auto buffer_bytes= | |
/* space for elements (we subtract 1 because of the sentinel) */ | |
sizeof(element_type)*(groups_size*N-1)+ | |
/* space for groups + padding for group alignment */ | |
sizeof(group_type)*(groups_size+1)-1; | |
/* ceil(buffer_bytes/sizeof(element_type)) */ | |
return (buffer_bytes+sizeof(element_type)-1)/sizeof(element_type); | |
} | |
std::size_t groups_size_index; | |
std::size_t groups_size_mask; | |
group_type *groups; | |
element_type *elements; | |
std::atomic<bool> *group_mutexes; | |
}; | |
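/* Worked example (illustrative): for an 8-byte element_type and groups_size=8,
 * the element part needs 8*15-1=119 elements (952 bytes) and the group part
 * needs 8*16=128 bytes plus up to 15 bytes of alignment padding, so
 * buffer_size returns ceil((952+9*16-1)/8)=ceil(1095/8)=137 element_type's
 * (1096 bytes) for the single combined allocation.
 */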
struct if_constexpr_void_else{void operator()()const{}}; | |
template<bool B,typename F,typename G=if_constexpr_void_else> | |
void if_constexpr(F f,G g={}) | |
{ | |
std::get<B?0:1>(std::forward_as_tuple(f,g))(); | |
} | |
template<bool B,typename T,typename std::enable_if<B>::type* =nullptr> | |
void copy_assign_if(T& x,const T& y){x=y;} | |
template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr> | |
void copy_assign_if(T&,const T&){} | |
template<bool B,typename T,typename std::enable_if<B>::type* =nullptr> | |
void move_assign_if(T& x,T& y){x=std::move(y);} | |
template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr> | |
void move_assign_if(T&,T&){} | |
template<bool B,typename T,typename std::enable_if<B>::type* =nullptr> | |
void swap_if(T& x,T& y){using std::swap; swap(x,y);} | |
template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr> | |
void swap_if(T&,T&){} | |
inline void prefetch(const void* p) | |
{ | |
(void) p; | |
#if defined(BOOST_GCC)||defined(BOOST_CLANG) | |
__builtin_prefetch((const char*)p); | |
#elif defined(BOOST_UNORDERED_SSE2) | |
_mm_prefetch((const char*)p,_MM_HINT_T0); | |
#endif | |
} | |
struct try_emplace_args_t{}; | |
template<typename Allocator> | |
struct is_std_allocator:std::false_type{}; | |
template<typename T> | |
struct is_std_allocator<std::allocator<T>>:std::true_type{}; | |
/* std::allocator::construct marked as deprecated */ | |
#if defined(_LIBCPP_SUPPRESS_DEPRECATED_PUSH) | |
_LIBCPP_SUPPRESS_DEPRECATED_PUSH | |
#elif defined(_STL_DISABLE_DEPRECATED_WARNING) | |
_STL_DISABLE_DEPRECATED_WARNING | |
#elif defined(_MSC_VER) | |
#pragma warning(push) | |
#pragma warning(disable:4996) | |
#endif | |
template<typename Allocator,typename Ptr,typename... Args> | |
struct alloc_has_construct | |
{ | |
private: | |
template<typename Allocator2> | |
static decltype( | |
std::declval<Allocator2&>().construct( | |
std::declval<Ptr>(),std::declval<Args&&>()...), | |
std::true_type{} | |
) check(int); | |
template<typename> static std::false_type check(...); | |
public: | |
static constexpr bool value=decltype(check<Allocator>(0))::value; | |
}; | |
#if defined(_LIBCPP_SUPPRESS_DEPRECATED_POP) | |
_LIBCPP_SUPPRESS_DEPRECATED_POP | |
#elif defined(_STL_RESTORE_DEPRECATED_WARNING) | |
_STL_RESTORE_DEPRECATED_WARNING | |
#elif defined(_MSC_VER) | |
#pragma warning(pop) | |
#endif | |
#if defined(BOOST_GCC) | |
/* GCC's -Wshadow triggers at scenarios like this: | |
* | |
* struct foo{}; | |
* template<typename Base> | |
* struct derived:Base | |
* { | |
* void f(){int foo;} | |
* }; | |
* | |
* derived<foo>x; | |
* x.f(); // declaration of "foo" in derived::f shadows base type "foo" | |
* | |
* This makes shadowing warnings unavoidable in general when a class template | |
* derives from user-provided classes, as is the case with table and | |
* empty_value's below. | |
*/ | |
#pragma GCC diagnostic push | |
#pragma GCC diagnostic ignored "-Wshadow" | |
#endif | |
#if defined(BOOST_MSVC) | |
#pragma warning(push) | |
#pragma warning(disable:4714) /* marked as __forceinline not inlined */ | |
#endif | |
#if BOOST_WORKAROUND(BOOST_MSVC,<=1900) | |
/* VS2015 marks as unreachable generic catch clauses around non-throwing | |
* code. | |
*/ | |
#pragma warning(push) | |
#pragma warning(disable:4702) | |
#endif | |
/* foa::table interface departs in a number of ways from that of C++ unordered | |
* associative containers because it's not for end-user consumption | |
* (boost::unordered_[flat|node]_[map|set] wrappers complete it as appropriate) | |
* and, more importantly, because of fundamental restrictions imposed by open | |
* addressing: | |
* | |
* - value_type must be moveable (flat containers). | |
* - Pointer stability is not kept under rehashing (flat containers). | |
* - begin() is not O(1). | |
* - No bucket API. | |
* - Load factor is fixed and can't be set by the user. | |
* - No extract API (implemented externally by wrapping node containers). | |
* | |
* The TypePolicy template parameter is used to generate instantiations | |
* suitable for each container, and introduces API-public, non-standard | |
* init_type: | |
* | |
* - TypePolicy::key_type and TypePolicy::value_type have the obvious | |
* meaning. | |
* - TypePolicy::init_type is the type implicitly converted to when | |
* writing x.insert({...}). For maps, this is std::pair<Key,T> rather | |
* than std::pair<const Key,T> so that, for instance, x.insert({"hello",0}) | |
* produces a cheaply moveable std::string&& ("hello") rather than | |
* a copyable const std::string&&. foa::table::insert is extended to accept | |
* both init_type and value_type references. | |
* - element_type is the type actually stored in buckets --value_type for | |
* flat containers and (something equivalent to) value_type* for node | |
* containers. | |
* - TypePolicy::value_from returns a reference to the value_type contained | |
* in an element_type object; for flat containers, this is the identity, | |
* whereas node containers dereference the stored pointer. | |
* - TypePolicy::move(element_type&) returns a temporary object for value | |
* transfer on rehashing, move copy/assignment, and merge. In general | |
* this resolves to std::move, except for flat maps, where the object | |
* returned is a std::pair<Key&&,T&&>, which is generally cheaper to move | |
* than std::pair<const Key,T>&& because of the constness in Key. | |
* - TypePolicy::construct(Allocator&,element_type*,Args&&...), where | |
* Allocator::value_type is value_type, constructs an element from the | |
* given arguments. For flat containers, this resolves to | |
* std::allocator_traits<Allocator>::construct, whereas node containers | |
* additionally do node allocation and/or transfer as appropriate. | |
* - TypePolicy::destroy(Allocator&,element_type*) is the destroying | |
* counterpart of the above. | |
* - TypePolicy::extract returns a const reference to the key part of a const | |
* reference to value_type, init_type, element_type or | |
* decltype(TypePolicy::move(...)). | |
* | |
* try_emplace, erase and find support heterogeneous lookup by default, that
* is, without checking for any ::is_transparent typedefs --the checking is | |
* done by boost::unordered_[flat|node]_[map|set]. | |
* | |
* At the moment, we're not supporting allocators with fancy pointers. | |
* Allocator::pointer must be convertible to/from regular pointers. | |
*/ | |
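/* Illustrative sketch only: the shape a TypePolicy for a flat set-like
 * container could take under the contract described above. The names below
 * are hypothetical and not the ones used by the boost::unordered wrappers.
 */
#if 0
template<typename Key>
struct flat_set_policy_sketch
{
using key_type=Key;
using init_type=Key;
using value_type=Key;
using element_type=Key; /* flat: the value itself is stored in the slot */
static value_type& value_from(element_type& x){return x;}
static const key_type& extract(const value_type& x){return x;}
static element_type&& move(element_type& x){return std::move(x);}
template<typename Allocator,typename... Args>
static void construct(Allocator& al,element_type* p,Args&&... args)
{
boost::allocator_traits<Allocator>::construct(al,p,std::forward<Args>(args)...);
}
template<typename Allocator>
static void destroy(Allocator& al,element_type* p)noexcept
{
boost::allocator_traits<Allocator>::destroy(al,p);
}
};
#endif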
/* We pull this out so the tests don't have to rely on a magic constant or | |
* instantiate the table class template as it can be quite gory. | |
*/ | |
constexpr static float const mlf = 0.875f; | |
template<typename TypePolicy,typename Hash,typename Pred,typename Allocator> | |
class | |
#if defined(_MSC_VER)&&_MSC_FULL_VER>=190023918 | |
__declspec(empty_bases) /* activate EBO with multiple inheritance */ | |
#endif | |
table:empty_value<Hash,0>,empty_value<Pred,1>,empty_value<Allocator,2> | |
{ | |
using hash_base=empty_value<Hash,0>; | |
using pred_base=empty_value<Pred,1>; | |
using allocator_base=empty_value<Allocator,2>; | |
using type_policy=TypePolicy; | |
using group_type=group15; | |
static constexpr auto N=group_type::N; | |
using size_policy=pow2_size_policy; | |
using prober=pow2_quadratic_prober; | |
using mix_policy=typename std::conditional< | |
hash_is_avalanching<Hash>::value, | |
no_mix, | |
xmx_mix | |
>::type; | |
using alloc_traits=boost::allocator_traits<Allocator>; | |
public: | |
using key_type=typename type_policy::key_type; | |
using init_type=typename type_policy::init_type; | |
using value_type=typename type_policy::value_type; | |
private: | |
static constexpr bool has_mutable_iterator= | |
!std::is_same<key_type,value_type>::value; | |
public: | |
using hasher=Hash; | |
using key_equal=Pred; | |
using allocator_type=Allocator; | |
using pointer=value_type*; | |
using const_pointer=const value_type*; | |
using reference=value_type&; | |
using const_reference=const value_type&; | |
using size_type=std::size_t; | |
using difference_type=std::ptrdiff_t; | |
using const_iterator=table_iterator<type_policy,group_type,true>; | |
using iterator=typename std::conditional< | |
has_mutable_iterator, | |
table_iterator<type_policy,group_type,false>, | |
const_iterator>::type; | |
table( | |
std::size_t n=0,const Hash& h_=Hash(),const Pred& pred_=Pred(), | |
const Allocator& al_=Allocator()): | |
hash_base{empty_init,h_},pred_base{empty_init,pred_}, | |
allocator_base{empty_init,al_},size_{0},arrays(new_arrays(n)), | |
ml{initial_max_load()} | |
{} | |
table(const table& x): | |
table{x,alloc_traits::select_on_container_copy_construction(x.al())}{} | |
table(table&& x) | |
noexcept( | |
std::is_nothrow_move_constructible<Hash>::value&& | |
std::is_nothrow_move_constructible<Pred>::value&& | |
std::is_nothrow_move_constructible<Allocator>::value): | |
hash_base{empty_init,std::move(x.h())}, | |
pred_base{empty_init,std::move(x.pred())}, | |
allocator_base{empty_init,std::move(x.al())}, | |
size_{x.size_.load()},arrays(x.arrays),ml{x.ml}
{ | |
x.size_=0; | |
x.arrays=x.new_arrays(0); | |
x.ml=x.initial_max_load(); | |
} | |
table(const table& x,const Allocator& al_): | |
table{std::size_t(std::ceil(float(x.size())/mlf)),x.h(),x.pred(),al_} | |
{ | |
copy_elements_from(x); | |
} | |
table(table&& x,const Allocator& al_): | |
table{0,std::move(x.h()),std::move(x.pred()),al_} | |
{ | |
if(al()==x.al()){ | |
auto tmp = x.size_.load();
x.size_ = size_.load();
size_ = tmp;
/* std::swap(size_,x.size_) can't be used: std::atomic is not swappable */
std::swap(arrays,x.arrays); | |
std::swap(ml,x.ml); | |
} | |
else{ | |
reserve(x.size()); | |
clear_on_exit c{x}; | |
(void)c; /* unused var warning */ | |
/* This works because subsequent x.clear() does not depend on the | |
* elements' values. | |
*/ | |
x.for_all_elements([this](element_type* p){ | |
unchecked_insert(type_policy::move(*p)); | |
}); | |
} | |
} | |
~table()noexcept | |
{ | |
for_all_elements([this](element_type* p){ | |
destroy_element(p); | |
}); | |
delete_arrays(arrays); | |
} | |
table& operator=(const table& x) | |
{ | |
BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED(Hash, Pred) | |
static constexpr auto pocca= | |
alloc_traits::propagate_on_container_copy_assignment::value; | |
if(this!=std::addressof(x)){ | |
// if copy construction here winds up throwing, the container is still | |
// left intact so we perform these operations first | |
hasher tmp_h=x.h(); | |
key_equal tmp_p=x.pred(); | |
// already noexcept, clear() before we swap the Hash, Pred just in case | |
// the clear() impl relies on them at some point in the future | |
clear(); | |
// because we've asserted at compile-time that Hash and Pred are nothrow | |
// swappable, we can safely mutate our source container and maintain | |
// consistency between the Hash, Pred compatibility | |
using std::swap; | |
swap(h(),tmp_h); | |
swap(pred(),tmp_p); | |
if_constexpr<pocca>([&,this]{ | |
if(al()!=x.al())reserve(0); | |
copy_assign_if<pocca>(al(),x.al()); | |
}); | |
/* noshrink: favor memory reuse over tightness */ | |
noshrink_reserve(x.size()); | |
copy_elements_from(x); | |
} | |
return *this; | |
} | |
#if defined(BOOST_MSVC) | |
#pragma warning(push) | |
#pragma warning(disable:4127) /* conditional expression is constant */ | |
#endif | |
table& operator=(table&& x) | |
noexcept( | |
alloc_traits::propagate_on_container_move_assignment::value|| | |
alloc_traits::is_always_equal::value) | |
{ | |
BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED(Hash, Pred) | |
static constexpr auto pocma= | |
alloc_traits::propagate_on_container_move_assignment::value; | |
if(this!=std::addressof(x)){ | |
/* Given ambiguity in implementation strategies briefly discussed here: | |
* https://www.open-std.org/jtc1/sc22/wg21/docs/lwg-active.html#2227 | |
* | |
* we opt into requiring nothrow swappability and eschew the move | |
* operations associated with Hash, Pred. | |
* | |
* To this end, we ensure that the user never has to consider the | |
* moved-from state of their Hash, Pred objects | |
*/ | |
using std::swap; | |
clear(); | |
swap(h(),x.h()); | |
swap(pred(),x.pred()); | |
if(pocma||al()==x.al()){ | |
reserve(0); | |
move_assign_if<pocma>(al(),x.al()); | |
auto tmp = x.size_.load();
x.size_ = size_.load();
size_ = tmp;
/* swap(size_,x.size_) can't be used: std::atomic is not swappable */
swap(arrays,x.arrays); | |
swap(ml,x.ml); | |
} | |
else{ | |
/* noshrink: favor memory reuse over tightness */ | |
noshrink_reserve(x.size()); | |
clear_on_exit c{x}; | |
(void)c; /* unused var warning */ | |
/* This works because subsequent x.clear() does not depend on the | |
* elements' values. | |
*/ | |
x.for_all_elements([this](element_type* p){ | |
unchecked_insert(type_policy::move(*p)); | |
}); | |
} | |
} | |
return *this; | |
} | |
#if defined(BOOST_MSVC) | |
#pragma warning(pop) /* C4127 */ | |
#endif | |
allocator_type get_allocator()const noexcept{return al();} | |
iterator begin()noexcept | |
{ | |
iterator it{arrays.groups,0,arrays.elements}; | |
if(!(arrays.groups[0].match_occupied()&0x1))++it; | |
return it; | |
} | |
const_iterator begin()const noexcept | |
{return const_cast<table*>(this)->begin();} | |
iterator end()noexcept{return {};} | |
const_iterator end()const noexcept{return const_cast<table*>(this)->end();} | |
const_iterator cbegin()const noexcept{return begin();} | |
const_iterator cend()const noexcept{return end();} | |
bool empty()const noexcept{return size()==0;} | |
std::size_t size()const noexcept{return size_;} | |
std::size_t max_size()const noexcept{return SIZE_MAX;} | |
template<typename... Args> | |
BOOST_FORCEINLINE std::pair<iterator,bool> emplace(Args&&... args) | |
{ | |
using emplace_type = typename std::conditional< | |
std::is_constructible< | |
init_type, Args... | |
>::value, | |
init_type, | |
value_type | |
>::type; | |
return emplace_impl(emplace_type(std::forward<Args>(args)...)); | |
} | |
template<typename Key,typename... Args> | |
BOOST_FORCEINLINE std::pair<iterator,bool> try_emplace( | |
Key&& x,Args&&... args) | |
{ | |
return emplace_impl( | |
try_emplace_args_t{},std::forward<Key>(x),std::forward<Args>(args)...); | |
} | |
BOOST_FORCEINLINE std::pair<iterator,bool> | |
insert(const init_type& x){return emplace_impl(x);} | |
BOOST_FORCEINLINE std::pair<iterator,bool> | |
insert(init_type&& x){return emplace_impl(std::move(x));} | |
/* template<typename=void> tilts call ambiguities in favor of init_type */ | |
template<typename=void> | |
BOOST_FORCEINLINE std::pair<iterator,bool> | |
insert(const value_type& x){return emplace_impl(x);} | |
template<typename=void> | |
BOOST_FORCEINLINE std::pair<iterator,bool> | |
insert(value_type&& x){return emplace_impl(std::move(x));} | |
template< | |
bool dependent_value=false, | |
typename std::enable_if< | |
has_mutable_iterator||dependent_value>::type* =nullptr | |
> | |
void erase(iterator pos)noexcept{return erase(const_iterator(pos));} | |
BOOST_FORCEINLINE | |
void erase(const_iterator pos)noexcept | |
{ | |
destroy_element(pos.p); | |
recover_slot(pos.pc); | |
} | |
template<typename Key> | |
BOOST_FORCEINLINE | |
auto erase(Key&& x) -> typename std::enable_if< | |
!std::is_convertible<Key,iterator>::value&& | |
!std::is_convertible<Key,const_iterator>::value, std::size_t>::type | |
{ | |
auto it=find(x); | |
if(it!=end()){ | |
erase(it); | |
return 1; | |
} | |
else return 0; | |
} | |
void swap(table& x) | |
noexcept( | |
alloc_traits::propagate_on_container_swap::value|| | |
alloc_traits::is_always_equal::value) | |
{ | |
BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED(Hash, Pred) | |
static constexpr auto pocs= | |
alloc_traits::propagate_on_container_swap::value; | |
using std::swap; | |
if_constexpr<pocs>([&,this]{ | |
swap_if<pocs>(al(),x.al()); | |
}, | |
[&,this]{ /* else */ | |
BOOST_ASSERT(al()==x.al()); | |
(void)this; /* makes sure captured this is used */ | |
}); | |
swap(h(),x.h()); | |
swap(pred(),x.pred()); | |
auto tmp = x.size_.load();
x.size_ = size_.load();
size_ = tmp;
/* swap(size_,x.size_) can't be used: std::atomic is not swappable */
swap(arrays,x.arrays); | |
swap(ml,x.ml); | |
} | |
void clear()noexcept | |
{ | |
auto p=arrays.elements; | |
if(p){ | |
for(auto pg=arrays.groups,last=pg+arrays.groups_size_mask+1; | |
pg!=last;++pg,p+=N){ | |
auto mask=pg->match_really_occupied(); | |
while(mask){ | |
destroy_element(p+unchecked_countr_zero(mask)); | |
mask&=mask-1; | |
} | |
/* we wipe the entire metadata to reset the overflow byte as well */ | |
pg->initialize(); | |
} | |
arrays.groups[arrays.groups_size_mask].set_sentinel(); | |
size_=0; | |
ml=initial_max_load(); | |
} | |
} | |
// TODO: should we accept a different allocator too?
template<typename Hash2,typename Pred2> | |
void merge(table<TypePolicy,Hash2,Pred2,Allocator>& x) | |
{ | |
x.for_all_elements([&,this](group_type* pg,unsigned int n,element_type* p){ | |
if(emplace_impl(type_policy::move(*p)).second){ | |
x.erase(iterator{pg,n,p}); | |
} | |
}); | |
} | |
template<typename Hash2,typename Pred2> | |
void merge(table<TypePolicy,Hash2,Pred2,Allocator>&& x){merge(x);} | |
hasher hash_function()const{return h();} | |
key_equal key_eq()const{return pred();} | |
template<typename Key> | |
BOOST_FORCEINLINE iterator find(const Key& x) | |
{ | |
auto hash=hash_for(x); | |
return find_impl(x,position_for(hash),hash); | |
} | |
template<typename Key> | |
BOOST_FORCEINLINE const_iterator find(const Key& x)const | |
{ | |
return const_cast<table*>(this)->find(x); | |
} | |
std::size_t capacity()const noexcept | |
{ | |
return arrays.elements?(arrays.groups_size_mask+1)*N-1:0; | |
} | |
float load_factor()const noexcept | |
{ | |
if (capacity() == 0) { return 0; } | |
return float(size())/float(capacity()); | |
} | |
float max_load_factor()const noexcept{return mlf;} | |
std::size_t max_load()const noexcept{return ml;} | |
void rehash(std::size_t n) | |
{ | |
auto m=size_t(std::ceil(float(size())/mlf)); | |
if(m>n)n=m; | |
if(n)n=capacity_for(n); /* exact resulting capacity */ | |
if(n!=capacity())unchecked_rehash(n); | |
} | |
void reserve(std::size_t n) | |
{ | |
rehash(std::size_t(std::ceil(float(n)/mlf))); | |
} | |
template<typename Predicate> | |
friend std::size_t erase_if(table& x,Predicate pr) | |
{ | |
return x.erase_if_impl(pr); | |
} | |
private: | |
template<typename,typename,typename,typename> friend class table; | |
using element_type=typename type_policy::element_type; | |
using element_allocator_type=allocator_rebind_t<Allocator,element_type>; | |
using arrays_type=table_arrays<element_type,group_type,size_policy>; | |
struct clear_on_exit | |
{ | |
~clear_on_exit(){x.clear();} | |
table& x; | |
}; | |
Hash& h(){return hash_base::get();} | |
const Hash& h()const{return hash_base::get();} | |
Pred& pred(){return pred_base::get();} | |
const Pred& pred()const{return pred_base::get();} | |
Allocator& al(){return allocator_base::get();} | |
const Allocator& al()const{return allocator_base::get();} | |
arrays_type new_arrays(std::size_t n) | |
{ | |
element_allocator_type eal=al(); | |
return arrays_type::new_(eal,n); | |
} | |
void delete_arrays(arrays_type& arrays_)noexcept | |
{ | |
element_allocator_type eal=al(); | |
arrays_type::delete_(eal,arrays_); | |
} | |
template<typename... Args> | |
void construct_element(element_type* p,Args&&... args) | |
{ | |
type_policy::construct(al(),p,std::forward<Args>(args)...); | |
} | |
template<typename... Args> | |
void construct_element(element_type* p,try_emplace_args_t,Args&&... args) | |
{ | |
construct_element_from_try_emplace_args( | |
p, | |
std::integral_constant<bool,std::is_same<key_type,value_type>::value>{}, | |
std::forward<Args>(args)...); | |
} | |
template<typename Key,typename... Args> | |
void construct_element_from_try_emplace_args( | |
element_type* p,std::false_type,Key&& x,Args&&... args) | |
{ | |
type_policy::construct( | |
al(),p, | |
std::piecewise_construct, | |
std::forward_as_tuple(std::forward<Key>(x)), | |
std::forward_as_tuple(std::forward<Args>(args)...)); | |
} | |
/* This overload allows boost::unordered_[flat|node]_set to internally use | |
* try_emplace to implement heterogeneous insert (P2363). | |
*/ | |
template<typename Key> | |
void construct_element_from_try_emplace_args( | |
element_type* p,std::true_type,Key&& x) | |
{ | |
type_policy::construct(al(),p,std::forward<Key>(x)); | |
} | |
void destroy_element(element_type* p)noexcept | |
{ | |
type_policy::destroy(al(),p); | |
} | |
struct destroy_element_on_exit | |
{ | |
~destroy_element_on_exit(){this_->destroy_element(p);} | |
table *this_; | |
element_type *p; | |
}; | |
void copy_elements_from(const table& x) | |
{ | |
BOOST_ASSERT(empty()); | |
BOOST_ASSERT(this!=std::addressof(x)); | |
if(arrays.groups_size_mask==x.arrays.groups_size_mask){ | |
fast_copy_elements_from(x); | |
} | |
else{ | |
x.for_all_elements([this](const element_type* p){ | |
unchecked_insert(*p); | |
}); | |
} | |
} | |
void fast_copy_elements_from(const table& x) | |
{ | |
if(arrays.elements){ | |
copy_elements_array_from(x); | |
std::memcpy( | |
arrays.groups,x.arrays.groups, | |
(arrays.groups_size_mask+1)*sizeof(group_type)); | |
size_=x.size(); | |
} | |
} | |
void copy_elements_array_from(const table& x) | |
{ | |
copy_elements_array_from( | |
x, | |
std::integral_constant< | |
bool, | |
std::is_same<element_type,value_type>::value&& | |
#if BOOST_WORKAROUND(BOOST_LIBSTDCXX_VERSION,<50000) | |
/* std::is_trivially_copy_constructible not provided */ | |
boost::has_trivial_copy<value_type>::value | |
#else | |
std::is_trivially_copy_constructible<value_type>::value | |
#endif | |
&&( | |
is_std_allocator<Allocator>::value|| | |
!alloc_has_construct<Allocator,value_type*,const value_type&>::value) | |
>{} | |
); | |
} | |
void copy_elements_array_from(const table& x,std::true_type /* -> memcpy */) | |
{ | |
/* reinterpret_cast: GCC may complain about element_type not being | |
* trivially copy-assignable when we're relying on trivial copy | |
* constructibility. | |
*/ | |
std::memcpy( | |
reinterpret_cast<unsigned char*>(arrays.elements), | |
reinterpret_cast<unsigned char*>(x.arrays.elements), | |
x.capacity()*sizeof(element_type)); | |
} | |
void copy_elements_array_from(const table& x,std::false_type /* -> manual */) | |
{ | |
std::size_t num_constructed=0; | |
BOOST_TRY{ | |
x.for_all_elements([&,this](const element_type* p){ | |
construct_element(arrays.elements+(p-x.arrays.elements),*p); | |
++num_constructed; | |
}); | |
} | |
BOOST_CATCH(...){ | |
if(num_constructed){ | |
x.for_all_elements_while([&,this](const element_type* p){ | |
destroy_element(arrays.elements+(p-x.arrays.elements)); | |
return --num_constructed!=0; | |
}); | |
} | |
BOOST_RETHROW | |
} | |
BOOST_CATCH_END | |
} | |
void recover_slot(unsigned char* pc) | |
{ | |
/* If this slot potentially caused overflow, we decrease the maximum load so | |
* that average probe length won't increase unboundedly in repeated | |
* insert/erase cycles (drift). | |
*/ | |
ml-=group_type::maybe_caused_overflow(pc); | |
group_type::reset(pc); | |
--size_; | |
} | |
void recover_slot(group_type* pg,std::size_t pos) | |
{ | |
recover_slot(reinterpret_cast<unsigned char*>(pg)+pos); | |
} | |
std::size_t initial_max_load()const | |
{ | |
static constexpr std::size_t small_capacity=2*N-1; | |
auto capacity_=capacity(); | |
if(capacity_<=small_capacity){ | |
return capacity_; /* we allow 100% usage */ | |
} | |
else{ | |
return (std::size_t)(mlf*(float)(capacity_)); | |
} | |
} | |
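/* E.g. with the default mlf=0.875: a capacity of 119 slots gives ml=104,
 * while capacities up to 2*N-1=29 slots are allowed to fill up completely.
 */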
template<typename T> | |
static inline auto key_from(const T& x) | |
->decltype(type_policy::extract(x)) | |
{ | |
return type_policy::extract(x); | |
} | |
template<typename Key,typename... Args> | |
static inline const Key& key_from( | |
try_emplace_args_t,const Key& x,const Args&...) | |
{ | |
return x; | |
} | |
template<typename Key> | |
inline std::size_t hash_for(const Key& x)const | |
{ | |
return mix_policy::mix(h(),x); | |
} | |
inline std::size_t position_for(std::size_t hash)const | |
{ | |
return position_for(hash,arrays); | |
} | |
static inline std::size_t position_for( | |
std::size_t hash,const arrays_type& arrays_) | |
{ | |
return size_policy::position(hash,arrays_.groups_size_index); | |
} | |
static inline void prefetch_elements(const element_type* p) | |
{ | |
/* We have experimentally confirmed that ARM architectures get a higher | |
* speedup when around the first half of the element slots in a group are | |
* prefetched, whereas for Intel just the first cache line is best. | |
* Please report back if you find better tunings for some particular | |
* architectures. | |
*/ | |
#if BOOST_ARCH_ARM | |
/* Cache line size can't be known at compile time, so we settle on | |
* the very frequent value of 64B. | |
*/ | |
constexpr int cache_line=64; | |
const char *p0=reinterpret_cast<const char*>(p), | |
*p1=p0+sizeof(element_type)*N/2; | |
for(;p0<p1;p0+=cache_line)prefetch(p0); | |
#else | |
prefetch(p); | |
#endif | |
} | |
#if defined(BOOST_MSVC) | |
/* warning: forcing value to bool 'true' or 'false' in bool(pred()...) */ | |
#pragma warning(push) | |
#pragma warning(disable:4800) | |
#endif | |
template<typename Key> | |
BOOST_FORCEINLINE iterator find_impl( | |
const Key& x,std::size_t pos0,std::size_t hash)const | |
{ | |
prober pb(pos0); | |
do{ | |
auto pos=pb.get(); | |
auto pg=arrays.groups+pos; | |
auto mask=pg->match(hash); | |
if(mask){ | |
auto p=arrays.elements+pos*N; | |
prefetch_elements(p); | |
do{ | |
auto n=unchecked_countr_zero(mask); | |
auto c=pg->acquire(n); | |
if(BOOST_LIKELY( | |
c!=0&& | |
bool(pred()(x,key_from(p[n]))))){ | |
return {pg,n,p+n}; | |
} | |
mask&=mask-1; | |
}while(mask); | |
} | |
if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ | |
return {}; /* end() */ | |
} | |
} | |
while(BOOST_LIKELY(pb.next(arrays.groups_size_mask))); | |
return {}; /* end() */ | |
} | |
#if defined(BOOST_MSVC) | |
#pragma warning(pop) /* C4800 */ | |
#endif | |
template<typename... Args> | |
BOOST_FORCEINLINE std::pair<iterator,bool> emplace_impl(Args&&... args) | |
{ | |
const auto &k=key_from(std::forward<Args>(args)...); | |
auto hash=hash_for(k); | |
auto pos0=position_for(hash); | |
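/* hold the per-group spinlock of the home group at pos0 for the rest of
 * emplace_impl; note this serializes only operations whose hash maps to the
 * same home group.
 */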
lock lock(arrays.group_mutexes[pos0]); | |
auto it=find_impl(k,pos0,hash); | |
if(it!=end()){ | |
return {it,false}; | |
} | |
if(BOOST_LIKELY(size_<ml)){ | |
return { | |
unchecked_emplace_at(pos0,hash,std::forward<Args>(args)...), | |
true | |
}; | |
} | |
else{ | |
return { | |
unchecked_emplace_with_rehash(hash,std::forward<Args>(args)...), | |
true | |
}; | |
} | |
} | |
static std::size_t capacity_for(std::size_t n) | |
{ | |
return size_policy::size(size_index_for<group_type,size_policy>(n))*N-1; | |
} | |
template<typename... Args> | |
BOOST_NOINLINE iterator | |
unchecked_emplace_with_rehash(std::size_t hash,Args&&... args) | |
{ | |
/* Due to the anti-drift mechanism (see recover_slot), new_arrays_ may be | |
* of the same size as the old arrays; in the limit, erasing one element at | |
* full load and then inserting could bring us back to the same capacity | |
* after a costly rehash. To avoid this, we jump to the next capacity level | |
* when the number of erased elements is <= 10% of total elements at full | |
* load, which is implemented by requesting additional F*size elements, | |
* with F = P * 10% / (1 - P * 10%), where P is the probability of an | |
* element having caused overflow; P has been measured as ~0.162 under | |
* ideal conditions, yielding F ~ 0.0165 ~ 1/61. | |
*/ | |
auto new_arrays_=new_arrays(std::size_t( | |
std::ceil(static_cast<float>(size_+size_/61+1)/mlf))); | |
iterator it; | |
BOOST_TRY{ | |
/* strong exception guarantee -> try insertion before rehash */ | |
it=nosize_unchecked_emplace_at( | |
new_arrays_,position_for(hash,new_arrays_), | |
hash,std::forward<Args>(args)...); | |
} | |
BOOST_CATCH(...){ | |
delete_arrays(new_arrays_); | |
BOOST_RETHROW | |
} | |
BOOST_CATCH_END | |
/* new_arrays_ lifetime taken care of by unchecked_rehash */ | |
unchecked_rehash(new_arrays_); | |
++size_; | |
return it; | |
} | |
BOOST_NOINLINE void unchecked_rehash(std::size_t n) | |
{ | |
auto new_arrays_=new_arrays(n); | |
unchecked_rehash(new_arrays_); | |
} | |
BOOST_NOINLINE void unchecked_rehash(arrays_type& new_arrays_) | |
{ | |
std::size_t num_destroyed=0; | |
BOOST_TRY{ | |
for_all_elements([&,this](element_type* p){ | |
nosize_transfer_element(p,new_arrays_,num_destroyed); | |
}); | |
} | |
BOOST_CATCH(...){ | |
if(num_destroyed){ | |
for_all_elements_while( | |
[&,this](group_type* pg,unsigned int n,element_type*){ | |
recover_slot(pg,n); | |
return --num_destroyed!=0; | |
} | |
); | |
} | |
for_all_elements(new_arrays_,[this](element_type* p){ | |
destroy_element(p); | |
}); | |
delete_arrays(new_arrays_); | |
BOOST_RETHROW | |
} | |
BOOST_CATCH_END | |
/* either all moved and destroyed or all copied */ | |
BOOST_ASSERT(num_destroyed==size()||num_destroyed==0); | |
if(num_destroyed!=size()){ | |
for_all_elements([this](element_type* p){ | |
destroy_element(p); | |
}); | |
} | |
delete_arrays(arrays); | |
arrays=new_arrays_; | |
ml=initial_max_load(); | |
} | |
void noshrink_reserve(std::size_t n) | |
{ | |
/* used only on assignment after element clearance */ | |
BOOST_ASSERT(empty()); | |
if(n){ | |
n=std::size_t(std::ceil(float(n)/mlf)); /* elements -> slots */ | |
n=capacity_for(n); /* exact resulting capacity */ | |
if(n>capacity()){ | |
auto new_arrays_=new_arrays(n); | |
delete_arrays(arrays); | |
arrays=new_arrays_; | |
ml=initial_max_load(); | |
} | |
} | |
} | |
template<typename Element> | |
void unchecked_insert(Element&& x) | |
{ | |
auto hash=hash_for(key_from(x)); | |
unchecked_emplace_at(position_for(hash),hash,std::forward<Element>(x)); | |
} | |
void nosize_transfer_element( | |
element_type* p,const arrays_type& arrays_,std::size_t& num_destroyed) | |
{ | |
using moved_element_type= | |
decltype(type_policy::move(std::declval<element_type&>())); | |
nosize_transfer_element( | |
p,hash_for(key_from(*p)),arrays_,num_destroyed, | |
std::integral_constant< /* std::move_if_noexcept semantics */ | |
bool, | |
/* Node containers: nothrow move-constructible checks to true even | |
* though type_policy::construct is used in place of actual move ctor. | |
*/ | |
std::is_nothrow_constructible<element_type,moved_element_type>::value|| | |
!std::is_copy_constructible<element_type>::value>{}); | |
} | |
void nosize_transfer_element( | |
element_type* p,std::size_t hash,const arrays_type& arrays_, | |
std::size_t& num_destroyed,std::true_type /* ->move */) | |
{ | |
/* Destroy p even if an exception is thrown in the middle of move | |
* construction, which could leave the source half-moved. | |
*/ | |
++num_destroyed; | |
destroy_element_on_exit d{this,p}; | |
(void)d; /* unused var warning */ | |
nosize_unchecked_emplace_at( | |
arrays_,position_for(hash,arrays_),hash,type_policy::move(*p)); | |
} | |
void nosize_transfer_element( | |
element_type* p,std::size_t hash,const arrays_type& arrays_, | |
std::size_t& /*num_destroyed*/,std::false_type /* ->copy */) | |
{ | |
nosize_unchecked_emplace_at( | |
arrays_,position_for(hash,arrays_),hash, | |
const_cast<const element_type&>(*p)); | |
} | |
template<typename... Args> | |
iterator unchecked_emplace_at( | |
std::size_t pos0,std::size_t hash,Args&&... args) | |
{ | |
auto res=nosize_unchecked_emplace_at( | |
arrays,pos0,hash,std::forward<Args>(args)...); | |
++size_; | |
return res; | |
} | |
template<typename... Args> | |
iterator nosize_unchecked_emplace_at( | |
const arrays_type& arrays_,std::size_t pos0,std::size_t hash, | |
Args&&... args) | |
{ | |
for(prober pb(pos0);;pb.next(arrays_.groups_size_mask)){ | |
auto pos=pb.get(); | |
auto pg=arrays_.groups+pos; | |
for(;;){ | |
auto mask=pg->match_available(); | |
if(BOOST_UNLIKELY(mask==0))break; | |
auto n=unchecked_countr_zero(mask); | |
auto c=pg->acquire(n); | |
if(BOOST_UNLIKELY(c!=0))continue; | |
auto p=arrays_.elements+pos*N+n; | |
construct_element(p,std::forward<Args>(args)...); | |
//pg->set(n,hash); | |
/* note: the assignment below only overwrites the local c and is never read | |
 * again; presumably the reduced hash was meant to be published into the | |
 * group here (cf. the pg->set(n,hash) call commented out above) */ | |
c=group_type::get_reduced_hash(hash); | |
return {pg,n,p}; | |
} | |
pg->mark_overflow(hash); | |
} | |
} | |
template<typename Predicate> | |
std::size_t erase_if_impl(Predicate pr) | |
{ | |
std::size_t s=size(); | |
for_all_elements([&,this](group_type* pg,unsigned int n,element_type* p){ | |
if(pr(*p)) erase(iterator{pg,n,p}); | |
}); | |
return std::size_t(s-size()); | |
} | |
template<typename F> | |
void for_all_elements(F f)const | |
{ | |
for_all_elements(arrays,f); | |
} | |
template<typename F> | |
static auto for_all_elements(const arrays_type& arrays_,F f) | |
->decltype(f(nullptr),void()) | |
{ | |
for_all_elements_while(arrays_,[&](element_type* p){f(p);return true;}); | |
} | |
template<typename F> | |
static auto for_all_elements(const arrays_type& arrays_,F f) | |
->decltype(f(nullptr,0,nullptr),void()) | |
{ | |
for_all_elements_while( | |
arrays_,[&](group_type* pg,unsigned int n,element_type* p) | |
{f(pg,n,p);return true;}); | |
} | |
template<typename F> | |
void for_all_elements_while(F f)const | |
{ | |
for_all_elements_while(arrays,f); | |
} | |
template<typename F> | |
static auto for_all_elements_while(const arrays_type& arrays_,F f) | |
->decltype(f(nullptr),void()) | |
{ | |
for_all_elements_while( | |
arrays_,[&](group_type*,unsigned int,element_type* p){return f(p);}); | |
} | |
template<typename F> | |
static auto for_all_elements_while(const arrays_type& arrays_,F f) | |
->decltype(f(nullptr,0,nullptr),void()) | |
{ | |
auto p=arrays_.elements; | |
if(!p){return;} | |
for(auto pg=arrays_.groups,last=pg+arrays_.groups_size_mask+1; | |
pg!=last;++pg,p+=N){ | |
auto mask=pg->match_occupied(); | |
while(mask){ | |
auto n=unchecked_countr_zero(mask); | |
if(!f(pg,n,p+n))return; | |
mask&=mask-1; | |
} | |
} | |
} | |
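/* The loop above visits each occupied slot by peeling set bits off the | |
 * match_occupied() mask: unchecked_countr_zero(mask) gives the index of the | |
 * lowest set bit and mask&=mask-1 clears it. Rough sketch of the idea on a | |
 * plain unsigned value (visit() is just a placeholder): | |
 * | |
 *   unsigned mask=0b101001;                // slots 0, 3 and 5 occupied | |
 *   while(mask){ | |
 *     auto n=unchecked_countr_zero(mask);  // 0, then 3, then 5 | |
 *     visit(n); | |
 *     mask&=mask-1;                        // clear the lowest set bit | |
 *   } | |
 */ | |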
std::atomic<std::size_t> size_; | |
arrays_type arrays; | |
std::size_t ml; | |
}; | |
#if BOOST_WORKAROUND(BOOST_MSVC,<=1900) | |
#pragma warning(pop) /* C4702 */ | |
#endif | |
#if defined(BOOST_MSVC) | |
#pragma warning(pop) /* C4714 */ | |
#endif | |
#if defined(BOOST_GCC) | |
#pragma GCC diagnostic pop /* ignored "-Wshadow" */ | |
#endif | |
} /* namespace cfoa */ | |
} /* namespace detail */ | |
} /* namespace unordered */ | |
} /* namespace boost */ | |
#undef BOOST_UNORDERED_ASSUME | |
#undef BOOST_UNORDERED_HAS_BUILTIN | |
#undef BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED | |
#ifdef BOOST_UNORDERED_LITTLE_ENDIAN_NEON | |
#undef BOOST_UNORDERED_LITTLE_ENDIAN_NEON | |
#endif | |
#ifdef BOOST_UNORDERED_SSE2 | |
#undef BOOST_UNORDERED_SSE2 | |
#endif | |
#endif |
// Copyright 2021, 2022 Peter Dimov. | |
// Distributed under the Boost Software License, Version 1.0. | |
// https://www.boost.org/LICENSE_1_0.txt | |
// the _SILENCE_* macros must be defined before any standard library header | |
// is included for them to take effect (MSVC STL) | |
#define _SILENCE_CXX17_OLD_ALLOCATOR_MEMBERS_DEPRECATION_WARNING | |
#define _SILENCE_CXX20_CISO646_REMOVED_WARNING | |
#include <pstl/glue_execution_defs.h> | |
#include <boost/unordered/unordered_flat_map.hpp> | |
#include <boost/unordered/unordered_flat_set.hpp> | |
#include <boost/regex.hpp> | |
#include <vector> | |
#include <memory> | |
#include <cstdint> | |
#include <iostream> | |
#include <iomanip> | |
#include <chrono> | |
#include <fstream> | |
#include <string_view> | |
#include <string> | |
#include <mutex> | |
#include <thread> | |
#include <atomic> | |
#include <shared_mutex> | |
#include <algorithm> // for std::for_each with an execution policy | |
#include <numeric> // for std::transform_reduce | |
#include <functional> // for std::plus<> | |
#include <execution> | |
#include <cstdlib> // for std::atoi | |
#include "cfoa.hpp" | |
// copied from https://rigtorp.se/spinlock/ | |
struct spinlock { | |
std::atomic<bool> lock_ = {0}; | |
void lock() noexcept { | |
for (;;) { | |
// Optimistically assume the lock is free on the first try | |
if (!lock_.exchange(true, std::memory_order_acquire)) { | |
return; | |
} | |
// Wait for lock to be released without generating cache misses | |
while (lock_.load(std::memory_order_relaxed)) { | |
// Issue an X86 PAUSE instruction to reduce contention between hyper-threads | |
// (__builtin_ia32_pause() is x86-only; an ARM YIELD equivalent is not | |
// implemented in this copy) | |
__builtin_ia32_pause(); | |
} | |
} | |
} | |
bool try_lock() noexcept { | |
// First do a relaxed load to check if lock is free in order to prevent | |
// unnecessary cache misses if someone does while(!try_lock()) | |
return !lock_.load(std::memory_order_relaxed) && | |
!lock_.exchange(true, std::memory_order_acquire); | |
} | |
void unlock() noexcept { | |
lock_.store(false, std::memory_order_release); | |
} | |
}; | |
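// The spinlock above models BasicLockable (lock/unlock) and Lockable | |
// (try_lock), so it can be dropped into std::lock_guard / std::scoped_lock | |
// exactly like std::mutex; that is how the sharded spinlock variants below | |
// use it. Rough usage sketch: | |
// | |
//   spinlock m; | |
//   { | |
//     std::lock_guard<spinlock> guard( m ); // spins until acquired | |
//     // ... touch the shared map ... | |
//   }                                       // released in ~lock_guard | |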
int const Th = 12; // number of threads | |
int const Sh = Th * Th; // number of shards | |
#if SIZE_MAX > UINT32_MAX | |
std::size_t const Reserve = 1418655 ; | |
//std::size_t const Reserve = 354032 ; | |
#else | |
std::size_t const Reserve = 354032 ; | |
#endif | |
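// Presumably Sh = Th*Th shards are used so that, with Th worker threads | |
// hashing uniformly, two threads rarely pick the same shard at the same | |
// time. Reserve appears to be roughly the number of distinct words in | |
// enwik9 (resp. enwik8 on 32-bit builds), letting the concurrent table | |
// reserve its capacity once up front instead of rehashing while threads | |
// insert; both values are taken as given here, not re-measured. | |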
using namespace std::chrono_literals; | |
static void print_time( std::chrono::steady_clock::time_point & t1, char const* label, std::size_t s, std::size_t size ) | |
{ | |
auto t2 = std::chrono::steady_clock::now(); | |
std::cout << " " << label << ": " << ( t2 - t1 ) / 1ms << " ms (s=" << s << ", size=" << size << ")" << std::endl; | |
t1 = t2; | |
} | |
static std::vector<std::string> words; | |
static std::vector<std::string> load_preparsed_words(std::string const& filename) { | |
auto is = std::ifstream(filename); | |
if (!is.is_open()) { | |
return {}; | |
} | |
std::string num_words; | |
std::getline(is, num_words); | |
auto my_words = std::vector<std::string>(); | |
my_words.resize(std::stoull(num_words)); | |
for (auto& word : my_words) { | |
std::getline(is, word); | |
} | |
return my_words; | |
} | |
static void save_preparsed_words(std::vector<std::string> const& words, std::string const& filename) { | |
auto os = std::ofstream(filename); | |
os << std::to_string(words.size()) << '\n'; | |
for (auto const& word : words) { | |
os << word << '\n'; | |
} | |
} | |
static void init_words() | |
{ | |
#if SIZE_MAX > UINT32_MAX | |
char const* fn = "enwik9"; // http://mattmahoney.net/dc/textdata | |
#else | |
char const* fn = "enwik8"; // ditto | |
#endif | |
auto t1 = std::chrono::steady_clock::now(); | |
words = load_preparsed_words(std::string(fn) + ".words"); | |
if (words.empty()) { | |
std::ifstream is( fn ); | |
std::string in( std::istreambuf_iterator<char>( is ), std::istreambuf_iterator<char>{} ); | |
boost::regex re( "[a-zA-Z]+"); | |
boost::sregex_token_iterator it( in.begin(), in.end(), re, 0 ), end; | |
words.assign( it, end ); | |
save_preparsed_words(words, std::string(fn) + ".words"); | |
} | |
auto t2 = std::chrono::steady_clock::now(); | |
std::cout << fn << ": " << words.size() << " words, " << ( t2 - t1 ) / 1ms << " ms\n\n"; | |
} | |
struct ufm_single_threaded | |
{ | |
boost::unordered_flat_map<std::string_view, std::size_t> map; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::size_t s = 0; | |
for( auto const& word: words ) | |
{ | |
++map[ word ]; | |
++s; | |
} | |
print_time( t1, "Word count", s, map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::size_t s = 0; | |
for( auto const& word: words ) | |
{ | |
std::string_view w2( word ); | |
w2.remove_prefix( 1 ); | |
s += map.contains( w2 ); | |
} | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
struct ufm_mutex | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t> map; | |
alignas(64) std::mutex mtx; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
size_t s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto lock = std::lock_guard( mtx ); | |
++map[ words[j] ]; | |
++s; | |
} | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Word count", s, map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
size_t s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto lock = std::lock_guard( mtx ); | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
s += map.contains( w2 ); | |
} | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
struct ufm_spinlock | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t> map; | |
alignas(64) spinlock mtx; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
size_t s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto lock = std::lock_guard( mtx ); | |
++map[ words[j] ]; | |
++s; | |
} | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Word count", s, map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
size_t s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto lock = std::lock_guard( mtx ); | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
s += map.contains( w2 ); | |
} | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
struct ufm_rwlock | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t> map; | |
alignas(64) std::shared_mutex mtx; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::lock_guard<std::shared_mutex> lock( mtx ); | |
++map[ words[j] ]; | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Word count", s, map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::shared_lock<std::shared_mutex> lock(mtx); | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
s2 += map.contains( w2 ); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
template<class Mtx> struct sync_map | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t> map; | |
alignas(64) Mtx mtx; | |
}; | |
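// Each shard pairs its own flat map with its own lock; alignas(64) keeps the | |
// map and the lock on separate cache lines (64 bytes being the usual line | |
// size on x86), which should avoid false sharing between neighbouring shards | |
// in the sync[] arrays below. A thread picks a shard with hash % Sh, so two | |
// threads only contend when they happen to hit the same shard. | |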
struct ufm_sharded_mutex | |
{ | |
sync_map<std::mutex> sync[ Sh ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto const& word = words[ j ]; | |
std::size_t hash = boost::hash<std::string_view>()( word ); | |
std::size_t shard = hash % Sh; | |
std::lock_guard<std::mutex> lock( sync[ shard ].mtx ); | |
++sync[ shard ].map[ word ]; | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
std::size_t hash = boost::hash<std::string_view>()( w2 ); | |
std::size_t shard = hash % Sh; | |
std::lock_guard<std::mutex> lock( sync[ shard ].mtx ); | |
s2 += sync[ shard ].map.contains( w2 ); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
// | |
struct prehashed | |
{ | |
std::string_view x; | |
std::size_t h; | |
explicit prehashed( std::string_view x_ ): x( x_ ), h( boost::hash<std::string_view>()( x_ ) ) { } | |
operator std::string_view () const | |
{ | |
return x; | |
} | |
friend bool operator==( prehashed const& x, prehashed const& y ) | |
{ | |
return x.x == y.x; | |
} | |
friend bool operator==( prehashed const& x, std::string_view y ) | |
{ | |
return x.x == y; | |
} | |
friend bool operator==( std::string_view x, prehashed const& y ) | |
{ | |
return x == y.x; | |
} | |
}; | |
template<> | |
struct boost::hash< prehashed > | |
{ | |
using is_transparent = void; | |
std::size_t operator()( prehashed const& x ) const | |
{ | |
return x.h; | |
} | |
std::size_t operator()( std::string_view x ) const | |
{ | |
return boost::hash<std::string_view>()( x ); | |
} | |
}; | |
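// Because boost::hash<prehashed> is transparent and std::equal_to<> is used | |
// as the key-equal, the maps below can be probed with a prehashed key | |
// directly: the string is hashed once, and the cached x.h is reused both for | |
// shard selection and for the table lookup. Rough sketch (the map object is | |
// illustrative, not one of the benchmark fixtures): | |
// | |
//   boost::unordered_flat_map<std::string_view, std::size_t, | |
//     boost::hash<prehashed>, std::equal_to<>> m; | |
//   prehashed x( std::string_view( "word" ) ); | |
//   std::size_t shard = x.h % Sh;  // cached hash picks the shard | |
//   bool found = m.contains( x );  // heterogeneous lookup, no second hash of the string | |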
template<class Mtx> struct sync_map_prehashed | |
{ | |
alignas(64) boost::unordered_flat_map< std::string_view, std::size_t, boost::hash<prehashed>, std::equal_to<> > map; | |
alignas(64) Mtx mtx; | |
}; | |
template<typename Mutex> | |
struct ufm_sharded_mutex_prehashed | |
{ | |
sync_map_prehashed<Mutex> sync[ Sh ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view word = words[ j ]; | |
prehashed x( word ); | |
std::size_t shard = x.h % Sh; | |
auto lock = std::lock_guard( sync[ shard ].mtx ); | |
++sync[ shard ].map[ x ]; | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
prehashed x( w2 ); | |
std::size_t shard = x.h % Sh; | |
auto lock = std::lock_guard( sync[ shard ].mtx ); | |
s2 += sync[ shard ].map.contains( x ); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
// link against tbb with -ltbb | |
template<typename Mutex> | |
struct ufm_sharded_mutex_prehashed_par | |
{ | |
sync_map_prehashed<Mutex> sync[ Sh ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::for_each(std::execution::par, words.begin(), words.end(), [&](std::string const& word) { | |
prehashed x( word ); | |
std::size_t shard = x.h % Sh; | |
auto lock = std::lock_guard( sync[ shard ].mtx ); | |
++sync[ shard ].map[ x ]; | |
}); | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", words.size(), n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
auto s = std::transform_reduce(std::execution::par, words.begin(), words.end(), std::size_t{}, std::plus<>{}, [&](std::string const& word) { | |
std::string_view w2( word ); | |
w2.remove_prefix( 1 ); | |
prehashed x( w2 ); | |
std::size_t shard = x.h % Sh; | |
auto lock = std::lock_guard( sync[ shard ].mtx ); | |
return sync[ shard ].map.contains( x ) ? 1 : 0; | |
}); | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
// | |
struct ufm_sharded_rwlock | |
{ | |
sync_map<std::shared_mutex> sync[ Sh ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
auto const& word = words[ j ]; | |
std::size_t hash = boost::hash<std::string_view>()( word ); | |
std::size_t shard = hash % Sh; | |
std::lock_guard<std::shared_mutex> lock( sync[ shard ].mtx ); | |
++sync[ shard ].map[ word ]; | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
std::size_t hash = boost::hash<std::string_view>()( w2 ); | |
std::size_t shard = hash % Sh; | |
std::shared_lock<std::shared_mutex> lock( sync[ shard ].mtx ); | |
s2 += sync[ shard ].map.contains( w2 ); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
struct ufm_sharded_rwlock_prehashed | |
{ | |
sync_map_prehashed<std::shared_mutex> sync[ Sh ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view word = words[ j ]; | |
prehashed x( word ); | |
std::size_t shard = x.h % Sh; | |
auto lock = std::lock_guard( sync[ shard ].mtx ); | |
++sync[ shard ].map[ x ]; | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t s2 = 0; | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
prehashed x( w2 ); | |
std::size_t shard = x.h % Sh; | |
std::shared_lock<std::shared_mutex> lock( sync[ shard ].mtx ); | |
s2 += sync[ shard ].map.contains( x ); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Sh; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
// | |
struct ufm_sharded_isolated | |
{ | |
struct | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t> map; | |
} | |
sync[ Th ]; | |
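// Design note: in this "isolated" variant there is one map per thread (Th | |
// shards) and no locking at all; every thread scans the full word list and | |
// only touches the words whose hash % Th equals its own index. The price is | |
// that each word is hashed Th times, once per thread. The prehashed variant | |
// below follows the same pattern. | |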
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, &s]{ | |
std::size_t s2 = 0; | |
for( std::size_t j = 0; j < words.size(); ++j ) | |
{ | |
auto const& word = words[ j ]; | |
std::size_t hash = boost::hash<std::string_view>()( word ); | |
std::size_t shard = hash % Th; | |
if( shard == i ) | |
{ | |
++sync[ i ].map[ word ]; | |
++s2; | |
} | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, &s]{ | |
std::size_t s2 = 0; | |
for( std::size_t j = 0; j < words.size(); ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
std::size_t hash = boost::hash<std::string_view>()( w2 ); | |
std::size_t shard = hash % Th; | |
if( shard == i ) | |
{ | |
s2 += sync[ i ].map.contains( w2 ); | |
} | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
struct ufm_sharded_isolated_prehashed | |
{ | |
struct | |
{ | |
alignas(64) boost::unordered_flat_map<std::string_view, std::size_t, boost::hash<prehashed>, std::equal_to<>> map; | |
} | |
sync[ Th ]; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, &s]{ | |
std::size_t s2 = 0; | |
for( std::size_t j = 0; j < words.size(); ++j ) | |
{ | |
std::string_view word = words[ j ]; | |
prehashed x( word ); | |
std::size_t shard = x.h % Th; | |
if( shard == i ) | |
{ | |
++sync[ i ].map[ x ]; | |
++s2; | |
} | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Word count", s, n ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, &s]{ | |
std::size_t s2 = 0; | |
for( std::size_t j = 0; j < words.size(); ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
prehashed x( w2 ); | |
std::size_t shard = x.h % Th; | |
if( shard == i ) | |
{ | |
s2 += sync[ i ].map.contains( x ); | |
} | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
std::size_t n = 0; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
n += sync[ i ].map.size(); | |
} | |
print_time( t1, "Contains", s, n ); | |
} | |
}; | |
template<typename Key,typename T> | |
struct map_policy | |
{ | |
using key_type=Key; | |
using raw_key_type=typename std::remove_const<Key>::type; | |
using raw_mapped_type=typename std::remove_const<T>::type; | |
using init_type=std::pair<raw_key_type,raw_mapped_type>; | |
using moved_type=std::pair<raw_key_type&&,raw_mapped_type&&>; | |
using value_type=std::pair<const Key,T>; | |
using element_type=value_type; | |
static value_type& value_from(element_type& x) | |
{ | |
return x; | |
} | |
template <class K,class V> | |
static const raw_key_type& extract(const std::pair<K,V>& kv) | |
{ | |
return kv.first; | |
} | |
static moved_type move(value_type& x) | |
{ | |
return{ | |
std::move(const_cast<raw_key_type&>(x.first)), | |
std::move(const_cast<raw_mapped_type&>(x.second)) | |
}; | |
} | |
template<typename Allocator,typename... Args> | |
static void construct(Allocator& al,element_type* p,Args&&... args) | |
{ | |
boost::allocator_traits<Allocator>:: | |
construct(al,p,std::forward<Args>(args)...); | |
} | |
template<typename Allocator> | |
static void destroy(Allocator& al,element_type* p)noexcept | |
{ | |
boost::allocator_traits<Allocator>::destroy(al,p); | |
} | |
}; | |
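// map_policy is the type-policy hook expected by the cfoa::table above: it | |
// tells the table how to extract the key from a stored pair, how to move a | |
// pair whose first member is const, and how to construct/destroy elements | |
// through the allocator. Sketch of the instantiation used by the fixtures | |
// below: | |
// | |
//   using table_type = boost::unordered::detail::cfoa::table< | |
//     map_policy<std::string_view, std::size_t>, | |
//     boost::hash<std::string_view>, std::equal_to<std::string_view>, | |
//     std::allocator<std::pair<const std::string_view, std::size_t>>>; | |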
struct ufm_concurrent | |
{ | |
boost::unordered::detail::cfoa::table< | |
map_policy<std::string_view, std::size_t>, | |
boost::hash<std::string_view>, std::equal_to<std::string_view>, | |
std::allocator<std::pair<const std::string_view,std::size_t>>> map; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
map.reserve(Reserve); | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
std::size_t s2 = 0; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
//++map[ words[j] ]; | |
++(map.try_emplace(words[j],0).first->second); | |
++s2; | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Word count", s, map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
std::atomic<std::size_t> s = 0; | |
std::thread th[ Th ]; | |
std::size_t m = words.size() / Th; | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ] = std::thread( [this, i, m, &s]{ | |
std::size_t start = i * m; | |
std::size_t end = i == Th-1? words.size(): (i + 1) * m; | |
std::size_t s2 = 0; | |
for( std::size_t j = start; j < end; ++j ) | |
{ | |
std::string_view w2( words[j] ); | |
w2.remove_prefix( 1 ); | |
s2 += map.find( w2 ) != map.end(); | |
} | |
s += s2; | |
}); | |
} | |
for( std::size_t i = 0; i < Th; ++i ) | |
{ | |
th[ i ].join(); | |
} | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
struct ufm_concurrent_par | |
{ | |
boost::unordered::detail::cfoa::table< | |
map_policy<std::string_view, std::size_t>, | |
boost::hash<std::string_view>, std::equal_to<std::string_view>, | |
std::allocator<std::pair<const std::string_view,std::size_t>>> map; | |
BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
map.reserve(Reserve); | |
std::for_each(std::execution::par, words.begin(), words.end(), [&](std::string const& word) { | |
++(map.try_emplace(word,0).first->second); | |
}); | |
print_time( t1, "Word count", words.size(), map.size() ); | |
} | |
BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) | |
{ | |
auto s = std::transform_reduce(std::execution::par, words.begin(), words.end(), std::size_t{}, std::plus<>{}, [&](std::string const& word) { | |
std::string_view w2( word ); | |
w2.remove_prefix( 1 ); | |
return map.find( w2 ) != map.end(); | |
}); | |
print_time( t1, "Contains", s, map.size() ); | |
} | |
}; | |
// | |
struct record | |
{ | |
std::string label_; | |
long long time_; | |
}; | |
static std::vector<record> times; | |
template<class Map> BOOST_NOINLINE void test( int test_number, char const* label ) | |
{ | |
std::cout << "#" << test_number << ": " << label << std::endl; | |
Map map; | |
auto t0 = std::chrono::steady_clock::now(); | |
auto t1 = t0; | |
record rec = { label, 0 }; | |
map.test_word_count( t1 ); | |
map.test_contains( t1 ); | |
auto tN = std::chrono::steady_clock::now(); | |
std::cout << " Total: " << ( tN - t0 ) / 1ms << " ms\n\n"; | |
rec.time_ = ( tN - t0 ) / 1ms; | |
times.push_back( rec ); | |
} | |
boost::unordered_flat_set<int> parse_args(int argc, char** argv) { | |
auto s = boost::unordered_flat_set<int>(); | |
for (int i=1; i<argc; ++i) { | |
auto n = std::atoi(argv[i]); | |
s.insert(n); | |
} | |
return s; | |
} | |
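// Command-line usage (binary name illustrative): with no arguments every | |
// benchmark runs; otherwise only the listed test numbers run, e.g. | |
// | |
//   ./word_count_bench          # run all tests | |
//   ./word_count_bench 1 13 14  # run only tests #1, #13 and #14 | |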
int main(int argc, char** argv) | |
{ | |
auto numbers = parse_args(argc, argv); | |
init_words(); | |
int i=0; | |
++i; // increment outside the condition so test numbers stay correct even when no arguments are given | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_single_threaded>( i, "boost::unordered_flat_map, single threaded" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_mutex>( i, "boost::unordered_flat_map, mutex" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_rwlock>( i, "boost::unordered_flat_map, rwlock" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_mutex>( i, "boost::unordered_flat_map, sharded mutex" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_mutex_prehashed<std::mutex>>( i, "boost::unordered_flat_map, sharded mutex, prehashed" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_mutex_prehashed<spinlock>>( i, "boost::unordered_flat_map, sharded spinlock, prehashed" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_mutex_prehashed_par<std::mutex>>( i, "boost::unordered_flat_map, sharded mutex, prehashed par" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_mutex_prehashed_par<spinlock>>( i, "boost::unordered_flat_map, sharded spinlock, prehashed par" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_rwlock>( i, "boost::unordered_flat_map, sharded rwlock" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_rwlock_prehashed>( i, "boost::unordered_flat_map, sharded rwlock, prehashed" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_isolated>( i, "boost::unordered_flat_map, sharded isolated" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_sharded_isolated_prehashed>( i, "boost::unordered_flat_map, sharded isolated, prehashed" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_concurrent>( i, "ufm_concurrent" ); | |
} | |
++i; | |
if (numbers.empty() || numbers.contains(i)) { | |
test<ufm_concurrent_par>( i, "ufm_concurrent_par" ); | |
} | |
std::cout << "---\n\n"; | |
for( auto const& x: times ) | |
{ | |
std::cout << std::setw( 70 ) << ( x.label_ + ": " ) << std::setw( 5 ) << x.time_ << " ms\n"; | |
} | |
} |