Space-efficient resizable arrays. http://sebastiansylvan.wordpress.com/2012/12/19/space-efficient-rresizable-arrays/
#pragma once
#pragma warning(push)
#pragma warning(disable:4996)
#include <algorithm> // std::copy_n
#include <cassert>
#include <intrin.h>  // MSVC intrinsics: _BitScanReverse
#include <memory>
#include <vector>
template<typename T>
class compact_vector
{
    std::vector<std::unique_ptr<T[]>> index_block; // Points to data blocks.

    // When we deallocate, we keep the last block around so we can reuse it.
    // This is required to get O(1) push/pop. We could leave it in index_block instead,
    // and store a bit to indicate this, but tracking it separately makes things cleaner.
    std::unique_ptr<T[]> spare_empty_db;

    unsigned int num_elems;
    unsigned int num_elems_in_last_data_block;

    // Super blocks have no physical representation, but tracking which one we're in
    // allows us to compute the size of data blocks easily.
    unsigned int num_super_blocks;

    // We could compute the number of DBs in all SBs, like in 'locate_data_block_and_elem',
    // and derive this next value as needed by subtracting that from index_block.size().
    // However, that is reasonably expensive, so we just track it as we go instead.
    unsigned int num_dbs_in_last_sb;
    // How many data blocks fit in a super block
    __forceinline static unsigned int super_block_size(unsigned int super_block_index)
    {
        return 1 << (super_block_index/2);
    }

    // How many elements fit in a data block for a super block
    __forceinline static unsigned int data_block_size(unsigned int super_block_index)
    {
        return 1 << ((super_block_index+1)/2);
    }
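    // A quick illustration of the two formulas above (derived directly from them):
    //   SB 0: 1 data block  of 1 element    (total capacity  1)
    //   SB 1: 1 data block  of 2 elements   (total capacity  3)
    //   SB 2: 2 data blocks of 2 elements   (total capacity  7)
    //   SB 3: 2 data blocks of 4 elements   (total capacity 15)
    //   SB 4: 4 data blocks of 4 elements   (total capacity 31)
    // Capacity roughly doubles per super block, while the largest data block
    // (the most memory that can sit unused) stays O(sqrt(n)).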
    // Compute the block index and element index within that block for a given "virtual" index.
    //__forceinline static void locate_data_block_and_elem2(unsigned int i, unsigned int& element_within_block, unsigned int& data_block_index)
    //{
    //    const long r = i+1;
    //    unsigned long super_block_index;
    //    _BitScanReverse(&super_block_index, r);
    //
    //    unsigned int num_data_blocks_before_super_block = 2 * ((1 << (super_block_index/2))-1);
    //    if ( super_block_index&1 )
    //    {
    //        num_data_blocks_before_super_block += 1 << (super_block_index/2);
    //    }
    //    const unsigned int e_bit_count = (super_block_index+1)/2;
    //    // the element index within the data block
    //    element_within_block = r & ((1 << e_bit_count) - 1);
    //    long r_msb_cleared = r;
    //    _bittestandreset( &r_msb_cleared, super_block_index);
    //    // the data block index within the super block
    //    const unsigned int block_index_within_super_block = r_msb_cleared >> e_bit_count;
    //
    //    data_block_index = num_data_blocks_before_super_block + block_index_within_super_block;
    //}
    __forceinline static void locate_data_block_and_elem(unsigned int i, unsigned int& element_within_block, unsigned int& data_block_index)
    {
        int r = i+1;
        unsigned long k;
        _BitScanReverse(&k, r);
        // There's some intentional redundancy in these two branches.
        // Ends up slightly faster this way.
        if (k & 1)
        {
            const int floorKO2 = k / 2;
            const int ceilKO2 = floorKO2 + 1;
            const int powFloorKO2 = (1 << floorKO2);
            const int p = 2 * (powFloorKO2 - 1);
            const int b = (r >> ceilKO2) & (powFloorKO2 - 1);
            element_within_block = r & ((1 << ceilKO2) - 1);
            data_block_index = p + powFloorKO2 + b;
        }
        else
        {
            const int floorKO2 = k / 2;
            const int powFloorKO2 = (1 << floorKO2);
            const int p = 2 * (powFloorKO2 - 1);
            const int b = (r >> floorKO2) & (powFloorKO2 - 1);
            element_within_block = r & (powFloorKO2 - 1);
            data_block_index = p + b;
        }
    }
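    // A couple of worked examples for the mapping above (purely illustrative, using the
    // layout implied by the sizes: db0=[0], db1=[1,2], db2=[3,4], db3=[5,6], ...):
    //   i=6: r=7=0b111, k=2 (even branch): p=2, b=1 -> data_block_index=3, element_within_block=1
    //   i=2: r=3=0b011, k=1 (odd branch):  p=0, b=0 -> data_block_index=1, element_within_block=1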
    __forceinline void allocate_data_block()
    {
        if (spare_empty_db)
        {
            index_block.push_back(std::move(spare_empty_db));
        }
        else
        {
            index_block.emplace_back(new T[data_block_size(num_super_blocks-1)]);
        }
    }
    // Reserves space for one more element
    __forceinline void grow()
    {
        // Treat the very first allocation specially.
        // This is so we don't have to keep checking for this special case in the two
        // size calculations later.
        if (num_super_blocks == 0)
        {
            allocate_data_block();
            num_super_blocks = 1;
            num_dbs_in_last_sb = 1;
            num_elems_in_last_data_block = 1;
            num_elems = 1;
            return;
        }
        // Do we need to allocate?
        if (num_elems_in_last_data_block == data_block_size(num_super_blocks-1))
        {
            if (num_dbs_in_last_sb == super_block_size(num_super_blocks-1)) // track super block count
            {
                ++num_super_blocks;
                num_dbs_in_last_sb = 0;
            }
            allocate_data_block();
            ++num_dbs_in_last_sb;
            num_elems_in_last_data_block = 0;
        }
        ++num_elems;
        ++num_elems_in_last_data_block;
    }
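    // grow() example, traced from the code above: pushing the 4th element finds the last
    // db (SB1's single 2-element db) full and SB1 already holding its one db, so we
    // advance to SB2 and allocate a fresh db of data_block_size(2) == 2 elements.
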
    // Relinquishes space for one element
    __forceinline void shrink()
    {
        --num_elems;
        --num_elems_in_last_data_block;
        if (num_elems_in_last_data_block == 0)
        {
            // Last block is empty, store it in our spare.
            // This causes deallocation if we already had a spare.
            spare_empty_db = std::move(index_block.back());
            index_block.pop_back();
            --num_dbs_in_last_sb;
            if (num_dbs_in_last_sb == 0) // track super block count
            {
                --num_super_blocks;
                // If the array just became empty, num_super_blocks is now 0. The counters below
                // are meaningless in that state (grow() resets them on the next push), so guard
                // against the otherwise out-of-range shift in super_block_size.
                num_dbs_in_last_sb = num_super_blocks ? super_block_size(num_super_blocks-1) : 0;
            }
            // The new last data block is completely full
            num_elems_in_last_data_block = num_super_blocks ? data_block_size(num_super_blocks-1) : 0;
        }
    }
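    // shrink() example, the mirror of the grow() trace above: popping back down to 3
    // elements empties that db, so it becomes the spare, SB2 is abandoned, and the
    // book-keeping falls back to SB1's full 2-element db.
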
public:
    compact_vector() :
        num_elems(0),
        num_elems_in_last_data_block(0),
        num_super_blocks(0),
        num_dbs_in_last_sb(0)
    {
    }

    compact_vector(compact_vector&& other) :
        index_block(std::move(other.index_block)),
        spare_empty_db(std::move(other.spare_empty_db)),
        num_elems(other.num_elems),
        num_elems_in_last_data_block(other.num_elems_in_last_data_block),
        num_super_blocks(other.num_super_blocks),
        num_dbs_in_last_sb(other.num_dbs_in_last_sb)
    {
    }
    compact_vector(const compact_vector& other) :
        num_elems(other.num_elems),
        num_elems_in_last_data_block(other.num_elems_in_last_data_block),
        num_super_blocks(0),
        num_dbs_in_last_sb(other.num_dbs_in_last_sb)
    {
        // Step through super blocks, copying data blocks
        // of the appropriate size from the other array.
        auto block = other.index_block.begin();
        while (block != other.index_block.end())
        {
            // Step through the book-keeping variable for super blocks. This lets us know how
            // many data blocks to copy for this super block, and how big each one is.
            ++num_super_blocks;
            const unsigned int last_super_block_size = super_block_size(num_super_blocks-1);
            const unsigned int last_data_block_size = data_block_size(num_super_blocks-1);
            // This inner loop will fill up a whole super block, unless we run out.
            for (unsigned int db = 0; db < last_super_block_size && block != other.index_block.end(); ++db)
            {
                // Allocate a data block of the correct size.
                index_block.emplace_back(new T[last_data_block_size]);
                // Copy the data block over.
                // TODO: This also copies unused elems in the last db (harmless, but wasteful).
                std::copy_n(block->get(), last_data_block_size, index_block.back().get());
                ++block;
            }
        }
        assert(num_elems == other.num_elems);
        assert(index_block.size() == other.index_block.size());
        assert(num_super_blocks == other.num_super_blocks);
        assert(num_dbs_in_last_sb == other.num_dbs_in_last_sb);
        assert(num_elems_in_last_data_block == other.num_elems_in_last_data_block);
    }
    compact_vector& operator=(compact_vector&& other)
    {
        num_elems = other.num_elems;
        num_super_blocks = other.num_super_blocks;
        num_elems_in_last_data_block = other.num_elems_in_last_data_block;
        num_dbs_in_last_sb = other.num_dbs_in_last_sb;
        spare_empty_db = std::move(other.spare_empty_db);
        index_block = std::move(other.index_block);
        return *this;
    }

    compact_vector& operator=(const compact_vector& other)
    {
        if (this != &other)
        {
            *this = compact_vector(other); // copy constructor + move-assignment
        }
        return *this;
    }
    void clear()
    {
        num_elems = num_super_blocks = num_elems_in_last_data_block = num_dbs_in_last_sb = 0;
        index_block.clear();
        spare_empty_db.reset();
    }
    unsigned int size() const
    {
        return num_elems;
    }

    T& back()
    {
        return index_block.back()[num_elems_in_last_data_block-1];
    }

    const T& back() const
    {
        return index_block.back()[num_elems_in_last_data_block-1];
    }

    void push_back(T&& x)
    {
        grow();
        // todo: allocate raw memory instead, and run constructor on last elem here
        back() = std::move(x);
    }

    void push_back(const T& x)
    {
        grow();
        // todo: allocate raw memory instead, and run constructor on last elem here
        back() = x;
    }

    void pop_back()
    {
        // todo: once we run constructors manually, run destructor on last elem here
        shrink();
    }

    __forceinline T& operator[](unsigned int i)
    {
        unsigned int e, db;
        locate_data_block_and_elem(i, e, db);
        return index_block[db][e];
    }

    __forceinline const T& operator[](unsigned int i) const
    {
        unsigned int e, db;
        locate_data_block_and_elem(i, e, db);
        return index_block[db][e];
    }
    // iterator stuff
    class iterator
    {
        T* elem;
        T* current_db_end;
        unsigned int current_db;
        unsigned int current_sb;
        unsigned int last_db_in_sb;
        compact_vector* arr;
    public:
        iterator() : elem(nullptr) {}

        iterator(compact_vector* the_array) :
            current_db(0),
            current_sb(0),
            last_db_in_sb(0),
            arr(the_array)
        {
            if (arr->index_block.size() > 0)
            {
                elem = arr->index_block[0].get();
                current_db_end = elem+1; // first db is always of size 1
            }
            else
            {
                elem = current_db_end = nullptr;
            }
        }

        bool operator==(const iterator& other) const
        {
            return elem == other.elem;
        }

        bool operator!=(const iterator& other) const
        {
            return elem != other.elem;
        }

        T& operator*()
        {
            return *elem;
        }

        iterator& operator++()
        {
            if (++elem == current_db_end)
            {
                if (++current_db == arr->index_block.size())
                {
                    // we've reached the end of the whole array
                    elem = nullptr;
                }
                else
                {
                    elem = arr->index_block[current_db].get();
                    // Track which super block we're in, so we can use this
                    // to compute the size of the data block later on.
                    if (current_db > last_db_in_sb)
                    {
                        ++current_sb;
                        last_db_in_sb += super_block_size(current_sb);
                    }
                    if (elem == arr->index_block.back().get())
                    {
                        // We're in the last block; it may not be completely full,
                        // so we need to treat it specially.
                        current_db_end = elem + arr->num_elems_in_last_data_block;
                    }
                    else
                    {
                        // Not the last block, so it must be a full block; compute its size
                        // using the super block index from before.
                        current_db_end = elem + data_block_size(current_sb);
                    }
                }
            }
            return *this;
        }
    };
    iterator begin()
    {
        return iterator(this);
    }

    iterator end()
    {
        return iterator();
    }
};

#pragma warning(pop)
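
// A minimal usage sketch (assumes T is default-constructible and assignable, since
// push_back default-constructs block storage and then assigns into it); kept behind
// #if 0 so it doesn't affect any build that includes this header.
#if 0
#include <cstdio>

int main()
{
    compact_vector<int> v;
    for (int i = 0; i < 20; ++i)
        v.push_back(i);                 // amortized O(1), never relocates existing elements

    std::printf("%u %d %d\n", v.size(), v[6], v.back());

    for (int x : v)                     // forward iteration walks the data blocks in order
        std::printf("%d ", x);
    std::printf("\n");

    v.pop_back();                       // O(1); an emptied block is kept around as the spare
    return 0;
}
#endif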