Last active
March 25, 2021 17:01
-
-
Save Kh4n/0618b6620b73aae6c5bde9adbfca1859 to your computer and use it in GitHub Desktop.
TeleportList
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <array> | |
#include <cassert> | |
#include <chrono> | |
#include <cstdlib> | |
#include <iostream> | |
#include <set> | |
#include <vector> | |
template <typename T> | |
struct Node { | |
Node() { } | |
Node(const T& d, Node<T>* n) | |
: data(d) | |
, next(n) | |
{ | |
} | |
T data; | |
Node<T>* next; | |
}; | |
template <typename T> | |
class TeleportList { | |
public: | |
TeleportList() | |
: bucketSize(4) | |
{ | |
} | |
TeleportList(int bucketSize) | |
: bucketSize(bucketSize) | |
{ | |
} | |
~TeleportList(); | |
void insert(const T& val); | |
bool find(const T& val); | |
bool remove(const T& val); | |
void clear(); | |
void printPoles() | |
{ | |
for (auto&& n : poles) { | |
std::cout << n->data << ", "; | |
} | |
} | |
void printAll() | |
{ | |
if (poles.size() == 0) { | |
std::cout << "[]" << std::endl; | |
} | |
size_t p = 0; | |
Node<T>* ptr = poles[0]; | |
std::cout << "["; | |
while (ptr->next != nullptr) { | |
if (p < poles.size() && ptr == poles[p]) { | |
std::cout << ":"; | |
p += 1; | |
} | |
std::cout << ptr->data << ", "; | |
ptr = ptr->next; | |
} | |
std::cout << ptr->data << "]\n"; | |
} | |
size_t calcLength() | |
{ | |
size_t ret = 0; | |
Node<T>* ptr = poles[0]; | |
while (ptr != nullptr) { | |
ret += 1; | |
ptr = ptr->next; | |
} | |
return ret; | |
} | |
int ops = 0; | |
private: | |
size_t binsearch(const T& val) const; | |
const unsigned char bucketSize; | |
std::vector<Node<T>*> poles; | |
std::vector<T> search; | |
}; | |
template <typename T> | |
TeleportList<T>::~TeleportList() | |
{ | |
clear(); | |
} | |
template <typename T> | |
size_t TeleportList<T>::binsearch(const T& val) const | |
{ | |
size_t l = 0; | |
size_t r = this->search.size(); | |
while (l != r) { | |
size_t m = (r - l) / 2 + l; | |
if (this->search[m] <= val) { | |
l = m + 1; | |
} else { | |
r = m; | |
} | |
} | |
return l; | |
} | |
template <typename T> | |
void TeleportList<T>::insert(const T& val) | |
{ | |
if (poles.size() == 0) { | |
poles.push_back(new Node<T>(val, nullptr)); | |
search.push_back(val); | |
return; | |
} | |
size_t pos = binsearch(val); | |
if (pos == 0) { | |
poles[0] = new Node<T>(val, poles[0]); | |
search[0] = val; | |
return; | |
} | |
Node<T>* prev = nullptr; | |
Node<T>* ptr = poles[pos - 1]; | |
unsigned char count = bucketSize; | |
while (ptr != nullptr && ptr->data <= val) { | |
// ops += 1; | |
if (count == 0) { | |
if (pos == poles.size()) { | |
poles.push_back(nullptr); | |
search.push_back(val); | |
} | |
poles[pos] = ptr; | |
search[pos] = ptr->data; | |
count = bucketSize; | |
pos += 1; | |
} | |
prev = ptr; | |
ptr = ptr->next; | |
count -= 1; | |
} | |
prev->next = new Node<T>(val, ptr); | |
} | |
template <typename T> | |
bool TeleportList<T>::find(const T& val) | |
{ | |
size_t pos = binsearch(val); | |
if (pos == 0) { | |
return poles.size() > 0 && poles[0]->data == val; | |
} | |
Node<T>* ptr = poles[pos - 1]; | |
unsigned char count = bucketSize; | |
while (ptr != nullptr && ptr->data < val) { | |
if (count == 0) { | |
if (pos == poles.size()) { | |
poles.push_back(nullptr); | |
search.push_back(val); | |
} | |
poles[pos] = ptr; | |
search[pos] = ptr->data; | |
count = bucketSize; | |
pos += 1; | |
} | |
ptr = ptr->next; | |
count -= 1; | |
} | |
return ptr != nullptr && ptr->data == val; | |
} | |
template <typename T> | |
bool TeleportList<T>::remove(const T& val) | |
{ | |
// TODO: implement | |
return false; | |
} | |
template <typename T> | |
void TeleportList<T>::clear() | |
{ | |
if (poles.size() == 0) { | |
return; | |
} | |
Node<T>* ptr = poles[0]; | |
while (ptr != nullptr) { | |
auto tmp = ptr->next; | |
delete ptr; | |
ptr = tmp; | |
} | |
poles.clear(); | |
search.clear(); | |
} | |
template <typename T> | |
struct UnrolledNode { | |
UnrolledNode(UnrolledNode<T>* next) | |
: next(next) | |
, len(0) | |
{ | |
} | |
UnrolledNode(const T& val, UnrolledNode<T>* next) | |
: next(next) | |
, len(1) | |
{ | |
data[0] = val; | |
} | |
unsigned char len; | |
UnrolledNode<T>* next = nullptr; | |
std::array<T, 12> data; | |
bool full() { return len == data.max_size(); } | |
void insertSorted(const T& val); | |
void remove(const T& val); | |
}; | |
template <typename T> | |
void UnrolledNode<T>::insertSorted(const T& val) | |
{ | |
if (len == 0) { | |
data[0] = val; | |
len += 1; | |
return; | |
} | |
if (full() && (next == nullptr || next->full())) { | |
next = new UnrolledNode<T>(next); | |
} | |
unsigned char pos = 0; | |
while (pos < len && data[pos] < val) { | |
pos += 1; | |
} | |
if (pos == data.max_size()) { | |
next->insertSorted(val); | |
return; | |
} | |
if (full()) { | |
next->insertSorted(data[len - 1]); | |
len -= 1; | |
} | |
std::copy_backward(data.begin() + pos, data.begin() + len, data.begin() + len + 1); | |
data[pos] = val; | |
len += 1; | |
} | |
template <typename T> | |
void UnrolledNode<T>::remove(const T& val) | |
{ | |
if (len == 1) { | |
len = 0; | |
return; | |
} | |
unsigned char pos = 0; | |
while (pos < len && data[pos] != val) { | |
pos += 1; | |
} | |
std::copy(data.begin() + pos + 1, data.begin() + len, data.begin() + pos); | |
len -= 1; | |
} | |
template <typename T> | |
class UnrolledTeleportList { | |
public: | |
UnrolledTeleportList() | |
: bucketSize(1) | |
{ | |
} | |
UnrolledTeleportList(int bucketSize) | |
: bucketSize(bucketSize) | |
{ | |
} | |
~UnrolledTeleportList(); | |
void insert(const T& val); | |
bool find(const T& val); | |
bool remove(const T& val); | |
void rebuild(); | |
void clear(); | |
void printPoles() | |
{ | |
for (auto&& n : poles) { | |
std::cout << n->data[0] << ", "; | |
} | |
} | |
void printAll() | |
{ | |
if (poles.size() == 0) { | |
std::cout << "[]" << std::endl; | |
} | |
size_t p = 0; | |
UnrolledNode<T>* ptr = poles[0]; | |
std::cout << "["; | |
while (ptr != nullptr) { | |
if (p < poles.size() && ptr == poles[p]) { | |
std::cout << ":"; | |
p += 1; | |
} | |
for (int i = 0; i < ptr->len; ++i) { | |
std::cout << ptr->data[i] << ", "; | |
} | |
ptr = ptr->next; | |
} | |
std::cout << "]\n"; | |
} | |
size_t calcLength() | |
{ | |
size_t ret = 0; | |
UnrolledNode<T>* ptr = poles[0]; | |
while (ptr != nullptr) { | |
ret += ptr->len; | |
ptr = ptr->next; | |
} | |
return ret; | |
} | |
size_t size() { return len; } | |
private: | |
size_t binsearch(const T& val) const; | |
size_t nRemoved = 0; | |
size_t len = 0; | |
const unsigned char bucketSize; | |
std::vector<UnrolledNode<T>*> poles; | |
}; | |
template <typename T> | |
UnrolledTeleportList<T>::~UnrolledTeleportList() | |
{ | |
clear(); | |
} | |
template <typename T> | |
size_t UnrolledTeleportList<T>::binsearch(const T& val) const | |
{ | |
size_t l = 0; | |
size_t r = this->poles.size(); | |
while (l != r) { | |
size_t m = (r - l) / 2 + l; | |
if (this->poles[m]->data[0] <= val) { | |
l = m + 1; | |
} else { | |
r = m; | |
} | |
} | |
return l; | |
} | |
template <typename T> | |
void UnrolledTeleportList<T>::insert(const T& val) | |
{ | |
len += 1; | |
if (poles.size() == 0) { | |
poles.push_back(new UnrolledNode<T>(val, nullptr)); | |
return; | |
} | |
size_t pos = binsearch(val); | |
pos = std::max((size_t)1, pos); | |
UnrolledNode<T>* ptr = poles[pos - 1]; | |
unsigned char count = bucketSize; | |
while (ptr->next != nullptr) { | |
// ops += 1; | |
if (count == 0) { | |
if (pos == poles.size()) { | |
poles.push_back(nullptr); | |
} | |
poles[pos] = ptr; | |
count = bucketSize; | |
pos += 1; | |
} | |
for (int i = 0; i < std::max((unsigned char)1, ptr->len); ++i) { | |
if (ptr->data[i] > val) { | |
ptr->insertSorted(val); | |
return; | |
} | |
} | |
ptr = ptr->next; | |
count -= 1; | |
} | |
ptr->insertSorted(val); | |
} | |
template <typename T> | |
bool UnrolledTeleportList<T>::find(const T& val) | |
{ | |
if (poles.size() == 0) { | |
return false; | |
} | |
size_t pos = binsearch(val); | |
if (pos == 0) { | |
return false; | |
} | |
UnrolledNode<T>* ptr = poles[pos - 1]; | |
unsigned char count = bucketSize; | |
while (ptr != nullptr) { | |
// ops += 1; | |
if (count == 0) { | |
if (pos == poles.size()) { | |
poles.push_back(nullptr); | |
} | |
poles[pos] = ptr; | |
count = bucketSize; | |
pos += 1; | |
} | |
for (int i = 0; i < std::max((unsigned char)1, ptr->len); ++i) { | |
if (ptr->data[i] == val && ptr->len > 0) { | |
return true; | |
} | |
if (ptr->data[i] > val) { | |
return false; | |
} | |
} | |
ptr = ptr->next; | |
count -= 1; | |
} | |
return false; | |
} | |
template <typename T> | |
bool UnrolledTeleportList<T>::remove(const T& val) | |
{ | |
if (poles.size() == 0) { | |
return false; | |
} | |
if (nRemoved * 2 >= len) { | |
rebuild(); | |
} | |
size_t pos = binsearch(val); | |
if (pos == 0) { | |
return false; | |
} | |
UnrolledNode<T>* ptr = poles[pos - 1]; | |
unsigned char count = bucketSize; | |
while (ptr != nullptr) { | |
// ops += 1; | |
if (count == 0) { | |
if (pos == poles.size()) { | |
poles.push_back(nullptr); | |
} | |
poles[pos] = ptr; | |
count = bucketSize; | |
pos += 1; | |
} | |
for (int i = 0; i < std::max((unsigned char)1, ptr->len); ++i) { | |
if (ptr->data[i] == val && ptr->len > 0) { | |
ptr->remove(val); | |
len -= 1; | |
nRemoved += 1; | |
return true; | |
} | |
if (ptr->data[i] > val) { | |
return false; | |
} | |
} | |
ptr = ptr->next; | |
count -= 1; | |
} | |
return false; | |
} | |
template <typename T> | |
void UnrolledTeleportList<T>::rebuild() | |
{ | |
if (poles.size() == 0) { | |
return; | |
} | |
UnrolledNode<T>* ptr = poles[0]; | |
UnrolledNode<T>* wptr = poles[0]; | |
unsigned char w = 0; | |
auto maxSize = ptr->data.max_size(); | |
unsigned char count = bucketSize; | |
poles.clear(); | |
poles.push_back(wptr); | |
while (ptr != nullptr) { | |
if (count == 0) { | |
poles.push_back(wptr); | |
count = bucketSize; | |
} | |
auto len = ptr->len; | |
for (unsigned char i = 0; i < len; ++i) { | |
if (w >= maxSize) { | |
w = 0; | |
wptr = wptr->next; | |
count -= 1; | |
} | |
wptr->data[w] = ptr->data[i]; | |
w += 1; | |
wptr->len = w; | |
} | |
ptr = ptr->next; | |
} | |
auto tmp = wptr; | |
wptr = wptr->next; | |
tmp->next = nullptr; | |
while (wptr != nullptr) { | |
tmp = wptr->next; | |
delete wptr; | |
wptr = tmp; | |
} | |
nRemoved = 0; | |
} | |
template <typename T> | |
void UnrolledTeleportList<T>::clear() | |
{ | |
if (poles.size() == 0) { | |
return; | |
} | |
UnrolledNode<T>* ptr = poles[0]; | |
while (ptr != nullptr) { | |
auto tmp = ptr->next; | |
delete ptr; | |
ptr = tmp; | |
} | |
poles.clear(); | |
nRemoved = 0; | |
len = 0; | |
} | |
template <class SetType, typename FindFunc> | |
void benchmark(SetType& set, const std::vector<int>& nums, FindFunc func) | |
{ | |
set.clear(); | |
auto now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
set.insert(v); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " inserts: " << duration.count() << " ms\n"; | |
now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
if (!func(set, v)) { | |
assert(false); | |
} | |
} | |
end = std::chrono::high_resolution_clock::now(); | |
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " finds: " << duration.count() << " ms\n"; | |
} | |
template <class SetType, typename FindFunc> | |
void benchmark2(SetType& set, const std::vector<int>& nums, FindFunc func) | |
{ | |
set.clear(); | |
auto now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
set.insert(v); | |
volatile bool b = func(set, v); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " inserts and finds: " << duration.count() | |
<< " ms\n"; | |
} | |
template <class SetType, typename FindFunc, typename RemFunc> | |
void benchmark3(SetType& set, const std::vector<int>& nums, FindFunc findFunc, RemFunc remFunc) | |
{ | |
set.clear(); | |
auto now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
set.insert(v); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " inserts: " << duration.count() << " ms\n"; | |
now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
if (!findFunc(set, v)) { | |
assert(false); | |
} | |
} | |
end = std::chrono::high_resolution_clock::now(); | |
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " finds: " << duration.count() << " ms\n"; | |
now = std::chrono::high_resolution_clock::now(); | |
for (auto&& v : nums) { | |
if (!remFunc(set, v)) { | |
assert(false); | |
} | |
} | |
end = std::chrono::high_resolution_clock::now(); | |
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); | |
std::cout << "Time for " << nums.size() << " removes: " << duration.count() << " ms\n"; | |
assert(set.size() == 0); | |
} | |
bool stdSetFind(const std::set<int>& set, const int& v) { return set.find(v) != set.end(); } | |
bool telepFind(TeleportList<int>& set, const int& v) { return set.find(v); } | |
bool unrolledTelepFind(UnrolledTeleportList<int>& set, const int& v) { return set.find(v); } | |
bool stdSetRemove(std::set<int>& set, const int& v) { return set.erase(v) == 1; } | |
bool unrolledTelepRemove(UnrolledTeleportList<int>& set, const int& v) { return set.remove(v); } | |
void testTelep() | |
{ | |
TeleportList<int> set; | |
size_t numInserts = 100; | |
for (int i = 0; i < numInserts; ++i) { | |
set.insert(rand() % numInserts); | |
set.find(rand() % numInserts); | |
} | |
set.printAll(); | |
} | |
void testUnrolledTelep() | |
{ | |
UnrolledTeleportList<int> set; | |
size_t numInserts = 100; | |
size_t removed = 0; | |
for (int i = 0; i < numInserts; ++i) { | |
set.insert(rand() % numInserts); | |
set.find(rand() % numInserts); | |
} | |
set.printAll(); | |
for (int i = 0; i < numInserts; ++i) { | |
removed += set.remove(rand() % numInserts); | |
} | |
set.printAll(); | |
std::cout << set.size() << std::endl; | |
set.clear(); | |
// set.printAll(); | |
} | |
int main() | |
{ | |
// testUnrolledTelep(); | |
TeleportList<int> telep; | |
UnrolledTeleportList<int> utelep; | |
std::set<int> stdSet; | |
std::vector<int> nums; | |
size_t numInserts = 1000000; | |
for (int i = 0; i < numInserts; ++i) { | |
nums.push_back(i); | |
} | |
std::random_shuffle(nums.begin(), nums.end()); | |
std::cout << "Random inserts (std::set):\n"; | |
benchmark(stdSet, nums, &stdSetFind); | |
std::cout << "Random inserts (TeleportList):\n"; | |
benchmark(telep, nums, &telepFind); | |
std::cout << "Random inserts (UnrolledTeleportList):\n"; | |
benchmark(utelep, nums, &unrolledTelepFind); | |
std::cout << "\nRandom inserts/finds (std::set):\n"; | |
benchmark2(stdSet, nums, &stdSetFind); | |
std::cout << "Random inserts/finds (TeleportList):\n"; | |
benchmark2(telep, nums, &telepFind); | |
std::cout << "Random inserts/finds (UnrolledTeleportList):\n"; | |
benchmark2(utelep, nums, &unrolledTelepFind); | |
nums.clear(); | |
for (int i = 0; i < numInserts; ++i) { | |
nums.push_back(i); | |
} | |
std::cout << "\nSequential inserts (std::set):\n"; | |
benchmark(stdSet, nums, &stdSetFind); | |
std::cout << "Sequential inserts (TeleportList):\n"; | |
benchmark(telep, nums, &telepFind); | |
std::cout << "Sequential inserts (UnrolledTeleportList):\n"; | |
benchmark(utelep, nums, &unrolledTelepFind); | |
std::reverse(nums.begin(), nums.end()); | |
std::cout << "\nSequential inserts (reverse) (std::set):\n"; | |
benchmark(stdSet, nums, &stdSetFind); | |
std::cout << "Sequential inserts (reverse) (TeleportList):\n"; | |
benchmark(telep, nums, &telepFind); | |
std::cout << "Sequential inserts (reverse) (UnrolledTeleportList):\n"; | |
benchmark(utelep, nums, &unrolledTelepFind); | |
std::cout << "\nSequential inserts/finds (reverse) (std::set):\n"; | |
benchmark2(stdSet, nums, &stdSetFind); | |
std::cout << "Sequential inserts/finds (reverse) (TeleportList):\n"; | |
benchmark2(telep, nums, &telepFind); | |
std::cout << "Sequential inserts/finds (reverse) (UnrolledTeleportList):\n"; | |
benchmark2(utelep, nums, &unrolledTelepFind); | |
std::random_shuffle(nums.begin(), nums.end()); | |
std::cout << "\nRandom inserts (std::set):\n"; | |
benchmark3(stdSet, nums, &stdSetFind, &stdSetRemove); | |
std::cout << "Random inserts (UnrolledTeleportList):\n"; | |
benchmark3(utelep, nums, &unrolledTelepFind, &unrolledTelepRemove); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results on my computer (
nums.size() == 1000000
):