Skip to content

Instantly share code, notes, and snippets.

@i-saint
Created January 8, 2023 03:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save i-saint/50dfcf094c543794889be3f371099076 to your computer and use it in GitHub Desktop.
Save i-saint/50dfcf094c543794889be3f371099076 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <string>
#include <set>
#include <chrono>
#include <random>
#include <algorithm>
// dst: std::set, std::map, etc
// begin & end: iterator of sorted items
template<class Tree, class Iterator>
inline void insert_sorted_sequence(Tree& dst, Iterator begin, Iterator end)
{
if (begin == end)
return;
auto pos = dst.begin();
for (auto it = begin; it != end; ++it) {
pos = dst.emplace_hint(pos, *it);
++pos;
}
}
template<class Tree>
inline void merge_optimized(Tree& dst, Tree& src)
{
auto pos = dst.begin();
while (!src.empty()) {
auto node = src.extract(src.begin());
pos = dst.insert(pos, std::move(node));
++pos;
}
}
// for benchmark
template<class Set>
void gen_random_string(Set& container, int data_count, int seed)
{
std::mt19937 engine(seed);
std::uniform_int_distribution<uint32_t> dist(0x0, 0xf);
char buf[128]{};
for (int i = 0; i < data_count; ++i) {
for (int j = 0; j < 127; ++j)
sprintf(buf + j, "%x", dist(engine));
container.insert(buf);
}
}
class Timer
{
public:
Timer()
{
m_start = now();
}
// in millisec
double elapsed_ms()
{
return double(now() - m_start) / 1000000.0;
}
using nanosec = uint64_t;
static nanosec now()
{
using namespace std::chrono;
return duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count();
}
private:
nanosec m_start;
};
template<class T>
struct less
{
static int s_count;
bool operator()(const T& a, const T& b) const
{
++s_count;
return a < b;
}
};
template<class T>
int less<T>::s_count;
//#define LinearAllocator
int g_alloc_count = 0;
#ifdef LinearAllocator
const size_t g_heap_size = 1024 * 1024 * 512;
size_t g_heap_used = 0;
char* g_heap = nullptr;
void* operator new(size_t size)
{
++g_alloc_count;
if (!g_heap) {
g_heap = (char*)malloc(g_heap_size);
}
auto ret = g_heap + g_heap_used;
g_heap_used += size;
if (g_heap_used >= g_heap_size) {
throw std::bad_alloc();
}
return ret;
}
void operator delete(void* addr)
{
// do nothing
}
void operator delete(void* addr, size_t)
{
// do nothing
}
#else
void* operator new(size_t size)
{
++g_alloc_count;
return malloc(size);
}
void operator delete(void* addr)
{
free(addr);
}
void operator delete(void* addr, size_t)
{
free(addr);
}
#endif
int main()
{
using Compare = less<std::string>;
using Compare = less<std::string>;
using Set = std::set<std::string, Compare>;
Set data1, data2;
const int data_count = 100000;
gen_random_string(data1, data_count, 0);
gen_random_string(data2, data_count, 1);
Set result1, result2, result3, result4, result5;
auto reset_state = []() {
Compare::s_count = 0;
g_alloc_count = 0;
};
{
result1 = data1;
reset_state();
Timer timer;
result1.insert(data2.begin(), data2.end());
printf("std::set::insert(): %.2lfms, %d compare, %d alloc\n", timer.elapsed_ms(), Compare::s_count, g_alloc_count);
}
{
result2 = data1;
Set tmp2 = data2;
reset_state();
Timer timer;
result2.merge(std::move(tmp2));
printf("std::set::merge(): %.2lfms, %d compare, %d alloc\n", timer.elapsed_ms(), Compare::s_count, g_alloc_count);
}
{
reset_state();
Timer timer;
std::set_union(data1.begin(), data1.end(), data2.begin(), data2.end(), std::inserter(result3, result3.begin()));
printf("std::set_union(): %.2lfms, %d compare, %d alloc\n", timer.elapsed_ms(), Compare::s_count, g_alloc_count);
}
{
result4 = data1;
reset_state();
Timer timer;
insert_sorted_sequence(result4, data2.begin(), data2.end());
printf("insert_sorted_sequence(): %.2lfms, %d compare, %d alloc\n", timer.elapsed_ms(), Compare::s_count, g_alloc_count);
}
{
result5 = data1;
Set tmp2 = data2;
reset_state();
Timer timer;
merge_optimized(result5, tmp2);
printf("merge_optimized(): %.2lfms, %d compare, %d alloc\n", timer.elapsed_ms(), Compare::s_count, g_alloc_count);
}
#define expect(Body) if(!(Body)) { throw std::runtime_error(#Body); }
expect(result1 == result2);
expect(result1 == result3);
expect(result1 == result4);
expect(result1 == result5);
#undef expect
}
// outputs:
// std::set::insert(): 60.78ms, 2212207 compare, 200000 alloc
// std::set::merge(): 35.13ms, 2112207 compare, 0 alloc
// std::set_union(): 146.14ms, 199999 compare, 400000 alloc
// insert_sorted_sequence(): 67.78ms, 763326 compare, 200000 alloc
// merge_optimized(): 29.25ms, 763326 compare, 0 alloc
// see also:
// https://qiita.com/i_saint/items/a8bdce5146bb38e69f72
// https://godbolt.org/z/38Exrxj7E
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment