Last active
July 11, 2023 17:32
-
-
Save baioc/8d9b21de09dd5f876364923c97f95c89 to your computer and use it in GitHub Desktop.
D vs C++ set micro-benchmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module main; | |
import core.stdc.string : strcmp; | |
import core.stdc.time : clock, clock_t, CLOCKS_PER_SEC; | |
import core.stdc.stdlib : rand, srand; | |
import eris.math : Accumulator; | |
// ^ tracks a stream of values and computes some statistics (min, max, avg, var, std) in constant space | |
alias stringz = char*; | |
int main(int argc, const(stringz)* argv) { | |
import core.stdc.stdlib : atoi; | |
const(stringz)[] args = argv[0 .. argc]; | |
if (args.length < 3) return __LINE__; | |
const container = args[0]; | |
const element = args[1]; | |
int n = atoi(args[2]); | |
if (strcmp(container, "aaset") == 0) return benchmarkAASet(element, n); | |
if (strcmp(container, "redblack") == 0) return benchmarkRedBlackTree(element, n); | |
if (strcmp(container, "btree") == 0) return benchmarkBTree(element, n); | |
return __LINE__; | |
} | |
Accumulator benchmark( | |
scope void delegate(out clock_t, out clock_t) code, | |
uint replications = 30 | |
) { | |
Accumulator acc; | |
foreach (rep; 0 .. replications) { | |
clock_t begin, end; | |
version (D_BetterC) {} else { | |
import core.memory : GC; | |
GC.collect(); | |
} | |
code(begin, end); | |
double elapsedUs = (end - begin) * 1e6 / CLOCKS_PER_SEC; | |
acc += elapsedUs; | |
} | |
return acc; | |
} | |
nothrow @nogc pure | |
size_t fnv(const(ubyte)[] bytes) { | |
uint hash = 2166136261u; | |
foreach (b; bytes) { | |
hash ^= b; | |
hash *= 16777619u; | |
} | |
return hash; | |
} | |
struct String32 { | |
char[32] str; | |
alias str this; | |
nothrow @nogc: | |
int opCmp(in String32 other) const { | |
import core.stdc.string : strncmp; | |
return strncmp(this.str.ptr, other.str.ptr, str.sizeof); | |
} | |
size_t toHash() const @trusted { | |
return fnv((cast(const(ubyte)*)this.str)[0 .. str.sizeof]); | |
} | |
bool opEquals(in String32 other) const => (this.opCmp(other) == 0); | |
} | |
void randomize(int* output) { | |
*output = rand(); | |
} | |
void randomize(String32* output) { | |
import core.stdc.stdio : snprintf; | |
snprintf(output.ptr, output.length, "%031d", rand()); | |
} | |
int setBenchmarks(string name, alias Set, Element)(int n, uint seed = 0) { | |
if (n < 0) return __LINE__; | |
const n2 = n * 2; | |
import core.stdc.stdlib : calloc, free, exit; | |
auto buffer = cast(Element*) calloc(n2, Element.sizeof); | |
scope(exit) free(buffer); | |
if (n2 != 0 && buffer == null) return __LINE__; | |
srand(seed); | |
foreach (i; 0 .. n2) randomize(&buffer[i]); | |
Element[] xs = buffer[0 .. n]; | |
Element[] xs2 = buffer[0 .. n2]; | |
const upsert = benchmark((out begin, out end){ | |
auto set = Set!Element(); | |
begin = clock(); | |
foreach (ref x; xs) set.upsert(x); | |
end = clock(); | |
}); | |
const lookupFind = benchmark((out begin, out end){ | |
auto set = Set!Element(); | |
foreach (ref x; xs) set.upsert(x); | |
begin = clock(); | |
foreach (ref x; xs) { | |
if (x !in set) exit(__LINE__); | |
} | |
end = clock(); | |
}); | |
import std.algorithm.comparison : max; | |
int rss = 0; | |
const lookupFail = benchmark((out begin, out end){ | |
auto set = Set!Element(); | |
foreach (ref x; xs2) set.upsert(x); | |
rss = max(rss, cast(int) set.length); | |
foreach (ref x; xs) set.remove(x); | |
begin = clock(); | |
foreach (ref x; xs) { | |
if (x in set) exit(__LINE__); | |
} | |
end = clock(); | |
}); | |
const remove = benchmark((out begin, out end){ | |
auto set = Set!Element(); | |
foreach (ref x; xs) set.upsert(x); | |
begin = clock(); | |
foreach (ref x; xs) set.remove(x); | |
end = clock(); | |
}); | |
import std.meta : AliasSeq; | |
static foreach (op; AliasSeq!(upsert, lookupFind, lookupFail, remove)) {{ | |
import core.stdc.stdio : printf; | |
enum string format = | |
"container=" ~ name ~ | |
"\telement=" ~ Element.stringof ~ | |
"\toperation=" ~ op.stringof ~ | |
"\tn=%d\tres=%d\tavg=%.3f\tstd=%.3f\tmin=%.3f\tmax=%.3f\n"; | |
printf(format, n, rss, op.mean, op.std, op.min, op.max); | |
}} | |
return 0; | |
} | |
struct AASet(T) { | |
import eris.core : Unit; | |
Unit[T] aa; | |
void upsert(T x) { aa[x] = Unit.init; } | |
bool opBinaryRight(string op : "in")(in T x) const => (x in aa) != null; | |
void remove(in T x) { aa.remove(x); } | |
@property size_t length() const => aa.length; | |
} | |
int benchmarkAASet(const(stringz) element, int n) { | |
version (D_BetterC) { | |
assert(0, "no AAs in betterC mode"); | |
} else { | |
if (strcmp(element, "int") == 0) { | |
return setBenchmarks!("AA-as-set", AASet, int)(n); | |
} else if (strcmp(element, "String32") == 0) { | |
return setBenchmarks!("AA-as-set", AASet, String32)(n); | |
} else { | |
return __LINE__; | |
} | |
} | |
} | |
struct RedBlackTree(T) { | |
import std.container.rbtree; | |
std.container.rbtree.RedBlackTree!T rbt; | |
static RedBlackTree opCall() { RedBlackTree t = { rbt: redBlackTree!T() }; return t; } | |
void upsert(T x) { rbt.insert(x); } | |
bool opBinaryRight(string op : "in")(in T x) const => x in rbt; | |
void remove(in T x) { rbt.removeKey(x); } | |
@property size_t length() const => rbt.length; | |
} | |
int benchmarkRedBlackTree(const(stringz) element, int n) { | |
version (D_BetterC) { | |
assert(0, "no std.container.rbtree in betterC mode"); | |
} else { | |
if (strcmp(element, "int") == 0) { | |
return setBenchmarks!("std.container.rbtree", RedBlackTree, int)(n); | |
} else if (strcmp(element, "String32") == 0) { | |
return setBenchmarks!("std.container.rbtree", RedBlackTree, String32)(n); | |
} else { | |
return __LINE__; | |
} | |
} | |
} | |
struct BTree(T) { | |
import eris.btree; | |
eris.btree.BTree!T btree; | |
~this() { btree.clear(); } | |
@disable this(this); | |
void upsert(T x) { btree.put(x); } | |
bool opBinaryRight(string op : "in")(in T x) const => x in btree; | |
void remove(in T x) { btree.remove(x); } | |
@property size_t length() const => btree.length; | |
} | |
int benchmarkBTree(const(stringz) element, int n) { | |
if (strcmp(element, "int") == 0) { | |
return setBenchmarks!("eris.btree", BTree, int)(n); | |
} else if (strcmp(element, "String32") == 0) { | |
return setBenchmarks!("eris.btree", BTree, String32)(n); | |
} else { | |
return __LINE__; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <string.h> | |
#include <time.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <set> | |
#include <unordered_set> | |
#include <functional> | |
#include <memory> | |
extern "C" { | |
#include "eris.h" | |
// ^ C wrapper for import eris.math.Accumulator | |
} | |
size_t fnv(const void *ptr, size_t n) { | |
uint32_t hash = 2166136261u; | |
const unsigned char *bytes = static_cast<const unsigned char *>(ptr); | |
for (size_t i = 0; i < n; ++i) { | |
hash ^= *bytes++; | |
hash *= 16777619; | |
} | |
return hash; | |
} | |
Accumulator benchmark( | |
std::function<void (clock_t&, clock_t&)> code, | |
unsigned replications = 30 | |
) { | |
auto acc = ACCUMULATOR_INIT; | |
for (long rep = 0; rep < replications; ++rep) { | |
clock_t begin, end; | |
code(begin, end); | |
double elapsedUs = (end - begin) * 1e6 / CLOCKS_PER_SEC; | |
accumulator_put(&acc, elapsedUs); | |
} | |
return acc; | |
} | |
struct String32 { | |
char chars[32]; | |
bool operator==(const String32 &other) const { | |
return strncmp(this->chars, other.chars, sizeof(this->chars)) == 0; | |
} | |
struct Less { | |
bool operator()(const String32& a, const String32& b) const { | |
return strncmp(a.chars, b.chars, sizeof(a.chars)) < 0; | |
} | |
}; | |
struct Hash { | |
bool operator()(const String32& self) const { | |
return fnv(self.chars, sizeof(self.chars)); | |
} | |
}; | |
}; | |
void randomize(int *output) { | |
*output = rand(); | |
} | |
void randomize(String32 *output) { | |
snprintf(output->chars, sizeof(output->chars), "%031d", rand()); | |
} | |
template <template <typename, typename> class Set, typename T, typename Aux> | |
int setBenchmarks(const char *container, const char *element, int n, unsigned seed = 0) { | |
if (n < 0) return __LINE__; | |
const auto n2 = n * 2; | |
auto buffer = std::make_unique<T[]>(n2); | |
if (buffer == nullptr) return __LINE__; | |
srand(seed); | |
for (int i = 0; i < n2; ++i) randomize(&buffer[i]); | |
auto upsert = benchmark([&](clock_t& begin, clock_t& end){ | |
auto set = Set<T,Aux>(); | |
begin = clock(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
set.upsert(x); | |
} | |
end = clock(); | |
}); | |
auto lookupFind = benchmark([&](clock_t& begin, clock_t& end){ | |
auto set = Set<T,Aux>(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
set.upsert(x); | |
} | |
begin = clock(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
if (!set.contains(x)) exit(__LINE__); | |
} | |
end = clock(); | |
}); | |
int rss = 0; | |
auto lookupFail = benchmark([&](clock_t& begin, clock_t& end){ | |
auto set = Set<T,Aux>(); | |
for (int i = 0; i < n2; ++i) { | |
auto& x = buffer[i]; | |
set.upsert(x); | |
} | |
rss = set.length() > rss ? set.length() : rss; | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
set.remove(x); | |
} | |
begin = clock(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
if (set.contains(x)) exit(__LINE__); | |
} | |
end = clock(); | |
}); | |
auto remove = benchmark([&](clock_t& begin, clock_t& end){ | |
auto set = Set<T,Aux>(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
set.upsert(x); | |
} | |
begin = clock(); | |
for (int i = 0; i < n; ++i) { | |
auto& x = buffer[i]; | |
set.remove(x); | |
} | |
end = clock(); | |
}); | |
#define PRINT_RESULT(OP) \ | |
printf( \ | |
"container=%s\telement=%s\toperation=%s\tn=%d\tres=%d\tavg=%.3f\tstd=%.3f\tmin=%.3f\tmax=%.3f\n", \ | |
container, element, #OP, n, rss, accumulator_mean(&(OP)), \ | |
accumulator_std(&(OP)), accumulator_min(&(OP)), accumulator_max(&(OP)) \ | |
) | |
PRINT_RESULT(upsert); | |
PRINT_RESULT(lookupFind); | |
PRINT_RESULT(lookupFail); | |
PRINT_RESULT(remove); | |
#undef PRINT_RESULT | |
return 0; | |
} | |
template <typename T, typename Comp> | |
struct StdSet { | |
std::set<T,Comp> treeSet; | |
void upsert(T x) { treeSet.insert(std::move(x)); } | |
bool contains(const T& x) const { return treeSet.count(x) != 0; } | |
void remove(const T& x) { treeSet.erase(x); } | |
std::size_t length() const { return treeSet.size(); } | |
}; | |
int benchmarkStdSet(const char *element, int n) { | |
if (strcmp(element, "int") == 0) { | |
return setBenchmarks<StdSet, int, std::less<int>>("std::set", "int", n); | |
} else if (strcmp(element, "String32") == 0) { | |
return setBenchmarks<StdSet, String32, String32::Less>("std::set", "String32", n); | |
} | |
return __LINE__; | |
} | |
template <typename T, typename Hash> | |
struct StdUnorderedSet { | |
std::unordered_set<T,Hash> hashSet; | |
void upsert(T x) { hashSet.insert(std::move(x)); } | |
bool contains(const T& x) const { return hashSet.count(x) != 0; } | |
void remove(const T& x) { hashSet.erase(x); } | |
std::size_t length() const { return hashSet.size(); } | |
}; | |
int benchmarkStdUnorderedSet(const char *element, int n) { | |
if (strcmp(element, "int") == 0) { | |
return setBenchmarks<StdUnorderedSet, int, std::hash<int>>("std::unordered_set", "int", n); | |
} else if (strcmp(element, "String32") == 0) { | |
return setBenchmarks<StdUnorderedSet, String32, String32::Hash>("std::unordered_set", "String32", n); | |
} | |
return __LINE__; | |
} | |
int main(int argc, char const *argv[]) { | |
if (argc <= 3) return __LINE__; | |
const char *container = argv[1]; | |
const char *element = argv[2]; | |
int n = atoi(argv[3]); | |
if (strcmp(container, "std_set") == 0) return benchmarkStdSet(element, n); | |
if (strcmp(container, "std_unordered_set") == 0) return benchmarkStdUnorderedSet(element, n); | |
return __LINE__; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment