Skip to content

Instantly share code, notes, and snippets.

@baioc
Last active July 11, 2023 17:32
Show Gist options
  • Save baioc/8d9b21de09dd5f876364923c97f95c89 to your computer and use it in GitHub Desktop.
Save baioc/8d9b21de09dd5f876364923c97f95c89 to your computer and use it in GitHub Desktop.
D vs C++ set micro-benchmarks
module main;
import core.stdc.string : strcmp;
import core.stdc.time : clock, clock_t, CLOCKS_PER_SEC;
import core.stdc.stdlib : rand, srand;
import eris.math : Accumulator;
// ^ tracks a stream of values and computes some statistics (min, max, avg, var, std) in constant space
alias stringz = char*;
int main(int argc, const(stringz)* argv) {
import core.stdc.stdlib : atoi;
const(stringz)[] args = argv[0 .. argc];
if (args.length < 3) return __LINE__;
const container = args[0];
const element = args[1];
int n = atoi(args[2]);
if (strcmp(container, "aaset") == 0) return benchmarkAASet(element, n);
if (strcmp(container, "redblack") == 0) return benchmarkRedBlackTree(element, n);
if (strcmp(container, "btree") == 0) return benchmarkBTree(element, n);
return __LINE__;
}
Accumulator benchmark(
scope void delegate(out clock_t, out clock_t) code,
uint replications = 30
) {
Accumulator acc;
foreach (rep; 0 .. replications) {
clock_t begin, end;
version (D_BetterC) {} else {
import core.memory : GC;
GC.collect();
}
code(begin, end);
double elapsedUs = (end - begin) * 1e6 / CLOCKS_PER_SEC;
acc += elapsedUs;
}
return acc;
}
nothrow @nogc pure
size_t fnv(const(ubyte)[] bytes) {
uint hash = 2166136261u;
foreach (b; bytes) {
hash ^= b;
hash *= 16777619u;
}
return hash;
}
struct String32 {
char[32] str;
alias str this;
nothrow @nogc:
int opCmp(in String32 other) const {
import core.stdc.string : strncmp;
return strncmp(this.str.ptr, other.str.ptr, str.sizeof);
}
size_t toHash() const @trusted {
return fnv((cast(const(ubyte)*)this.str)[0 .. str.sizeof]);
}
bool opEquals(in String32 other) const => (this.opCmp(other) == 0);
}
void randomize(int* output) {
*output = rand();
}
void randomize(String32* output) {
import core.stdc.stdio : snprintf;
snprintf(output.ptr, output.length, "%031d", rand());
}
int setBenchmarks(string name, alias Set, Element)(int n, uint seed = 0) {
if (n < 0) return __LINE__;
const n2 = n * 2;
import core.stdc.stdlib : calloc, free, exit;
auto buffer = cast(Element*) calloc(n2, Element.sizeof);
scope(exit) free(buffer);
if (n2 != 0 && buffer == null) return __LINE__;
srand(seed);
foreach (i; 0 .. n2) randomize(&buffer[i]);
Element[] xs = buffer[0 .. n];
Element[] xs2 = buffer[0 .. n2];
const upsert = benchmark((out begin, out end){
auto set = Set!Element();
begin = clock();
foreach (ref x; xs) set.upsert(x);
end = clock();
});
const lookupFind = benchmark((out begin, out end){
auto set = Set!Element();
foreach (ref x; xs) set.upsert(x);
begin = clock();
foreach (ref x; xs) {
if (x !in set) exit(__LINE__);
}
end = clock();
});
import std.algorithm.comparison : max;
int rss = 0;
const lookupFail = benchmark((out begin, out end){
auto set = Set!Element();
foreach (ref x; xs2) set.upsert(x);
rss = max(rss, cast(int) set.length);
foreach (ref x; xs) set.remove(x);
begin = clock();
foreach (ref x; xs) {
if (x in set) exit(__LINE__);
}
end = clock();
});
const remove = benchmark((out begin, out end){
auto set = Set!Element();
foreach (ref x; xs) set.upsert(x);
begin = clock();
foreach (ref x; xs) set.remove(x);
end = clock();
});
import std.meta : AliasSeq;
static foreach (op; AliasSeq!(upsert, lookupFind, lookupFail, remove)) {{
import core.stdc.stdio : printf;
enum string format =
"container=" ~ name ~
"\telement=" ~ Element.stringof ~
"\toperation=" ~ op.stringof ~
"\tn=%d\tres=%d\tavg=%.3f\tstd=%.3f\tmin=%.3f\tmax=%.3f\n";
printf(format, n, rss, op.mean, op.std, op.min, op.max);
}}
return 0;
}
struct AASet(T) {
import eris.core : Unit;
Unit[T] aa;
void upsert(T x) { aa[x] = Unit.init; }
bool opBinaryRight(string op : "in")(in T x) const => (x in aa) != null;
void remove(in T x) { aa.remove(x); }
@property size_t length() const => aa.length;
}
int benchmarkAASet(const(stringz) element, int n) {
version (D_BetterC) {
assert(0, "no AAs in betterC mode");
} else {
if (strcmp(element, "int") == 0) {
return setBenchmarks!("AA-as-set", AASet, int)(n);
} else if (strcmp(element, "String32") == 0) {
return setBenchmarks!("AA-as-set", AASet, String32)(n);
} else {
return __LINE__;
}
}
}
struct RedBlackTree(T) {
import std.container.rbtree;
std.container.rbtree.RedBlackTree!T rbt;
static RedBlackTree opCall() { RedBlackTree t = { rbt: redBlackTree!T() }; return t; }
void upsert(T x) { rbt.insert(x); }
bool opBinaryRight(string op : "in")(in T x) const => x in rbt;
void remove(in T x) { rbt.removeKey(x); }
@property size_t length() const => rbt.length;
}
int benchmarkRedBlackTree(const(stringz) element, int n) {
version (D_BetterC) {
assert(0, "no std.container.rbtree in betterC mode");
} else {
if (strcmp(element, "int") == 0) {
return setBenchmarks!("std.container.rbtree", RedBlackTree, int)(n);
} else if (strcmp(element, "String32") == 0) {
return setBenchmarks!("std.container.rbtree", RedBlackTree, String32)(n);
} else {
return __LINE__;
}
}
}
struct BTree(T) {
import eris.btree;
eris.btree.BTree!T btree;
~this() { btree.clear(); }
@disable this(this);
void upsert(T x) { btree.put(x); }
bool opBinaryRight(string op : "in")(in T x) const => x in btree;
void remove(in T x) { btree.remove(x); }
@property size_t length() const => btree.length;
}
int benchmarkBTree(const(stringz) element, int n) {
if (strcmp(element, "int") == 0) {
return setBenchmarks!("eris.btree", BTree, int)(n);
} else if (strcmp(element, "String32") == 0) {
return setBenchmarks!("eris.btree", BTree, String32)(n);
} else {
return __LINE__;
}
}
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
#include <stdint.h>
#include <set>
#include <unordered_set>
#include <functional>
#include <memory>
extern "C" {
#include "eris.h"
// ^ C wrapper for import eris.math.Accumulator
}
size_t fnv(const void *ptr, size_t n) {
uint32_t hash = 2166136261u;
const unsigned char *bytes = static_cast<const unsigned char *>(ptr);
for (size_t i = 0; i < n; ++i) {
hash ^= *bytes++;
hash *= 16777619;
}
return hash;
}
Accumulator benchmark(
std::function<void (clock_t&, clock_t&)> code,
unsigned replications = 30
) {
auto acc = ACCUMULATOR_INIT;
for (long rep = 0; rep < replications; ++rep) {
clock_t begin, end;
code(begin, end);
double elapsedUs = (end - begin) * 1e6 / CLOCKS_PER_SEC;
accumulator_put(&acc, elapsedUs);
}
return acc;
}
struct String32 {
char chars[32];
bool operator==(const String32 &other) const {
return strncmp(this->chars, other.chars, sizeof(this->chars)) == 0;
}
struct Less {
bool operator()(const String32& a, const String32& b) const {
return strncmp(a.chars, b.chars, sizeof(a.chars)) < 0;
}
};
struct Hash {
bool operator()(const String32& self) const {
return fnv(self.chars, sizeof(self.chars));
}
};
};
void randomize(int *output) {
*output = rand();
}
void randomize(String32 *output) {
snprintf(output->chars, sizeof(output->chars), "%031d", rand());
}
template <template <typename, typename> class Set, typename T, typename Aux>
int setBenchmarks(const char *container, const char *element, int n, unsigned seed = 0) {
if (n < 0) return __LINE__;
const auto n2 = n * 2;
auto buffer = std::make_unique<T[]>(n2);
if (buffer == nullptr) return __LINE__;
srand(seed);
for (int i = 0; i < n2; ++i) randomize(&buffer[i]);
auto upsert = benchmark([&](clock_t& begin, clock_t& end){
auto set = Set<T,Aux>();
begin = clock();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
set.upsert(x);
}
end = clock();
});
auto lookupFind = benchmark([&](clock_t& begin, clock_t& end){
auto set = Set<T,Aux>();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
set.upsert(x);
}
begin = clock();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
if (!set.contains(x)) exit(__LINE__);
}
end = clock();
});
int rss = 0;
auto lookupFail = benchmark([&](clock_t& begin, clock_t& end){
auto set = Set<T,Aux>();
for (int i = 0; i < n2; ++i) {
auto& x = buffer[i];
set.upsert(x);
}
rss = set.length() > rss ? set.length() : rss;
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
set.remove(x);
}
begin = clock();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
if (set.contains(x)) exit(__LINE__);
}
end = clock();
});
auto remove = benchmark([&](clock_t& begin, clock_t& end){
auto set = Set<T,Aux>();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
set.upsert(x);
}
begin = clock();
for (int i = 0; i < n; ++i) {
auto& x = buffer[i];
set.remove(x);
}
end = clock();
});
#define PRINT_RESULT(OP) \
printf( \
"container=%s\telement=%s\toperation=%s\tn=%d\tres=%d\tavg=%.3f\tstd=%.3f\tmin=%.3f\tmax=%.3f\n", \
container, element, #OP, n, rss, accumulator_mean(&(OP)), \
accumulator_std(&(OP)), accumulator_min(&(OP)), accumulator_max(&(OP)) \
)
PRINT_RESULT(upsert);
PRINT_RESULT(lookupFind);
PRINT_RESULT(lookupFail);
PRINT_RESULT(remove);
#undef PRINT_RESULT
return 0;
}
template <typename T, typename Comp>
struct StdSet {
std::set<T,Comp> treeSet;
void upsert(T x) { treeSet.insert(std::move(x)); }
bool contains(const T& x) const { return treeSet.count(x) != 0; }
void remove(const T& x) { treeSet.erase(x); }
std::size_t length() const { return treeSet.size(); }
};
int benchmarkStdSet(const char *element, int n) {
if (strcmp(element, "int") == 0) {
return setBenchmarks<StdSet, int, std::less<int>>("std::set", "int", n);
} else if (strcmp(element, "String32") == 0) {
return setBenchmarks<StdSet, String32, String32::Less>("std::set", "String32", n);
}
return __LINE__;
}
template <typename T, typename Hash>
struct StdUnorderedSet {
std::unordered_set<T,Hash> hashSet;
void upsert(T x) { hashSet.insert(std::move(x)); }
bool contains(const T& x) const { return hashSet.count(x) != 0; }
void remove(const T& x) { hashSet.erase(x); }
std::size_t length() const { return hashSet.size(); }
};
int benchmarkStdUnorderedSet(const char *element, int n) {
if (strcmp(element, "int") == 0) {
return setBenchmarks<StdUnorderedSet, int, std::hash<int>>("std::unordered_set", "int", n);
} else if (strcmp(element, "String32") == 0) {
return setBenchmarks<StdUnorderedSet, String32, String32::Hash>("std::unordered_set", "String32", n);
}
return __LINE__;
}
int main(int argc, char const *argv[]) {
if (argc <= 3) return __LINE__;
const char *container = argv[1];
const char *element = argv[2];
int n = atoi(argv[3]);
if (strcmp(container, "std_set") == 0) return benchmarkStdSet(element, n);
if (strcmp(container, "std_unordered_set") == 0) return benchmarkStdUnorderedSet(element, n);
return __LINE__;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment