skeeto/README.md

## README.md

      
    Raw
  

              README.md
            
          
    MSI hash table benchmarks

These are benchmarks demonstrating claims made in the quick and practical
"MSI" hash table. It pits an MSI hash table set in C using an
integer permutation hash and an AES-NI hash, a C++ std::set, and Go
map[string]struct{}. Inputs are strings of length 1–8.
My results on an i7-8650U, GCC 12.0.1 (libstdc++), Clang 14.0.6 (libc++):
                   Time (s)  Memory (MiB)
GCC MSI/perm       0.27      64
Clang MSI/perm     0.27      "
GCC MSI/AES-NI     0.32      "
Clang MSI/AES-NI   0.32      "
GCC std::set       1.08      320
Clang std::set     1.43      320
Go 1.19            1.46      320

The clear winner is MSI with an integer permutation, followed by MSI with
AES-NI. Overall, MSI is an order of magnitude faster and smaller than a
generic hash table, in large part because it's custom-tailored for the
context.
As an added bonus, UBSan and ASan have practically no performance impact
on the MSI benchmarks, and debug builds (-O0) are only about a 2x
slowdown (i.e. still faster than optimized C++), vs. a 10x slowdown for
C++.
Likely objection: The C++ and Go versions use full strings and dynamically
allocate, while the C version uses fixed-length strings and table, so it's
not fair. However, that's the whole point. The C++ and Go examples are
written idiomatically for this problem, but the use of MSI in the C
version is part of a holistic strategy — something not possible
with generic hash tables — which is also why it's longer.

  
## bench.c
// C hash table benchmarks
// Using an integer hash:
//   $ cc -O3 bench.c
// Using AES-NI:
//   $ cc -O3 -maes bench.c
#include <assert.h>
#include <stdint.h>
#include <string.h>
#if __amd64__ && __AES__
#  include <immintrin.h>
#  include <wmmintrin.h>
#endif

#define N 22

struct val { char s[8]; };

static struct val
make(int x)
{
    struct val v = {{0}};
    int len = 1 + (x>9) + (x>99) + (x>999) + (x>9999) + (x>99999) +
              (x>999999) + (x>9999999);
    switch (len) {
        case 8: v.s[7] = '0' + x%10; x /= 10;  // fallthrough
        case 7: v.s[6] = '0' + x%10; x /= 10;  // fallthrough
        case 6: v.s[5] = '0' + x%10; x /= 10;  // fallthrough
        case 5: v.s[4] = '0' + x%10; x /= 10;  // fallthrough
        case 4: v.s[3] = '0' + x%10; x /= 10;  // fallthrough
        case 3: v.s[2] = '0' + x%10; x /= 10;  // fallthrough
        case 2: v.s[1] = '0' + x%10; x /= 10;  // fallthrough
        case 1: v.s[0] = '0' + x%10;
    }
    return v;
}

static uint64_t
hash(struct val v)
{
    #if __amd64__ && __AES__
    __m128i h = _mm_loadu_si64(v.s);
    h = _mm_aesenc_si128(h, h);
    h = _mm_aesenc_si128(h, h);
    h = _mm_aesenc_si128(h, h);
    return _mm_cvtsi128_si64(h);
    #else
    uint64_t h;
    memcpy(&h, v.s, 8);
    return h * 1111111111111111111;
    #endif
}

static int
equal(struct val a, struct val b)
{
    return !memcmp(a.s, b.s, 8);
}

static int
next(uint64_t hash, int exp, int i)
{
    unsigned mask = (1 << exp) - 1;
    unsigned step = hash>>(64 - exp) | 1;
    return (i + step) & mask;
}

int main(void)
{
    int len = 0;
    static struct val ht[1L<<(N+1)];
    for (int i = 0; i < 1<<N; i++) {
        struct val v = make(i);
        uint64_t h = hash(v);
        for (int i = h;;) {
            i = next(h, N+1, i);
            if (!ht[i].s[0]) {
                ht[i] = v;
                len++;
                break;
            }
            assert(!equal(ht[i], v));
        }
    }
    assert(len == 1<<N);
}

## bench.cpp
// C++ hash table benchmark
#include <assert.h>
#include <set>
#include <string>

#define N 22

int main()
{
    int len = 0;
    std::set<std::string> ht;
    for (int i = 0; i < 1<<N; i++) {
        auto v = std::to_string(i);
        auto r = ht.insert(v);
        len += r.second;
    }
    assert(len == 1<<N);
    return 0;
}

## bench.go
// Go hash table benchmark
package main

import "strconv"

func main() {
	const n = 22
	seen := make(map[string]struct{})
	for i := 0; i < 1<<n; i++ {
		v := strconv.Itoa(i)
		seen[v] = struct{}{}
	}
	if len(seen) != 1<<22 {
		panic(len(seen))
	}
}
	// C hash table benchmarks
	// Using an integer hash:
	// $ cc -O3 bench.c
	// Using AES-NI:
	// $ cc -O3 -maes bench.c
	#include <assert.h>
	#include <stdint.h>
	#include <string.h>
	#if __amd64__ && __AES__
	# include <immintrin.h>
	# include <wmmintrin.h>
	#endif

	#define N 22

	struct val { char s[8]; };

	static struct val
	make(int x)
	{
	struct val v = {{0}};
	int len = 1 + (x>9) + (x>99) + (x>999) + (x>9999) + (x>99999) +
	(x>999999) + (x>9999999);
	switch (len) {
	case 8: v.s[7] = '0' + x%10; x /= 10; // fallthrough
	case 7: v.s[6] = '0' + x%10; x /= 10; // fallthrough
	case 6: v.s[5] = '0' + x%10; x /= 10; // fallthrough
	case 5: v.s[4] = '0' + x%10; x /= 10; // fallthrough
	case 4: v.s[3] = '0' + x%10; x /= 10; // fallthrough
	case 3: v.s[2] = '0' + x%10; x /= 10; // fallthrough
	case 2: v.s[1] = '0' + x%10; x /= 10; // fallthrough
	case 1: v.s[0] = '0' + x%10;
	}
	return v;
	}

	static uint64_t
	hash(struct val v)
	{
	#if __amd64__ && __AES__
	__m128i h = _mm_loadu_si64(v.s);
	h = _mm_aesenc_si128(h, h);
	h = _mm_aesenc_si128(h, h);
	h = _mm_aesenc_si128(h, h);
	return _mm_cvtsi128_si64(h);
	#else
	uint64_t h;
	memcpy(&h, v.s, 8);
	return h * 1111111111111111111;
	#endif
	}

	static int
	equal(struct val a, struct val b)
	{
	return !memcmp(a.s, b.s, 8);
	}

	static int
	next(uint64_t hash, int exp, int i)
	{
	unsigned mask = (1 << exp) - 1;
	unsigned step = hash>>(64 - exp) \| 1;
	return (i + step) & mask;
	}

	int main(void)
	{
	int len = 0;
	static struct val ht[1L<<(N+1)];
	for (int i = 0; i < 1<<N; i++) {
	struct val v = make(i);
	uint64_t h = hash(v);
	for (int i = h;;) {
	i = next(h, N+1, i);
	if (!ht[i].s[0]) {
	ht[i] = v;
	len++;
	break;
	}
	assert(!equal(ht[i], v));
	}
	}
	assert(len == 1<<N);
	}
	// C++ hash table benchmark
	#include <assert.h>
	#include <set>
	#include <string>

	#define N 22

	int main()
	{
	int len = 0;
	std::set<std::string> ht;
	for (int i = 0; i < 1<<N; i++) {
	auto v = std::to_string(i);
	auto r = ht.insert(v);
	len += r.second;
	}
	assert(len == 1<<N);
	return 0;
	}
	// Go hash table benchmark
	package main

	import "strconv"

	func main() {
	const n = 22
	seen := make(map[string]struct{})
	for i := 0; i < 1<<n; i++ {
	v := strconv.Itoa(i)
	seen[v] = struct{}{}
	}
	if len(seen) != 1<<22 {
	panic(len(seen))
	}
	}