Skip to content

Instantly share code, notes, and snippets.

@mjsabby
Created February 12, 2023 10:22
Show Gist options
  • Save mjsabby/d7baf64d524176ee35f69ffa2c278179 to your computer and use it in GitHub Desktop.
Save mjsabby/d7baf64d524176ee35f69ffa2c278179 to your computer and use it in GitHub Desktop.
Hash Displace Algorithm
namespace PerfectHash
{
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static System.Numerics.BitOperations;
public static class PerfectHash
{
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static int Lookup(ReadOnlySpan<byte> key, ReadOnlySpan<int> values, ReadOnlySpan<int> seeds)
{
ulong hash;
unsafe
{
fixed (byte* ptr = &MemoryMarshal.GetReference(key))
{
hash = Hash64(ptr, key.Length);
}
}
var size = (ulong)values.Length;
var index = hash & (size - 1);
var seed = seeds[(int)index];
if (seed < 0)
{
return values[0 - seed - 1];
}
var x = (ulong)seed + hash;
x ^= x >> 12;
x ^= x << 25;
x ^= x >> 27;
index = (x * 0x2545F4914F6CDD1D) & (size - 1);
return values[(int)index];
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static (int[] values, int[] seeds) Create(byte[][] keys)
{
ulong size = 1;
while (size < (ulong)keys.Length)
{
size *= 2;
}
var h = new List<Tuple<int, ulong>>[size];
for (int i = 0; i < h.Length; ++i)
{
h[i] = new List<Tuple<int, ulong>>();
}
for (int i = 0; i < keys.Length; ++i)
{
var k = keys[i];
ulong hash;
unsafe
{
fixed (byte* ptr = &MemoryMarshal.GetReference<byte>(k))
{
hash = Hash64(ptr, k.Length);
}
}
h[hash % size].Add(Tuple.Create(i + 1, hash));
}
Array.Sort(h, (i, j) => j.Count - i.Count);
var values = new int[size];
var seeds = new int[size];
int index;
for (index = 0; index < h.Length && h[index].Count > 1; ++index)
{
var subkeys = h[index];
ulong seed = 0;
var entries = new Dictionary<ulong, int>();
retry:
{
++seed;
foreach (var k in subkeys)
{
var x = k.Item2 + seed;
x ^= x >> 12;
x ^= x << 25;
x ^= x >> 27;
var hash = x * 0x2545F4914F6CDD1D % size;
if (!entries.ContainsKey(hash) && values[hash] == 0)
{
entries.Add(hash, k.Item1);
continue;
}
entries.Clear();
goto retry;
}
}
foreach (var entry in entries)
{
values[entry.Key] = entry.Value;
}
seeds[subkeys[0].Item2 % size] = (int)seed;
}
var free = new List<int>();
for (int i = 0; i < values.Length; ++i)
{
if (values[i] == 0)
{
free.Add(i);
}
else
{
--values[i];
}
}
for (int i = 0; index < h.Length && h[index].Count > 0; ++i)
{
var k = h[index++][0];
var dst = free[i];
values[dst] = k.Item1 - 1;
seeds[k.Item2 % size] = 0 - (dst + 1);
}
return (values, seeds);
}
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static unsafe ulong Hash64(byte* ptr, int length)
{
const ulong K0 = 0xD6D018F5UL;
const ulong K1 = 0xA2AA033BUL;
const ulong K2 = 0x62992FC1UL;
const ulong K3 = 0x30BC5B29UL;
byte* end = ptr + length;
var hash = 0x52BC33FEDBE4CBB5UL;
if (length >= 32)
{
byte* limit = end - 32;
var v0 = hash;
var v1 = hash;
var v2 = hash;
var v3 = hash;
do
{
v0 += *(ulong*)ptr * K0;
v0 = RotateRight(v0, 29) + v2;
ptr += 8;
v1 += *(ulong*)ptr * K1;
v1 = RotateRight(v1, 29) + v3;
ptr += 8;
v2 += *(ulong*)ptr * K2;
v2 = RotateRight(v2, 29) + v0;
ptr += 8;
v3 += *(ulong*)ptr * K3;
v3 = RotateRight(v3, 29) + v1;
ptr += 8;
} while (ptr <= limit);
v2 ^= RotateRight(((v0 + v3) * K0) + v1, 37) * K1;
v3 ^= RotateRight(((v1 + v2) * K1) + v0, 37) * K0;
v0 ^= RotateRight(((v0 + v2) * K0) + v3, 37) * K1;
v1 ^= RotateRight(((v1 + v3) * K1) + v2, 37) * K0;
hash += v0 ^ v1;
}
if (end - ptr >= 16)
{
var v0 = hash + (*(ulong*)ptr * K2);
v0 = RotateRight(v0, 29) * K3;
ptr += 8;
var v1 = hash + (*(ulong*)ptr * K2);
v1 = RotateRight(v1, 29) * K3;
ptr += 8;
v0 ^= RotateRight(v0 * K0, 21) + v1;
v1 ^= RotateRight(v1 * K3, 21) + v0;
hash += v1;
}
if (end - ptr >= 8)
{
hash += *(ulong*)ptr * K3;
ptr += 8;
hash ^= RotateRight(hash, 55) * K1;
}
if (end - ptr >= 4)
{
hash += *(uint*)ptr * K3;
ptr += 4;
hash ^= RotateRight(hash, 26) * K1;
}
if (end - ptr >= 2)
{
hash += *(ushort*)ptr * K3;
ptr += 2;
hash ^= RotateRight(hash, 48) * K1;
}
if (end - ptr >= 1)
{
hash += ptr[0] * K3;
hash ^= RotateRight(hash, 37) * K1;
}
hash ^= RotateRight(hash, 28);
hash *= K0;
hash ^= RotateRight(hash, 29);
return hash;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment