Skip to content

Instantly share code, notes, and snippets.

@am11
Last active May 4, 2024 10:06
Show Gist options
  • Save am11/22eaa6584de55483d988d9831899bcd3 to your computer and use it in GitHub Desktop.
Save am11/22eaa6584de55483d988d9831899bcd3 to your computer and use it in GitHub Desktop.
CHM3 Hash Table Generator
using System;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
// input format is hex with 0x prefix; one per line:
// 0x5DF
// 0x123
// etc.
CHM3PerfectHashGenerator hasher = new(File.ReadAllLines(args[0])
.Where(lines => !string.IsNullOrEmpty(lines))
.Select(line => Convert.ToUInt32(line.Substring(2), 16)) // Substring(2) remove 0x prefix.
.ToArray(), Console.Out);
hasher.Execute();
/// <summary>
/// Generates perfect hash using CHM3 algorithm for a given set of unquie keys.
/// </summary>
/// <remarks>
/// This implementation is based on NetBSD's nbperf utlity:
/// <see cref="https://github.com/rurban/nbperf/tree/d878cd8b8ae313541cbe311a19fd5229af7e3aa4"/>.
/// It produces equivalent results to <code>nbperf -Ip list</code> output.
/// </remarks>
public sealed class CHM3PerfectHashGenerator(uint[] keys, TextWriter output)
{
private const uint Seed = 0x85EBCA6B;
private const ulong RandomMultiplier = 0x9DDFEA08EB382D69;
private static uint predictableCounter = 0;
private bool checkDuplicates = false;
public void Execute()
{
while (true)
{
if (Chm3Compute() == 0) break;
checkDuplicates = true;
}
}
private struct Vertex
{
public uint Degree;
public uint Edges;
};
[InlineArray(3)]
private struct Edge
{
private uint _vertices;
};
private struct Graph
{
public Vertex[] Vertices;
public Edge[] Edges;
public uint OutputIndex;
public uint[] OutputOrder;
public uint E, V, Va;
public int HashFudge;
};
private struct State
{
public Graph Graph;
public uint[] G;
public byte[] Visited;
};
private int SortingIntcmp(uint a, uint b, Graph graph)
{
if (a == b) return 0;
ref readonly Edge ea = ref graph.Edges[a];
ref readonly Edge eb = ref graph.Edges[b];
for (int i = 0; i < 3; ++i)
{
if (ea[i] < eb[i])
{
return -1;
}
if (ea[i] > eb[i])
{
return 1;
}
}
if (keys[a] == keys[b])
{
throw new ArgumentException(nameof(keys), $"Duplicate key found! Key: {keys[a]}, a: {a}, b: {b}");
}
return 0;
}
private void GraphCheckDuplicates(Graph graph)
{
uint[] keyIndex = new uint[graph.E];
for (uint i = 0; i < graph.E; ++i)
{
keyIndex[i] = i;
}
Array.Sort(keyIndex, (a, b) => SortingIntcmp(a, b, graph));
}
private void GraphAddEdge(ref Graph graph, uint edge)
{
ref readonly Edge e = ref graph.Edges[edge];
for (int i = 0; i < 3; ++i)
{
ref Vertex v = ref graph.Vertices[e[i]];
v.Edges ^= edge;
++v.Degree;
}
}
private void GraphRemoveEdge(ref Graph graph, uint edge)
{
ref readonly Edge e = ref graph.Edges[edge];
for (int i = 0; i < 3; ++i)
{
ref Vertex v = ref graph.Vertices[e[i]];
v.Edges ^= edge;
--v.Degree;
}
}
private void GraphRemoveVertex(ref Graph graph, uint vertex)
{
Vertex v = graph.Vertices[vertex];
if (v.Degree == 1)
{
uint e = v.Edges;
graph.OutputOrder[--graph.OutputIndex] = e;
GraphRemoveEdge(ref graph, e);
}
}
private int GraphHash(uint iterationSeed1, uint iterationSeed2, ref Graph graph)
{
uint[] hashes = new uint[4];
short[] hashes16 = new short[8];
Array.Clear(graph.Vertices, 0, (int)graph.Va);
graph.HashFudge = 0;
for (int i = 0; i < graph.E; ++i)
{
Inthash4Compute(keys[i], iterationSeed1, iterationSeed2, hashes16);
ref Edge e = ref graph.Edges[i];
for (int j = 0; j < 3; ++j)
{
e[j] = (ushort)hashes16[j] % graph.V;
if (j == 1 && e[0] == e[1])
{
return -1;
}
if (j == 2 && (e[0] == e[2] || e[1] == e[2]))
{
return -1;
}
}
}
for (uint i = 0; i < graph.E; ++i)
{
GraphAddEdge(ref graph, i);
}
if (checkDuplicates)
{
checkDuplicates = false;
GraphCheckDuplicates(graph);
}
return 0;
}
private int GraphOutputOrder(ref Graph graph)
{
graph.OutputIndex = graph.E;
for (uint i = 0; i < graph.V; ++i)
{
GraphRemoveVertex(ref graph, i);
}
for (uint i = graph.E; graph.OutputIndex > 0 && i > graph.OutputIndex;)
{
ref readonly Edge e2 = ref graph.Edges[graph.OutputOrder[--i]];
for (int j = 0; j < 3; ++j)
{
GraphRemoveVertex(ref graph, e2[j]);
}
}
if (graph.OutputIndex != 0)
{
return -1;
}
return 0;
}
private void AssignNodes(ref State state)
{
for (uint i = 0; i < state.Graph.E; ++i)
{
uint e_idx = state.Graph.OutputOrder[i];
ref readonly Edge e = ref state.Graph.Edges[e_idx];
uint v0, v1, v2, g;
if (state.Visited[e[0]] == 0)
{
v0 = e[0];
v1 = e[1];
v2 = e[2];
}
else if (state.Visited[e[1]] == 0)
{
v0 = e[1];
v1 = e[0];
v2 = e[2];
}
else
{
v0 = e[2];
v1 = e[0];
v2 = e[1];
}
g = e_idx - state.G[v1] - state.G[v2];
if (g >= state.Graph.E)
{
g += state.Graph.E;
if (g >= state.Graph.E)
{
g += state.Graph.E;
}
}
state.G[v0] = g;
state.Visited[v0] = 1;
state.Visited[v1] = 1;
state.Visited[v2] = 1;
}
}
private void PrintHash(uint iterationSeed1, uint iterationSeed2, ref State state)
{
output.WriteLine($@"#include <stdint.h>
static inline void _inthash4(const int32_t key, uint64_t *h)
{{
*h = (int64_t)key * (UINT64_C(0x{RandomMultiplier:X}) + UINT64_C({iterationSeed1})) + UINT32_C({iterationSeed2});
}}
uint16_t inthash(const int32_t key)
{{");
const int perLine = 8;
output.Write($" static const uint16_t g[{state.Graph.V}] =\n {{");
int i = 0;
for (; i < state.Graph.V; ++i)
{
output.Write($"{(i % perLine == 0 ? "\n " : " ")}0x{state.G[i]:X},");
}
if (i % perLine != 0)
{
output.WriteLine("\n };");
}
else
{
output.WriteLine("\n };");
}
output.WriteLine($@"
uint16_t h[8];
_inthash4(key, (uint64_t*)h);
h[0] = h[0] % {state.Graph.V};
h[1] = h[1] % {state.Graph.V};
h[2] = h[2] % {state.Graph.V};
return (g[h[0]] + g[h[1]] + g[h[2]]) % {state.Graph.E};
}}");
}
private int Chm3Compute()
{
uint e = (uint)keys.Length;
uint v = (uint)(1.24 * e);
if (v < 8) v = 0;
State state = new()
{
G = new uint[v],
Visited = new byte[v],
Graph = new()
{
V = v,
Va = v,
E = e,
Vertices = new Vertex[v],
Edges = new Edge[e],
OutputOrder = new uint[e]
}
};
uint iterationSeed1 = (uint)(Seed * ++predictableCounter);
uint iterationSeed2 = predictableCounter;
if (GraphHash(iterationSeed1, iterationSeed2, ref state.Graph) != 0)
{
return -1;
}
if (GraphOutputOrder(ref state.Graph) != 0)
{
return -1;
}
AssignNodes(ref state);
PrintHash(iterationSeed1, iterationSeed2, ref state);
return 0;
}
private void Inthash4Compute(uint key, uint iterationSeed1, uint iterationSeed2, short[] hashes)
{
ulong h64 = ((ulong)key * (RandomMultiplier + iterationSeed1)) + iterationSeed2;
hashes[0] = (short)(h64 & 0xFFFF);
hashes[1] = (short)((h64 >> 16) & 0xFFFF);
hashes[2] = (short)((h64 >> 32) & 0xFFFF);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment