Last active
May 4, 2024 10:06
-
-
Save am11/22eaa6584de55483d988d9831899bcd3 to your computer and use it in GitHub Desktop.
CHM3 Hash Table Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Linq; | |
using System.Runtime.CompilerServices; | |
// input format is hex with 0x prefix; one per line: | |
// 0x5DF | |
// 0x123 | |
// etc. | |
CHM3PerfectHashGenerator hasher = new(File.ReadAllLines(args[0]) | |
.Where(lines => !string.IsNullOrEmpty(lines)) | |
.Select(line => Convert.ToUInt32(line.Substring(2), 16)) // Substring(2) remove 0x prefix. | |
.ToArray(), Console.Out); | |
hasher.Execute(); | |
/// <summary> | |
/// Generates perfect hash using CHM3 algorithm for a given set of unquie keys. | |
/// </summary> | |
/// <remarks> | |
/// This implementation is based on NetBSD's nbperf utlity: | |
/// <see cref="https://github.com/rurban/nbperf/tree/d878cd8b8ae313541cbe311a19fd5229af7e3aa4"/>. | |
/// It produces equivalent results to <code>nbperf -Ip list</code> output. | |
/// </remarks> | |
public sealed class CHM3PerfectHashGenerator(uint[] keys, TextWriter output) | |
{ | |
private const uint Seed = 0x85EBCA6B; | |
private const ulong RandomMultiplier = 0x9DDFEA08EB382D69; | |
private static uint predictableCounter = 0; | |
private bool checkDuplicates = false; | |
public void Execute() | |
{ | |
while (true) | |
{ | |
if (Chm3Compute() == 0) break; | |
checkDuplicates = true; | |
} | |
} | |
private struct Vertex | |
{ | |
public uint Degree; | |
public uint Edges; | |
}; | |
[InlineArray(3)] | |
private struct Edge | |
{ | |
private uint _vertices; | |
}; | |
private struct Graph | |
{ | |
public Vertex[] Vertices; | |
public Edge[] Edges; | |
public uint OutputIndex; | |
public uint[] OutputOrder; | |
public uint E, V, Va; | |
public int HashFudge; | |
}; | |
private struct State | |
{ | |
public Graph Graph; | |
public uint[] G; | |
public byte[] Visited; | |
}; | |
private int SortingIntcmp(uint a, uint b, Graph graph) | |
{ | |
if (a == b) return 0; | |
ref readonly Edge ea = ref graph.Edges[a]; | |
ref readonly Edge eb = ref graph.Edges[b]; | |
for (int i = 0; i < 3; ++i) | |
{ | |
if (ea[i] < eb[i]) | |
{ | |
return -1; | |
} | |
if (ea[i] > eb[i]) | |
{ | |
return 1; | |
} | |
} | |
if (keys[a] == keys[b]) | |
{ | |
throw new ArgumentException(nameof(keys), $"Duplicate key found! Key: {keys[a]}, a: {a}, b: {b}"); | |
} | |
return 0; | |
} | |
private void GraphCheckDuplicates(Graph graph) | |
{ | |
uint[] keyIndex = new uint[graph.E]; | |
for (uint i = 0; i < graph.E; ++i) | |
{ | |
keyIndex[i] = i; | |
} | |
Array.Sort(keyIndex, (a, b) => SortingIntcmp(a, b, graph)); | |
} | |
private void GraphAddEdge(ref Graph graph, uint edge) | |
{ | |
ref readonly Edge e = ref graph.Edges[edge]; | |
for (int i = 0; i < 3; ++i) | |
{ | |
ref Vertex v = ref graph.Vertices[e[i]]; | |
v.Edges ^= edge; | |
++v.Degree; | |
} | |
} | |
private void GraphRemoveEdge(ref Graph graph, uint edge) | |
{ | |
ref readonly Edge e = ref graph.Edges[edge]; | |
for (int i = 0; i < 3; ++i) | |
{ | |
ref Vertex v = ref graph.Vertices[e[i]]; | |
v.Edges ^= edge; | |
--v.Degree; | |
} | |
} | |
private void GraphRemoveVertex(ref Graph graph, uint vertex) | |
{ | |
Vertex v = graph.Vertices[vertex]; | |
if (v.Degree == 1) | |
{ | |
uint e = v.Edges; | |
graph.OutputOrder[--graph.OutputIndex] = e; | |
GraphRemoveEdge(ref graph, e); | |
} | |
} | |
private int GraphHash(uint iterationSeed1, uint iterationSeed2, ref Graph graph) | |
{ | |
uint[] hashes = new uint[4]; | |
short[] hashes16 = new short[8]; | |
Array.Clear(graph.Vertices, 0, (int)graph.Va); | |
graph.HashFudge = 0; | |
for (int i = 0; i < graph.E; ++i) | |
{ | |
Inthash4Compute(keys[i], iterationSeed1, iterationSeed2, hashes16); | |
ref Edge e = ref graph.Edges[i]; | |
for (int j = 0; j < 3; ++j) | |
{ | |
e[j] = (ushort)hashes16[j] % graph.V; | |
if (j == 1 && e[0] == e[1]) | |
{ | |
return -1; | |
} | |
if (j == 2 && (e[0] == e[2] || e[1] == e[2])) | |
{ | |
return -1; | |
} | |
} | |
} | |
for (uint i = 0; i < graph.E; ++i) | |
{ | |
GraphAddEdge(ref graph, i); | |
} | |
if (checkDuplicates) | |
{ | |
checkDuplicates = false; | |
GraphCheckDuplicates(graph); | |
} | |
return 0; | |
} | |
private int GraphOutputOrder(ref Graph graph) | |
{ | |
graph.OutputIndex = graph.E; | |
for (uint i = 0; i < graph.V; ++i) | |
{ | |
GraphRemoveVertex(ref graph, i); | |
} | |
for (uint i = graph.E; graph.OutputIndex > 0 && i > graph.OutputIndex;) | |
{ | |
ref readonly Edge e2 = ref graph.Edges[graph.OutputOrder[--i]]; | |
for (int j = 0; j < 3; ++j) | |
{ | |
GraphRemoveVertex(ref graph, e2[j]); | |
} | |
} | |
if (graph.OutputIndex != 0) | |
{ | |
return -1; | |
} | |
return 0; | |
} | |
private void AssignNodes(ref State state) | |
{ | |
for (uint i = 0; i < state.Graph.E; ++i) | |
{ | |
uint e_idx = state.Graph.OutputOrder[i]; | |
ref readonly Edge e = ref state.Graph.Edges[e_idx]; | |
uint v0, v1, v2, g; | |
if (state.Visited[e[0]] == 0) | |
{ | |
v0 = e[0]; | |
v1 = e[1]; | |
v2 = e[2]; | |
} | |
else if (state.Visited[e[1]] == 0) | |
{ | |
v0 = e[1]; | |
v1 = e[0]; | |
v2 = e[2]; | |
} | |
else | |
{ | |
v0 = e[2]; | |
v1 = e[0]; | |
v2 = e[1]; | |
} | |
g = e_idx - state.G[v1] - state.G[v2]; | |
if (g >= state.Graph.E) | |
{ | |
g += state.Graph.E; | |
if (g >= state.Graph.E) | |
{ | |
g += state.Graph.E; | |
} | |
} | |
state.G[v0] = g; | |
state.Visited[v0] = 1; | |
state.Visited[v1] = 1; | |
state.Visited[v2] = 1; | |
} | |
} | |
private void PrintHash(uint iterationSeed1, uint iterationSeed2, ref State state) | |
{ | |
output.WriteLine($@"#include <stdint.h> | |
static inline void _inthash4(const int32_t key, uint64_t *h) | |
{{ | |
*h = (int64_t)key * (UINT64_C(0x{RandomMultiplier:X}) + UINT64_C({iterationSeed1})) + UINT32_C({iterationSeed2}); | |
}} | |
uint16_t inthash(const int32_t key) | |
{{"); | |
const int perLine = 8; | |
output.Write($" static const uint16_t g[{state.Graph.V}] =\n {{"); | |
int i = 0; | |
for (; i < state.Graph.V; ++i) | |
{ | |
output.Write($"{(i % perLine == 0 ? "\n " : " ")}0x{state.G[i]:X},"); | |
} | |
if (i % perLine != 0) | |
{ | |
output.WriteLine("\n };"); | |
} | |
else | |
{ | |
output.WriteLine("\n };"); | |
} | |
output.WriteLine($@" | |
uint16_t h[8]; | |
_inthash4(key, (uint64_t*)h); | |
h[0] = h[0] % {state.Graph.V}; | |
h[1] = h[1] % {state.Graph.V}; | |
h[2] = h[2] % {state.Graph.V}; | |
return (g[h[0]] + g[h[1]] + g[h[2]]) % {state.Graph.E}; | |
}}"); | |
} | |
private int Chm3Compute() | |
{ | |
uint e = (uint)keys.Length; | |
uint v = (uint)(1.24 * e); | |
if (v < 8) v = 0; | |
State state = new() | |
{ | |
G = new uint[v], | |
Visited = new byte[v], | |
Graph = new() | |
{ | |
V = v, | |
Va = v, | |
E = e, | |
Vertices = new Vertex[v], | |
Edges = new Edge[e], | |
OutputOrder = new uint[e] | |
} | |
}; | |
uint iterationSeed1 = (uint)(Seed * ++predictableCounter); | |
uint iterationSeed2 = predictableCounter; | |
if (GraphHash(iterationSeed1, iterationSeed2, ref state.Graph) != 0) | |
{ | |
return -1; | |
} | |
if (GraphOutputOrder(ref state.Graph) != 0) | |
{ | |
return -1; | |
} | |
AssignNodes(ref state); | |
PrintHash(iterationSeed1, iterationSeed2, ref state); | |
return 0; | |
} | |
private void Inthash4Compute(uint key, uint iterationSeed1, uint iterationSeed2, short[] hashes) | |
{ | |
ulong h64 = ((ulong)key * (RandomMultiplier + iterationSeed1)) + iterationSeed2; | |
hashes[0] = (short)(h64 & 0xFFFF); | |
hashes[1] = (short)((h64 >> 16) & 0xFFFF); | |
hashes[2] = (short)((h64 >> 32) & 0xFFFF); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment