Skip to content

Instantly share code, notes, and snippets.

@alnsn
Last active January 11, 2019 05:30
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alnsn/68f599bc9358fcee122d6175392d779f to your computer and use it in GitHub Desktop.
Save alnsn/68f599bc9358fcee122d6175392d779f to your computer and use it in GitHub Desktop.
Lua module for BDZ perfect hash
-- Building block for generating a perfect not order preserving
-- non-minimal hash.
--
-- For a keyset of size N, it builds a random bipartite graph of
-- size N and order 2 * math.floor(1.0625 * N).
-- With a good probability the graph is acyclic and it's possible
-- to select one vertex from each edge/key that is guaranteed to
-- be unique. Vertices are selected by comparing two bits in
-- a bitset that has the same size as the order of the graph.
--
-- The module comes with a selftest and a small demo function.
--
-- References:
--
-- http://cmph.sourceforge.net/bdz.html
--
-- Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui,
-- Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano
-- Vigna.
-- http://zola.di.unipi.it/rossano/wp-content/papercite-data/pdf/dcc14.pdf
--
-- See also https://github.com/alnsn/rgph
local bit = bit32 or bit or require "bit"
-- Array of oriented edges { { edge=e, vert=v, degree=d }, ... }
-- is stored in a flat array { e, v, d, ... } for better performance.
local xored_edge = 1
local xored_vert = 2
local oedge_degree = 3
local function add_remove_oedge(oedges, delta, e, v1, v2)
oedges[3*v1+xored_edge] = bit.bxor(oedges[3*v1+xored_edge], e)
oedges[3*v1+xored_vert] = bit.bxor(oedges[3*v1+xored_vert], v2)
oedges[3*v1+oedge_degree] = oedges[3*v1+oedge_degree] + delta
end
local function add_edge(oedges, e, v1, v2)
add_remove_oedge(oedges, 1, e, v1, v2)
add_remove_oedge(oedges, 1, e, v2, v1)
end
local function remove_vertex(oedges, v1, order, top)
if oedges[3*v1+oedge_degree] == 1 then
oedges[3*v1+oedge_degree] = 0
local e = oedges[3*v1+xored_edge]
local v2 = oedges[3*v1+xored_vert]
add_remove_oedge(oedges, -1, e, v2, v1)
order[top] = e
top = top - 1
end
return top
end
local function peel(edges, nkeys, oedges, nverts, order)
local top = nkeys
for i = 1, nverts do
local v1 = i - 1
top = remove_vertex(oedges, v1, order, top)
end
local i = nkeys
while i > top do
local o = order[i]
top = remove_vertex(oedges, edges[2*o+1], order, top)
top = remove_vertex(oedges, edges[2*o+2], order, top)
i = i - 1
end
return top
end
local graph_mt = {}
graph_mt.__index = graph_mt
local function new_graph(nkeys)
local nverts = nkeys < 12 and 24 or 2 * math.floor(1.0625 * nkeys)
if nkeys <= 0 or nverts >= 2^32 then return nil, "out of range" end
local g = { nkeys = nkeys, nverts = nverts,
edges = { 0 }, oedges = { 0 }, order = { -1 } }
return setmetatable(g, graph_mt)
end
function graph_mt:entries()
return self.nkeys
end
function graph_mt:vertices()
return self.nverts
end
function graph_mt:edge(e)
local edges = self.edges
return edges[2*e+1], edges[2*e+2]
end
function graph_mt:build(seed, hash, reduce, iter, state, key)
local oedges = self.oedges
local nverts = self.nverts
for i = 1, 3*nverts do
oedges[i] = 0
end
local edges = self.edges
local nkeys = self.nkeys
for i = 1, 2*nkeys do
edges[i] = 0
end
local partsz = math.floor(nverts / 2)
assert(nverts == 2 * partsz)
for i = 1, nkeys do
key = iter(state, key)
if key == nil then return nil, "no key" end
local e = i - 1
local h1, h2 = hash(key, seed)
local v1, v2 = reduce(partsz, h1, h2)
edges[2*e+1] = v1
edges[2*e+2] = v2
add_edge(oedges, e, v1, v2)
end
local order = self.order
if #order ~= nkeys then
for i = 1, nkeys do
order[i] = -1
end
end
local core = peel(edges, nkeys, oedges, nverts, order)
self.core = core
return core == 0 or nil, core ~= 0 and "cycle" or nil
end
function graph_mt:assign(assignments)
if not self.core or self.core ~= 0 then
return nil, "not built"
end
local unassigned = 2
assignments = assignments or {}
local nverts = self.nverts
for i = 1, nverts do
assignments[i] = unassigned
end
local nkeys = self.nkeys
local edges = self.edges
local order = self.order
for i = 1, nkeys do
local e = order[i]
local v1 = edges[2*e+1] + 1
local v2 = edges[2*e+2] + 1
if assignments[v2] == unassigned then
assignments[v2] = (1 - assignments[v1]) % unassigned
end
if assignments[v1] == unassigned then
assignments[v1] = (0 - assignments[v2]) % unassigned
end
end
return assignments
end
-- selftest
if not ... then
-- Equivalent to mi_vector_hash(htole32(value), 4, seed, h) on NetBSD.
local function jenkins2v_u32(value, seed)
local mod = 2^32
local h1 = (0x9e3779b9 + value) % mod
local h2 = 0x9e3779b9
local h3 = (seed + 4) % mod
-- mix
h1 = (mod + mod + h1 - h2 - h3) % mod
h1 = (mod + bit.bxor(h1, bit.rshift(h3, 13))) % mod
h2 = (mod + mod + h2 - h3 - h1) % mod
h2 = (mod + bit.bxor(h2, bit.lshift(h1, 8))) % mod
h3 = (mod + mod + h3 - h1 - h2) % mod
h3 = (mod + bit.bxor(h3, bit.rshift(h2, 13))) % mod
h1 = (mod + mod + h1 - h2 - h3) % mod
h1 = (mod + bit.bxor(h1, bit.rshift(h3, 12))) % mod
h2 = (mod + mod + h2 - h3 - h1) % mod
h2 = (mod + bit.bxor(h2, bit.lshift(h1, 16))) % mod
h3 = (mod + mod + h3 - h1 - h2) % mod
h3 = (mod + bit.bxor(h3, bit.rshift(h2, 5))) % mod
h1 = (mod + mod + h1 - h2 - h3) % mod
h1 = (mod + bit.bxor(h1, bit.rshift(h3, 3))) % mod
h2 = (mod + mod + h2 - h3 - h1) % mod
h2 = (mod + bit.bxor(h2, bit.lshift(h1, 10))) % mod
h3 = (mod + mod + h3 - h1 - h2) % mod
h3 = (mod + bit.bxor(h3, bit.rshift(h2, 15))) % mod
return h1, h2, h3
end
local function mod_reduce(sz, h1, h2)
return h1 % sz, (h2 % sz) + sz
end
local keys = {
-- Values don't matter.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
-- This particular seed value generates an acyclic graph.
-- In general, random seeds should be passed repeatedly
-- until the build succeeds.
local seed = 2
local g = new_graph(#keys)
assert(g:build(seed, jenkins2v_u32, mod_reduce, ipairs(keys)))
local assignments = assert(g:assign())
local unassigned = 2
local duplicates = {}
for e = 0, g:entries() - 1 do
local v1, v2 = g:edge(e)
assert(assignments[v1+1] < unassigned, "must be assigned")
assert(assignments[v2+1] < unassigned, "must be assigned")
-- Select v1 or v2.
local eq = assignments[v1+1] == assignments[v2+1]
local h = eq and v1 or v2
assert(not duplicates[h], "hashes must be unique")
duplicates[h] = true
end
local partition_size = g:vertices() / 2
-- demo
local function perfect_hash(value)
local h1, h2 = jenkins2v_u32(value, seed)
local v1, v2 = mod_reduce(partition_size, h1, h2)
local eq = assignments[v1+1] == assignments[v2+1]
return eq and v1 or v2
end
for i, _ in ipairs(keys) do
print(perfect_hash(i))
end
end
return { new_graph = new_graph }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment