Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Compare some hash function for lua table implementation
--[[
370104 words from https://github.com/dwyl/english-words/blob/master/words_alpha.txt
3/4 5/8 7/8 9/16 11/16 13/16 15/32 17/32 19/32 21/32
(h<<5) + (h>>2) + x (3)84874 (4)81670 (4)113326 (4)80336 (2)96169 (5)111520 (4)125243 (3)79309 (4)87762 (5)95643
(h<<4) + (h>>3) + x (2)84819 (1)81351 (3)113283 (5)80439 (3)96264 (2)111197 (3)125200 (4)79486 (2)87582 (2)95430
(h<<5) - (h>>2) + x (1)84464 (2)81428 (2)113108 (2)80201 (4)96464 (4)111469 (1)125052 (2)79222 (3)87666 (3)95466
(h<<4) - (h>>3) + x (4)85050 (3)81587 (1)113084 (1)80112 (1)96131 (1)111134 (2)125185 (1)79163 (1)87361 (1)95239
((h + x) * 0xAAAB) >> 3 (5)85143 (5)81973 (5)113538 (3)80323 (5)96593 (3)111401 (5)125306 (5)79636 (5)88120 (4)95536
]]
local seed = 0
local dictfile = "words_alpha.txt"
local hash = {}
function hash.h129(h, x)
return (h<<5) + (h>>2) + x
end
function hash.h129_43(h,x)
return (h<<4) + (h>>3) + x
end
function hash.h127(h, x)
return (h<<4) - (h>>3) + x
end
function hash.h127_52(h, x)
return (h<<5) - (h>>2) + x
end
function hash.aaab(h, x)
return ((h + x) * 0xAAAB) >> 3
end
local function shuffle(t)
local n = #t
for i = 1, n-1 do
local idx = math.random(i, n)
t[i], t[idx] = t[idx], t[i]
end
return t
end
local function pow2(x)
local r = 2
while r < x do
r = r << 1
end
return r
end
local function readdict(filename)
local dict = {}
local n = 1
for line in io.lines(filename) do
dict[n] = line
n = n + 1
end
return dict
end
local function hashstring(str, func)
local h = seed ~ #str
str:gsub(".", function (c)
h = h ~ ( func(h, c:byte()) & 0xffffffff )
return "" -- this make gsub faster
end)
return h
end
local function test(dict, slots, hashfunc)
local size = pow2(slots)
local colliding = 0
for i = 1, #dict-slots+1, slots do
local tbl = {}
for j = 0, slots-1 do
local h = hashstring(dict[i+j], hashfunc) % size
if tbl[h] then
colliding = colliding + 1
else
tbl[h] = true
end
end
end
return colliding
end
local function genguid(n)
local dict = {}
for i = 1, n do
local tmp = {}
for j = 1, 16 do
tmp[j] = string.format("%02x", math.random(0,255))
end
dict[i] = table.concat(tmp)
end
return dict
end
local dict = shuffle(readdict(dictfile))
-- local dict = genguid(100000)
print(#dict, "words")
local results = {}
for key in pairs(hash) do
results[key] = {}
end
local N = 22
for i = 3, N, 2 do
local group = i
print(string.format("%d/%d", group, pow2(group)))
local rank = {}
for key, func in pairs(hash) do
local c = test(dict, group, func)
table.insert(rank, { c = c , key = key })
end
table.sort(rank, function(a,b) return a.c < b.c end)
for r, obj in ipairs(rank) do
results[obj.key][i] = { c = obj.c , rank = r }
end
end
for key in pairs(hash) do
local tmp = {}
for i = 3, N, 2 do
local obj = results[key][i]
table.insert(tmp, string.format("(%d)%d", obj.rank, obj.c))
end
print(key, table.concat(tmp, "\t"))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.