Skip to content

Instantly share code, notes, and snippets.

@verdy-p
Forked from cloudwu/hashcompare.lua
Last active May 29, 2020 10:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.
Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.
Compare some hash function for lua table implementation
--[[
370104 words from https://github.com/dwyl/english-words/blob/master/words_alpha.txt
3/4 5/8 7/8 9/16 11/16 13/16 15/32 17/32 19/32 21/32
(h<<5) + (h>>2) + x (3)84874 (4)81670 (4)113326 (4)80336 (2)96169 (5)111520 (4)125243 (3)79309 (4)87762 (5)95643
(h<<4) + (h>>3) + x (2)84819 (1)81351 (3)113283 (5)80439 (3)96264 (2)111197 (3)125200 (4)79486 (2)87582 (2)95430
(h<<5) - (h>>2) + x (1)84464 (2)81428 (2)113108 (2)80201 (4)96464 (4)111469 (1)125052 (2)79222 (3)87666 (3)95466
(h<<4) - (h>>3) + x (4)85050 (3)81587 (1)113084 (1)80112 (1)96131 (1)111134 (2)125185 (1)79163 (1)87361 (1)95239
((h + x) * 0xAAAB) >> 3 (5)85143 (5)81973 (5)113538 (3)80323 (5)96593 (3)111401 (5)125306 (5)79636 (5)88120 (4)95536
]]
local seed = 0
local dictfile = "words_alpha.txt"
local hash = {}
function hash.h129(h, x)
return (h<<5) + (h>>2) + x
end
function hash.h129_43(h,x)
return (h<<4) + (h>>3) + x
end
function hash.h127(h, x)
return (h<<4) - (h>>3) + x
end
function hash.h127_52(h, x)
return (h<<5) - (h>>2) + x
end
function hash.aaab(h, x)
return ((h + x) * 0xAAAB) >> 3
end
local function shuffle(t)
local n = #t
for i = 1, n-1 do
local idx = math.random(i, n)
t[i], t[idx] = t[idx], t[i]
end
return t
end
local function readdict(filename)
local dict = {}
local n = 1
for line in io.lines(filename) do
dict[n] = line
n = n + 1
end
return dict
end
local function hashstring(str, func)
local h = seed ~ #str
str:gsub(".", function (c)
h = h ~ ( func(h, c:byte()) & 0xffffffff )
return "" -- this make gsub faster
end)
return h
end
local function test(dict, slots, hashfunc)
local size = 1 << slots
local colliding = 0
for i = 1, #dict-slots+1, slots do
local tbl = {}
for j = 0, slots-1 do
local h = hashstring(dict[i+j], hashfunc) % size
if tbl[h] then
colliding = colliding + 1
else
tbl[h] = true
end
end
end
return colliding
end
local function genguid(n)
local random = math.random
local dict = {}
for i = 1, n do
local tmp = {}
for j = 1, 16 do
tmp[j] = string.format("%02x", random(0,255))
end
dict[i] = table.concat(tmp)
end
return dict
end
local dict = shuffle(readdict(dictfile))
-- local dict = genguid(100000)
print(#dict, "words")
local results = {}
for key in pairs(hash) do
results[key] = {}
end
local N = 22
for i = 3, N, 2 do
local group = i
print(string.format("%d/%d", group, 1<<group))
local rank = {}
for key, func in pairs(hash) do
local c = test(dict, group, func)
table.insert(rank, { c = c , key = key })
end
table.sort(rank, function(a,b) return a.c < b.c end)
for r, obj in ipairs(rank) do
results[obj.key][i] = { c = obj.c , rank = r }
end
end
for key in pairs(hash) do
local tmp = {}
for i = 3, N, 2 do
local obj = results[key][i]
table.insert(tmp, string.format("(%d)%d", obj.rank, obj.c))
end
print(key, table.concat(tmp, "\t"))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment