Created
July 30, 2009 02:15
-
-
Save pcarrier/158506 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local function pack(...) | |
return arg | |
end --function | |
local function as_table(s) | |
return pack(s:byte(1, #s)) | |
end --function | |
-- Levenshtein distance | |
local function distance_between_words(t1,t2) | |
local l1 = #t1 | |
local l2 = #t2 | |
local mt = {} | |
local cost = 0 | |
-- Initial matrix | |
for i = 0, l1 do | |
mt[i] = {} | |
mt[i][0] = i | |
end --for | |
for j = 0, l2 do | |
mt[0][j] = j | |
end --for | |
for j = 1, l2 do | |
for i = 1, l1 do | |
if(t1[i] == t2[j]) | |
then cost = 0 | |
else cost = 1 | |
end --if | |
mt[i][j] = math.min( | |
mt[i-1][j] + 1, -- insertion | |
mt[i][j-1] + 1, -- deletion | |
mt[i-1][j-1] + cost -- substitution | |
) | |
end --for i | |
end --for j | |
return mt[l1][l2] | |
end --function | |
-- Loads an Unix-like dictionary in words | |
f = io.open(arg[1],"r") | |
local words = {} | |
for line in f:lines() do | |
words[#words + 1] = line | |
end --for | |
-- KISS | |
local r = {} | |
local mine = arg[2] | |
local mine_as_table = as_table(mine) | |
for k, w in ipairs(words) do | |
r[k] = {w, distance_between_words(mine_as_table, as_table(w))} | |
end --for | |
local function r_order(a, b) | |
return (a[2] < b[2]) | |
end --function | |
table.sort(r, r_order) | |
for _, wc in ipairs(r) do | |
print(wc[1], wc[2]) | |
end --for | |
--[[ | |
-- For each word, this evaluates its distance from every other word | |
local total = #words | |
local dist = {} | |
local t1 | |
local t2 | |
for counter, w1 in pairs(words) do | |
print("Done "..(counter/total)) | |
dist[w1] = {} | |
t1 = as_table(w1) | |
for _, w2 in pairs(words) do | |
if(w2 ~= w1) then | |
if(dist[w2]) then -- already computed distances for w2 | |
dist[w1][w2] = dist[w2][w1] | |
else | |
t2 = as_table(w2) | |
dist[w1][w2] = dist_between_words(t1,t2) | |
end --if | |
end --if | |
end --for w2 | |
end --for w1 | |
--]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
First version: | |
% time lua test.lua web2 a >! bench | |
lua test.lua web2 a >| bench 14,72s user 0,19s system 98% cpu 15,166 total | |
% time lua test.lua web2 albator >! bench | |
lua test.lua web2 albator >| bench 64,38s user 0,56s system 97% cpu 1:06,72 total | |
local'ing all functions: | |
% time lua test.lua web2 a >! bench | |
lua test.lua web2 a >| bench 14,75s user 0,21s system 97% cpu 15,350 total | |
Switching to min3: | |
% time lua test.lua web2 a >! bench | |
lua test.lua web2 a >| bench 9,83s user 0,15s system 98% cpu 10,150 total | |
Switching to math.min (slower than min3!): | |
% time lua test.lua web2 a >! bench | |
lua test.lua web2 a >| bench 9,94s user 0,15s system 98% cpu 10,259 total |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment