Skip to content

Instantly share code, notes, and snippets.

@pcarrier
Created July 30, 2009 02:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pcarrier/158506 to your computer and use it in GitHub Desktop.
Save pcarrier/158506 to your computer and use it in GitHub Desktop.
local function pack(...)
return arg
end --function
local function as_table(s)
return pack(s:byte(1, #s))
end --function
-- Levenshtein distance
local function distance_between_words(t1,t2)
local l1 = #t1
local l2 = #t2
local mt = {}
local cost = 0
-- Initial matrix
for i = 0, l1 do
mt[i] = {}
mt[i][0] = i
end --for
for j = 0, l2 do
mt[0][j] = j
end --for
for j = 1, l2 do
for i = 1, l1 do
if(t1[i] == t2[j])
then cost = 0
else cost = 1
end --if
mt[i][j] = math.min(
mt[i-1][j] + 1, -- insertion
mt[i][j-1] + 1, -- deletion
mt[i-1][j-1] + cost -- substitution
)
end --for i
end --for j
return mt[l1][l2]
end --function
-- Loads an Unix-like dictionary in words
f = io.open(arg[1],"r")
local words = {}
for line in f:lines() do
words[#words + 1] = line
end --for
-- KISS
local r = {}
local mine = arg[2]
local mine_as_table = as_table(mine)
for k, w in ipairs(words) do
r[k] = {w, distance_between_words(mine_as_table, as_table(w))}
end --for
local function r_order(a, b)
return (a[2] < b[2])
end --function
table.sort(r, r_order)
for _, wc in ipairs(r) do
print(wc[1], wc[2])
end --for
--[[
-- For each word, this evaluates its distance from every other word
local total = #words
local dist = {}
local t1
local t2
for counter, w1 in pairs(words) do
print("Done "..(counter/total))
dist[w1] = {}
t1 = as_table(w1)
for _, w2 in pairs(words) do
if(w2 ~= w1) then
if(dist[w2]) then -- already computed distances for w2
dist[w1][w2] = dist[w2][w1]
else
t2 = as_table(w2)
dist[w1][w2] = dist_between_words(t1,t2)
end --if
end --if
end --for w2
end --for w1
--]]
First version:
% time lua test.lua web2 a >! bench
lua test.lua web2 a >| bench 14,72s user 0,19s system 98% cpu 15,166 total
% time lua test.lua web2 albator >! bench
lua test.lua web2 albator >| bench 64,38s user 0,56s system 97% cpu 1:06,72 total
local'ing all functions:
% time lua test.lua web2 a >! bench
lua test.lua web2 a >| bench 14,75s user 0,21s system 97% cpu 15,350 total
Switching to min3:
% time lua test.lua web2 a >! bench
lua test.lua web2 a >| bench 9,83s user 0,15s system 98% cpu 10,150 total
Switching to math.min (slower than min3!):
% time lua test.lua web2 a >! bench
lua test.lua web2 a >| bench 9,94s user 0,15s system 98% cpu 10,259 total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment