Skip to content

Instantly share code, notes, and snippets.

@jaspervdj
Created January 31, 2011 09:55
Show Gist options
  • Save jaspervdj/803839 to your computer and use it in GitHub Desktop.
Save jaspervdj/803839 to your computer and use it in GitHub Desktop.
String diff using higher order functions for character distances
#!/usr/bin/lua
--------------------------------------------------------------------------------
-- Distance implementation (abstract) --
--------------------------------------------------------------------------------
-- Simple minimum function for 3 arguments
local function min3(a, b, c)
local x = a < b and a or b
return x < c and x or c
end
-- Distance from str1 to str2
function get_distance(replace_cost, insert_cost, delete_cost, str1, str2)
local cache, len1, len2 = {}, string.len(str1), string.len(str2)
-- Initialize cache
local cache = {}
for i = 0, len1 do cache[i] = {} end
cache[0][0] = 0
-- Initial values
for i = 1, len1 do
cache[i][0] = cache[i - 1][0] + delete_cost(string.sub(str1, i, i))
end
for j = 1, len2 do
cache[0][j] = cache[0][j - 1] + insert_cost(string.sub(str2, j, j))
end
-- Build cache
for i = 1, len1 do
for j = 1, len2 do
-- Find the two characters
local c1 = string.sub(str1, i, i)
local c2 = string.sub(str2, j, j)
cache[i][j] = min3(
-- Replace
cache[i - 1][j - 1] + replace_cost(c1, c2),
-- Insert
cache[i - 1][j] + insert_cost(c1),
-- Delete
cache[i][j - 1] + delete_cost(c2)
)
end
end
-- Take result
return cache[len1][len2]
end
--------------------------------------------------------------------------------
-- Concrete functions, example: vowels vs. consonants --
--------------------------------------------------------------------------------
-- Test if a character is a vowel
function is_vowel(c)
if string.find("aeiou", c) then return true else return false end
end
-- Vowels are hard to replace by consonants, and vice versa
function replace_cost(c1, c2)
-- Special case: characters are equal: no cost
if c1 == c2 then return 0 end
if is_vowel(c1) == is_vowel(c2) then
return 1
else
return 10000
end
end
-- Consonants are harder to delete and insert
function cheap_vowels(c)
if is_vowel(c) then return 1 else return 3 end
end
-- Curried get_distance
function get_distance_vowels(str1, str2)
return get_distance(replace_cost, cheap_vowels, cheap_vowels, str1, str2)
end
--------------------------------------------------------------------------------
-- Tests --
--------------------------------------------------------------------------------
function test(str1, str2)
print(str1 .. " -> " .. str2 .. ": " .. get_distance_vowels(str1, str2))
end
test("jasper", "japser")
test("jasper", "jasr")
test("jasper", "josper")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment