Skip to content

Instantly share code, notes, and snippets.

@regiskuckaertz
Last active July 10, 2018 15:16
Show Gist options
  • Save regiskuckaertz/caf4839430de259f8e3f8605d58280ad to your computer and use it in GitHub Desktop.
Save regiskuckaertz/caf4839430de259f8e3f8605d58280ad to your computer and use it in GitHub Desktop.
Edit distance with the Wagner-Fischer algorithm
module Levenshtein where
pairs :: [a] -> [(a,a)]
pairs [x,y] = [(x,y)]
pairs (x:y:xs) = (x,y) : pairs (y:xs)
vowels :: String
vowels = "aeiou"
cost :: Rational -> Rational -> Rational -> Char -> Char -> Rational
cost a b c s t | s == t = b
| otherwise = (a + insert t) `min` (b + subst s t) `min` (c + delete s)
-- Cost of substitution
subst :: Char -> Char -> Rational
subst s t | s `elem` vowels = 0.5
| t `elem` vowels = 0.5
| otherwise = 1
-- Cost of insertion/removal
insert, delete :: Char -> Rational
insert s | s `elem` vowels = 0.5
| otherwise = 1
delete = insert
-- Coefficients for row `i` of the matrix given coefficients for row `i-1`
costs :: [Rational] -> Rational -> Char -> String -> [Rational]
costs row i s ts = reverse . foldl f [i] $ xss
where f (a:as) ((b,c), t) = cost a b c s t : (a:as)
xss = zip (pairs row) ts
levenshtein :: String -> String -> Double
levenshtein "" ts = fromRational . sum . map insert $ ts
levenshtein ss "" = fromRational . sum . map insert $ ss
levenshtein ss ts = fromRational . last $ row
where row = foldl f row0 (reverse $ foldl h [(0,'-')] ss)
row0 = reverse $ foldl g [0] ts
f row (i,s) = costs row i s ts
g l@(x:xs) t = x + delete t : l
h l@((x,_):xs) s = (x + insert s, s) : l
-- The cost matrix of `levenshtein "Pneumonoultramicroscopicsilicovolcanoconiosis" "Pseudopseudohypoparathyroidism"
--
-- P s e u d o p s e u d o h y p o p a r a t h y r o i d i s m
-- 0 1 2 2.5 3 4 4.5 5.5 6.5 7 7.5 8.5 9 10 11 12 12.5 13.5 14 15 15.5 16.5 17.5 18.5 19.5 20 20.5 21.5 22 23 24
-- P 1 0 1 1.5 2 3 3.5 4.5 5.5 6 6.5 7.5 8 9 10 11 11.5 12.5 13 14 14.5 15.5 16.5 17.5 18.5 19 19.5 20.5 21 22 23
-- n 2 1 1 1.5 2 3 3.5 4.5 5.5 6 6.5 7.5 8 9 10 11 11.5 12.5 13 14 14.5 15.5 16.5 17.5 18.5 19 19.5 20.5 21 22 23
-- e 2.5 1.5 1.5 1 1.5 2.5 3 4 5 5.5 6 7 7.5 8.5 9.5 10.5 11 12 12.5 13.5 14 15 16 17 18 18.5 19 20 20.5 21.5 22.5
-- u 3 2 2 1.5 1 2 2.5 3.5 4.5 5 5.5 6.5 7 8 9 10 10.5 11.5 12 13 13.5 14.5 15.5 16.5 17.5 18 18.5 19.5 20 21 22
-- m 4 3 3 2.5 2 2 2.5 3.5 4.5 5 5.5 6.5 7 8 9 10 10.5 11.5 12 13 13.5 14.5 15.5 16.5 17.5 18 18.5 19.5 20 21 21
-- o 4.5 3.5 3.5 3 2.5 2.5 2 3 4 4.5 5 6 6.5 7.5 8.5 9.5 10 11 11.5 12.5 13 14 15 16 17 17.5 18 19 19.5 20.5 21.5
-- n 5.5 4.5 4.5 4 3.5 3.5 3 3 4 4.5 5 6 6.5 7.5 8.5 9.5 10 11 11.5 12.5 13 14 15 16 17 17.5 18 19 19.5 20.5 21.5
-- o 6 5 5 4.5 4 4 3.5 3.5 3.5 4 4.5 5.5 6 7 8 9 9.5 10.5 11 12 12.5 13.5 14.5 15.5 16.5 17 17.5 18.5 19 20 21
-- u 6.5 5.5 5.5 5 4.5 4.5 4 4 4 4 4 5 5.5 6.5 7.5 8.5 9 10 10.5 11.5 12 13 14 15 16 16.5 17 18 18.5 19.5 20.5
-- l 7.5 6.5 6.5 6 5.5 5.5 5 5 5 4.5 4.5 5 5.5 6.5 7.5 8.5 9 10 10.5 11.5 12 13 14 15 16 16.5 17 18 18.5 19.5 20.5
-- t 8.5 7.5 7.5 7 6.5 6.5 6 6 6 5.5 5 5.5 5.5 6.5 7.5 8.5 9 10 10.5 11.5 12 12 13 14 15 15.5 16 17 17.5 18.5 19.5
-- r 9.5 8.5 8.5 8 7.5 7.5 7 7 7 6.5 6 6 6 6.5 7.5 8.5 9 10 10.5 10.5 11 12 13 14 14 14.5 15 16 16.5 17.5 18.5
-- a 10 9 9 8.5 8 8 7.5 7.5 7.5 7 6.5 6.5 6.5 6.5 7 8 8.5 9.5 10 11 10.5 11.5 12.5 13.5 14.5 14.5 15 15.5 16 17 18
-- m 11 10 10 9.5 9 9 8.5 8.5 8.5 8 7.5 7.5 7 7.5 7.5 8 8.5 9.5 10 11 11.5 11.5 12.5 13.5 14.5 15 15 16 16 17 17
-- i 11.5 10.5 10.5 10 9.5 9.5 9 9 9 8.5 8 8 7.5 7.5 8 8 8.5 9 9.5 10.5 11 12 12 13 14 14.5 15 15.5 16 16.5 17.5
-- c 12.5 11.5 11.5 11 10.5 10.5 10 10 10 9.5 9 9 8.5 8.5 8.5 9 8.5 9.5 9.5 10.5 11 12 13 13 14 14.5 15 16 16 17 17.5
-- r 13.5 12.5 12.5 12 11.5 11.5 11 11 11 10.5 10 10 9.5 9.5 9.5 9.5 9.5 9.5 10 9.5 10 11 12 13 13 13.5 14 15 15.5 16.5 17.5
-- o 14 13 13 12.5 12 12 11.5 11.5 11.5 11 10.5 10.5 10 10 10 10 9.5 10 10 10 10 10.5 11.5 12.5 13.5 13 13.5 14.5 15 16 17
-- s 15 14 13 13.5 13 13 12.5 12.5 11.5 12 11.5 11.5 11 11 11 11 10.5 10.5 10.5 11 10.5 11 11.5 12.5 13.5 14 13.5 14.5 15 15 16
-- c 16 15 14 13.5 14 14 13.5 13.5 12.5 12 12.5 12.5 12 12 12 12 11.5 11.5 11 11.5 11.5 11.5 12 12.5 13.5 14 14.5 14.5 15 16 16
-- o 16.5 15.5 14.5 14 14 14.5 14 14 13 12.5 12.5 13 12.5 12.5 12.5 12.5 12 12 11.5 11.5 12 12 12 12.5 13 13.5 14 15 15 15.5 16.5
-- p 17.5 16.5 15.5 15 14.5 15 15 14 14 13.5 13 13.5 13.5 13.5 13.5 12.5 13 12 12.5 12.5 12 13 13 13 13.5 13.5 14 15 15.5 16 16.5
-- i 18 17 16 15.5 15 15 15.5 14.5 14.5 14 13.5 13.5 14 14 14 13 13 12.5 12.5 13 12.5 12.5 13.5 13.5 13.5 14 13.5 14.5 15 16 16.5
-- c 19 18 17 16.5 16 16 15.5 15.5 15.5 15 14.5 14.5 14 15 15 14 13.5 13.5 13 13.5 13.5 13.5 13.5 14.5 14.5 14 14.5 14.5 15 16 17
-- s 20 19 18 17.5 17 17 16.5 16.5 15.5 16 15.5 15.5 15 15 16 15 14.5 14.5 14 14 14 14.5 14.5 14.5 15.5 15 14.5 15.5 15 15 16
-- i 20.5 19.5 18.5 18 17.5 17.5 17 17 16 16 16 16 15.5 15.5 15.5 15.5 15 15 14.5 14.5 14.5 14.5 15 15 15 15.5 15 15 15.5 15.5 15.5
-- l 21.5 20.5 19.5 19 18.5 18.5 18 18 17 16.5 16.5 17 16.5 16.5 16.5 16.5 16 16 15.5 15.5 15 15.5 15.5 16 16 15.5 16 16 15.5 16.5 16.5
-- i 22 21 20 19.5 19 19 18.5 18.5 17.5 17 17 17 17 17 17 17 16.5 16.5 16 16 15.5 15.5 16 16 16.5 16 15.5 16.5 16 16 17
-- c 23 22 21 20.5 20 20 19.5 19.5 18.5 18 17.5 18 17.5 18 18 18 17.5 17.5 17 17 16.5 16.5 16.5 17 17 17 16.5 16.5 17 17 17
-- o 23.5 22.5 21.5 21 20.5 20.5 20 20 19 18.5 18 18 18 18 18.5 18.5 18 18 17.5 17.5 17 17 17 17 17.5 17 17 17 17 17.5 17.5
-- v 24.5 23.5 22.5 22 21.5 21.5 21 21 20 19.5 19 19 18.5 19 19 19.5 19 19 18.5 18.5 18 18 18 18 18 18 17.5 18 17.5 18 18.5
-- o 25 24 23 22.5 22 22 21.5 21.5 20.5 20 19.5 19.5 19 19 19.5 19.5 19.5 19.5 19 19 18.5 18.5 18.5 18.5 18.5 18 18 18 18 18 18.5
-- l 26 25 24 23.5 23 23 22.5 22.5 21.5 21 20.5 20.5 20 20 20 20.5 20 20.5 20 20 19.5 19.5 19.5 19.5 19.5 19 18.5 19 18.5 19 19
-- c 27 26 25 24.5 24 24 23.5 23.5 22.5 22 21.5 21.5 21 21 21 21 21 21 21 21 20.5 20.5 20.5 20.5 20.5 20 19.5 19.5 19.5 19.5 20
-- a 27.5 26.5 25.5 25 24.5 24.5 24 24 23 22.5 22 22 21.5 21.5 21.5 21.5 21.5 21.5 21 21.5 21 21 21 21 21 20.5 20 20 20 20 20
-- n 28.5 27.5 26.5 26 25.5 25.5 25 25 24 23.5 23 23 22.5 22.5 22.5 22.5 22 22.5 22 22 22 22 22 22 22 21.5 21 21 20.5 21 21
-- o 29 28 27 26.5 26 26 25.5 25.5 24.5 24 23.5 23.5 23 23 23 23 22.5 22.5 22.5 22.5 22.5 22.5 22.5 22.5 22.5 22 21.5 21.5 21 21 21.5
-- c 30 29 28 27.5 27 27 26.5 26.5 25.5 25 24.5 24.5 24 24 24 24 23.5 23.5 23 23.5 23 23.5 23.5 23.5 23.5 23 22.5 22.5 22 22 22
-- o 30.5 29.5 28.5 28 27.5 27.5 27 27 26 25.5 25 25 24.5 24.5 24.5 24.5 24 24 23.5 23.5 23.5 23.5 24 24 24 23.5 23 23 22.5 22.5 22.5
-- n 31.5 30.5 29.5 29 28.5 28.5 28 28 27 26.5 26 26 25.5 25.5 25.5 25.5 25 25 24.5 24.5 24 24.5 24.5 25 25 24.5 24 24 23.5 23.5 23.5
-- i 32 31 30 29.5 29 29 28.5 28.5 27.5 27 26.5 26.5 26 26 26 26 25.5 25.5 25 25 24.5 24.5 25 25 25.5 25 24.5 24.5 24 24 24
-- o 32.5 31.5 30.5 30 29.5 29.5 29 29 28 27.5 27 27 26.5 26.5 26.5 26.5 26 26 25.5 25.5 25 25 25 25.5 25.5 25.5 25 25 24.5 24.5 24.5
-- s 33.5 32.5 31.5 31 30.5 30.5 30 30 29 28.5 28 28 27.5 27.5 27.5 27.5 27 27 26.5 26.5 26 26 26 26 26.5 26 26 26 25.5 24.5 25.5
-- i 34 33 32 31.5 31 31 30.5 30.5 29.5 29 28.5 28.5 28 28 28 28 27.5 27.5 27 27 26.5 26.5 26.5 26.5 26.5 26.5 26 26.5 26 25 25
-- s 35 34 33 32.5 32 32 31.5 31.5 30.5 30 29.5 29.5 29 29 29 29 28.5 28.5 28 28 27.5 27.5 27.5 27.5 27.5 27 27 27 27 26 26
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment