Last active
August 31, 2017 17:54
-
-
Save keleshev/1ed2ae5b08c1603a5008b5f258d6933f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* Naive Levenshtein distance and the related diffing algorithm | |
$ ocaml levenshtein.ml | |
*) | |
let (=>) left right = print_char (if left = right then '.' else 'F') | |
module Levenshtein = struct | |
let minimum = List.fold_left min max_int | |
let rec distance left right = | |
match left, right with | |
| [], list | list, [] -> | |
List.length list | |
| x :: xs, y :: ys -> | |
let cost = if x = y then 0 else 1 in | |
minimum [ | |
distance xs right + 1; | |
distance left ys + 1; | |
distance xs ys + cost; | |
] | |
let () = () | |
; distance [] [] => 0 | |
; distance [1] [1] => 0 | |
; distance [1] [2] => 1 | |
; distance [] [1; 2] => 2 | |
; distance [1; 2] [] => 2 | |
; distance ['n'; 'e'; 't'; 't'; 'i'; 'k'] | |
['g'; 'n'; 'i'; 't'; 't'; 'i'; 's'] => 3 | |
; distance ['k'; 'i'; 't'; 't'; 'e'; 'n'] | |
['s'; 'i'; 't'; 't'; 'i'; 'n'; 'g'] => 3 | |
end | |
module Diff = struct | |
let shortest (head :: tail) = | |
List.fold_left | |
(fun l r -> if List.length l < List.length r then l else r) head tail | |
let rec diff left right = | |
match left, right with | |
| [], list -> | |
List.map ((^) "+") list | |
| list, [] -> | |
List.map ((^) "-") list | |
| x :: xs, y :: ys -> | |
let d = if x = y then x else x ^ "->" ^ y in | |
shortest [ | |
("-" ^ x) :: diff xs right; | |
("+" ^ y) :: diff left ys; | |
d :: diff xs ys; | |
] | |
let () = () | |
; diff [] [] => [] | |
; diff ["1"] ["1"] => ["1"] | |
; diff ["1"] ["2"] => ["1->2"] | |
; diff [] ["1"; "2"] => ["+1"; "+2"] | |
; diff ["1"; "2"] [] => ["-1"; "-2"] | |
; diff [ "k"; "i"; "t"; "t"; "e"; "n"] | |
[ "s"; "i"; "t"; "t"; "i"; "n"; "g"] | |
=> ["k->s"; "i"; "t"; "t"; "e->i"; "n"; "+g"] | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment