Skip to content

Instantly share code, notes, and snippets.

@keleshev
Last active May 30, 2023 11:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keleshev/fa7c868f0d1176434341024aed4eaeb7 to your computer and use it in GitHub Desktop.
Save keleshev/fa7c868f0d1176434341024aed4eaeb7 to your computer and use it in GitHub Desktop.
let distance' ~bound a b =
let n = String.length a and m = String.length b in
assert (n >= m);
if n - m > bound then None else
let fill_bound = min m bound in
let previous = Array.make (m + 1) max_int in
for j = 0 to fill_bound do previous.(j) <- j done;
let current = Array.make (m + 1) max_int in
for i = 0 to n - 1 do
current.(0) <- i + 1;
(* Compute stripe indices *)
let stripe_start = max 0 (i - fill_bound)
and stripe_end = min m (i + fill_bound) - 1 in
(* Ignore entry left of leftmost *)
if stripe_start > 0 then current.(stripe_start) <- max_int;
for j = stripe_start to stripe_end do
current.(j + 1) <-
if a.[i] = b.[j] then
previous.(j)
else
let deletion = previous.(j + 1)
and insertion = current.(j)
and substitution = previous.(j) in
let smallest = min deletion (min insertion substitution) in
if smallest = max_int then max_int else smallest + 1;
done;
for j = 0 to m do previous.(j) <- current.(j) done;
done;
let distance = previous.(m) in
if distance > bound then None else Some distance
(** Bounded Levenshtein distance
Based on https://www.baeldung.com/cs/levenshtein-distance-computation
let m = min(length(a), length(b))
Time: O(m * min(m, bound))
Space: O(m)
By default, bound is max_int (unbounded).
*)
let distance ?(bound=max_int) a b =
let n = String.length a and m = String.length b in
if n > m then distance' ~bound a b else distance' ~bound b a
let test =
assert (distance ~bound:max_int "asdfghjkl" "lkjhgfdsa" = Some 8);
assert (distance ~bound:100 "asdfghjkl" "lkjhxfdsa" = Some 9);
assert (distance ~bound:2 "sydny meyer" "sydney meier" = Some 2);
assert (distance ~bound:2 "sydny meyer" "sydney meiex" = None);
assert (distance "<helljlkjlkjlkjlkjlkjlklkjlkjlo>"
"--hellosdfsdfsdfsdfsdfsdfsdfsdfsdfsdfsdf" = Some 36)
let (let*) = Option.bind
let test2 =
let contenders = ["pull"; "push"; "clone"; "init"; "filter-branch"] in
let typo = "puls" in
let contenders = List.filter_map (fun s ->
let* d = distance ~bound:2 typo s in
Some (d, s)) contenders in
assert (List.sort compare contenders = [1, "pull"; 2, "push"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment