Skip to content

Instantly share code, notes, and snippets.

@hideaki-t
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hideaki-t/1fa507f8bc7cad495f5b to your computer and use it in GitHub Desktop.
Save hideaki-t/1fa507f8bc7cad495f5b to your computer and use it in GitHub Desktop.
Unicode-aware Levenshtein distance by Rust
fn main() {
fn f(a:&str, b:&str) {
println!("{}:{} => {}", a, b, levenshtein_distance(a, b));
}
f("こんにちは", "こんばんは");
f("hello", "ell0");
f("hello", "hello");
f("こんにちは", "hello");
f("ABC", "ABC");
f("\u{1F369}", "\u{1F36A}");
}
fn levenshtein_distance(a:&str, b:&str) -> usize {
let ac: Vec<char> = a.chars().collect();
let bc: Vec<char> = b.chars().collect();
let alen = ac.len() + 1;
let blen = bc.len() + 1;
let mut mat = vec![vec![0; blen]; alen];
for i in (0..blen) { mat[0][i] = i; }
for i in (1..alen) {
mat[i][0] = i;
for j in (1..blen) {
let m = [mat[i - 1][j] + 1,
mat[i][j - 1] + 1,
mat[i - 1][j - 1] + if ac[i - 1] == bc[j - 1] { 0 } else { 1 }];
mat[i][j] = *m.iter().min().unwrap();
}
}
return mat[alen - 1][blen - 1];
}
こんにちは:こんばんは => 2
hello:ell0 => 2
hello:hello => 0
こんにちは:hello => 5
ABC:ABC => 1
🍩:🍪 => 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment