Skip to content

Instantly share code, notes, and snippets.

@mAlishera
Created March 3, 2019 21:57
Show Gist options
  • Save mAlishera/0ff25d94f09f307f127d6dcfe5da12d6 to your computer and use it in GitHub Desktop.
Save mAlishera/0ff25d94f09f307f127d6dcfe5da12d6 to your computer and use it in GitHub Desktop.
Proper Damerau-Levenshtein edit distance algorithm in Go
package damerau_levenshtein
// O(|s| x |t|)
// a and b are zero-indexed, not one-indexed
func Distance(s, t string) int {
var i, j, cost int
m, n := len(s), len(t)
d := make([][]int, m+1)
for i = 0; i <= m; i++ {
d[i] = make([]int, n+1)
d[i][0] = i
}
// d[0][0] already set by previous loop
for j = 1; j <= n; j++ {
d[0][j] = j
}
for i = 0; i < m; i++ {
for j = 0; j < n; j++ {
if s[i] == t[j] {
cost = 0
} else {
cost = 1
}
new_d := d[i][j+1] + 1 // deletion
if insert_cost := d[i+1][j] + 1; insert_cost < new_d {
new_d = insert_cost
}
if subst_cost := d[i][j] + cost; subst_cost < new_d {
new_d = subst_cost
}
if i > 0 && j > 0 && s[i] == t[j-1] && s[i-1] == t[j] {
if trans_cost := d[i-1][j-1] + cost; trans_cost < new_d {
new_d = trans_cost
}
}
d[i+1][j+1] = new_d
}
}
return d[m][n]
}
package damerau_levenshtein
import (
"testing"
"testing/quick"
)
func TestFuzz(t *testing.T) {
f := func(s0, s1 string) bool {
t.Logf("testing Distance(len: %d, len: %d)\n", len(s0), len(s1))
d := Distance(s0, s1)
t.Logf("testing Distance(len: %d, len: %d) -> %d\n", len(s0), len(s1), d)
return d >= 0
}
if err := quick.Check(f, &quick.Config{MaxCount: 1000, MaxCountScale: 1.0}); err != nil {
t.Error(err)
}
}
func TestTwoEmpty(t *testing.T) {
if Distance("", "") != 0 {
t.Error("oops")
}
}
func TestOneEmpty(t *testing.T) {
if Distance("a", "") != 1 {
t.Error("oops0")
}
if Distance("", "a") != 1 {
t.Error("oops1")
}
}
func TestOneEqual(t *testing.T) {
if Distance("a", "a") != 0 {
t.Error("oops0")
}
if Distance("b", "b") != 0 {
t.Error("oops1")
}
}
func TestTrans(t *testing.T) {
if d := Distance("ab", "ba"); d != 1 {
t.Error("oops0", d, "!= 1")
}
if d := Distance("abc", "bac"); d != 1 {
t.Error("oops1", d, "!= 1")
}
if d := Distance("abb", "bab"); d != 1 {
t.Error("oops2", d, "!= 1")
}
if d := Distance("abc", "acb"); d != 1 {
t.Error("oops3", d, "!= 1")
}
if d := Distance("ab", "b"); d != 1 {
t.Error("oops4", d, "!= 1")
}
if d := Distance("abcdef", "abdcef"); d != 1 {
t.Error("oops5", d, "!= 1")
}
}
func TestSubst(t *testing.T) {
if d := Distance("abcdefgh", "abcZefgh"); d != 1 {
t.Error("oops0", d, "!= 1")
}
}
func TestDel(t *testing.T) {
if d := Distance("abcdefgh", "abcefgh"); d != 1 {
t.Error("oops0", d, "!= 1")
}
if d := Distance("bcdefgh", "abcdefgh"); d != 1 {
t.Error("oops1", d, "!= 1")
}
if d := Distance("abcdefgh", "abcdefg"); d != 1 {
t.Error("oops2", d, "!= 1")
}
}
func TestAdd(t *testing.T) {
if d := Distance("abcdefgh", "abcdefghi"); d != 1 {
t.Error("oops0", d, "!= 1")
}
if d := Distance("abcdefgh", "abcdZefgh"); d != 1 {
t.Error("oops1", d, "!= 1")
}
if d := Distance("abcdefgh", "0abcdefgh"); d != 1 {
t.Error("oops2", d, "!= 1")
}
}
func TestTwoUnequal(t *testing.T) {
if d := Distance("abcdefgh", "abcZegh"); d != 2 {
t.Error("oops0", d, "!= 2")
}
if d := Distance("abcdefgh", "abZegh"); d != 3 {
t.Error("oops1", d, "!= 3")
}
if d := Distance("abcdefgh", "abcZgh"); d != 3 {
t.Error("oops2", d, "!= 3")
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment