Skip to content

Instantly share code, notes, and snippets.

@wong2
Created October 5, 2012 08:01
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save wong2/3838670 to your computer and use it in GitHub Desktop.
Levenshtein Distance Algorithm
package main
import (
"fmt"
"levenshtein"
)
func main() {
r1 := levenshtein.GetDistance("今天的话", "明天再说")
r2 := levenshtein.GetSimilarity("今天天气不错, hello world", "今天天气不错呢, bye bye world")
fmt.Println(r1, r2)
}
package levenshtein
import (
"math"
"unicode/utf8"
)
// http://en.wikipedia.org/wiki/Levenshtein_distance
func GetDistance(s1, s2 string) int {
var cost, m, n, i, j int
m, n = utf8.RuneCountInString(s1), utf8.RuneCountInString(s2)
if m == 0 {
return n
}
if n == 0 {
return m
}
d := make([][]int, m + 1)
for i = 0; i <= m; i++ {
d[i] = make([]int, n + 1)
}
for i = 0; i <= m; i++ {
d[i][0] = i
}
for j = 0; j <= n; j++ {
d[0][j] = j
}
i = 1
for _, c1 := range s1 {
j = 1
for _, c2 := range s2 {
if c1 == c2 {
cost = 0
} else {
cost = 1
}
min1 := float64(d[i-1][j] + 1)
min2 := float64(d[i][j-1] + 1)
min3 := float64(d[i-1][j-1] + cost)
d[i][j] = int(math.Min(math.Min(min1, min2), min3))
j += 1
}
i += 1
}
return d[m][n]
}
// http://www.miislita.com/searchito/levenshtein-edit-distance.html
func GetSimilarity(s1, s2 string) float64 {
dis := float64(GetDistance(s1, s2))
max_len := math.Max( float64(utf8.RuneCountInString(s1)), float64(utf8.RuneCountInString(s2)) )
return 1 - ( dis / max_len )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment