Last active
July 19, 2021 18:52
-
-
Save shanenoi/991623842360b8f4338f469f0ce094b7 to your computer and use it in GitHub Desktop.
Find Substring Using Levenshtein Distance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"math" | |
"regexp" | |
) | |
var removePattern string = `(\s)` | |
var standarPatern string = `` | |
const stringMaxSize int = 100 | |
var defaultConst float64 = 1 // in some doc the cost is 2 | |
func LevenshteinDistance(string1, string2 string) int { | |
var lenStr1 = len(string1) | |
var lenStr2 = len(string2) | |
var matrix = [stringMaxSize][stringMaxSize]float64{} | |
var max = math.Max(float64(lenStr1), float64(lenStr2)) | |
var index float64 = 0 | |
for index = 0; index < max; index++ { | |
if index < float64(lenStr1) { | |
matrix[int(index)][0] = index | |
} | |
if index < float64(lenStr2) { | |
matrix[0][int(index)] = index | |
} | |
} | |
var cost float64 = 0 | |
for indexStr1 := 1; indexStr1 < lenStr1; indexStr1++ { | |
for indexStr2 := 1; indexStr2 < lenStr2; indexStr2++ { | |
if string1[indexStr1] == string2[indexStr2] { | |
cost = 0 | |
} else { | |
// cost = 1 | |
cost = defaultConst | |
} | |
matrix[indexStr1][indexStr2] = math.Min( | |
float64(matrix[indexStr1 - 1][indexStr2] + 1), | |
math.Min( | |
float64(matrix[indexStr1][indexStr2 - 1] + 1), | |
float64(matrix[indexStr1 - 1][indexStr2 - 1] + cost))) | |
} | |
} | |
return int(matrix[lenStr1 - 1][lenStr2 - 1]) | |
} | |
func GetPercentageSimular(distance, lenStr1, lenStr2 int) float64 { | |
return float64(lenStr1 + lenStr2 - distance) / float64(lenStr1 + lenStr2) | |
} | |
func SearchInclude(string1, string2 string) float64 { | |
reg := regexp.MustCompile(removePattern) | |
string1 = reg.ReplaceAllString(string1, standarPatern) | |
string2 = reg.ReplaceAllString(string2, standarPatern) | |
var lenStr1 int = len(string1) | |
var lenStr2 int = len(string2) | |
if lenStr1 > lenStr2 { | |
string1, string2 = string2, string1 | |
} | |
var index int = 0 | |
var result int = LevenshteinDistance(string1, string2[0 : lenStr1]) | |
for true { | |
index += lenStr1 | |
if !(index < lenStr2) { | |
break | |
} | |
tmp := LevenshteinDistance(string1, string2[index - lenStr1 : index]) | |
if tmp < result { | |
result = tmp | |
} | |
} | |
return GetPercentageSimular(result, lenStr1, lenStr2) | |
} | |
func main() { | |
// fmt.Println(LevenshteinDistance("một lầu + sân thượng", "một lầu sân thượng")) | |
fmt.Println(SearchInclude("Terminal In Required Directory", "once your code is done. just run the command \" gofmt -s -w .\" in terminal in required directory or else in needed file.")) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment