Last active
May 3, 2020 11:11
-
-
Save xinlc/306fea9f9506f63a8849aab0e1804948 to your computer and use it in GitHub Desktop.
similarity算法,编辑距离
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function compare(x, y) { | |
var z = 0; | |
var s = x.length + y.length;; | |
x.sort(); | |
y.sort(); | |
var a = x.shift(); | |
var b = y.shift(); | |
while(a !== undefined && b !== undefined) { | |
if (a === b) { | |
z++; | |
a = x.shift(); | |
b = y.shift(); | |
} else if (a < b) { | |
a = x.shift(); | |
} else if (a > b) { | |
b = y.shift(); | |
} | |
} | |
return z/s * 200; | |
} | |
console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello'])) | |
console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello'].sort())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* 相似度算法之编辑距离(Levenshtein Distance) | |
* | |
* @author Leo | |
* @since 2020.05-03 | |
*/ | |
public class SimilarityUtil { | |
public static float getSimilarityRatio(String str, String target) { | |
return 1 - (float) compare(str, target) / Math.max(str.length(), target.length()); | |
} | |
private static int compare(String str, String target) { | |
int d[][]; // 矩阵 | |
int n = str.length(); | |
int m = target.length(); | |
int i; // 遍历str的 | |
int j; // 遍历target的 | |
char ch1; // str的 | |
char ch2; // target的 | |
int temp; // 记录相同字符,在某个矩阵位置值的增量,不是0就是1 | |
if (n == 0) { | |
return m; | |
} | |
if (m == 0) { | |
return n; | |
} | |
d = new int[n + 1][m + 1]; | |
for (i = 0; i <= n; i++) { // 初始化第一列 | |
d[i][0] = i; | |
} | |
for (j = 0; j <= m; j++) { // 初始化第一行 | |
d[0][j] = j; | |
} | |
for (i = 1; i <= n; i++) { // 遍历str | |
ch1 = str.charAt(i - 1); | |
// 去匹配target | |
for (j = 1; j <= m; j++) { | |
ch2 = target.charAt(j - 1); | |
if (ch1 == ch2) { | |
temp = 0; | |
} else { | |
temp = 1; | |
} | |
// 左边+1,上边+1, 左上角+temp取最小 | |
d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + temp); | |
} | |
} | |
return d[n][m]; | |
} | |
private static int min(int one, int two, int three) { | |
return (one = one < two ? one : two) < three ? one : three; | |
} | |
/** | |
* 采用动态规划的方法(字符串匹配相似度) | |
* | |
* @param source 源 | |
* @param target 要匹配的字符串 | |
* @return | |
*/ | |
public static int EditDistance(String source, String target) { | |
char[] sources = source.toCharArray(); | |
char[] targets = target.toCharArray(); | |
int sourceLen = sources.length; | |
int targetLen = targets.length; | |
int[][] d = new int[sourceLen + 1][targetLen + 1]; | |
for (int i = 0; i <= sourceLen; i++) { | |
d[i][0] = i; | |
} | |
for (int i = 0; i <= targetLen; i++) { | |
d[0][i] = i; | |
} | |
for (int i = 1; i <= sourceLen; i++) { | |
for (int j = 1; j <= targetLen; j++) { | |
if (sources[i - 1] == targets[j - 1]) { | |
d[i][j] = d[i - 1][j - 1]; | |
} else { | |
//插入 | |
int insert = d[i][j - 1] + 1; | |
//删除 | |
int delete = d[i - 1][j] + 1; | |
//替换 | |
int replace = d[i - 1][j - 1] + 1; | |
// d[i][j] = Math.min(Math.min(insert, delete), Math.min(delete, replace)); | |
d[i][j] = Math.min(insert, delete) > Math.min(delete, replace) ? Math.min(delete, replace) : | |
Math.min(insert, delete); | |
} | |
} | |
} | |
return d[sourceLen][targetLen]; | |
} | |
public static void main(String[] args) { | |
System.out.println(SimilarityUtil.getSimilarityRatio("My string", "My tsring")); | |
System.out.println(SimilarityUtil.getSimilarityRatio("My string", "My ntrisg")); | |
System.out.println(SimilarityUtil.getSimilarityRatio("我爱中国", "我爱国")); | |
System.out.println(SimilarityUtil.EditDistance("My string", "My tsring")); | |
System.out.println(SimilarityUtil.EditDistance("My string", "My ntrisg")); | |
System.out.println(SimilarityUtil.EditDistance("我爱中国", "我爱国")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment