Last active
December 28, 2015 15:39
-
-
Save hydra1983/7523550 to your computer and use it in GitHub Desktop.
Jaro-Winkler Distance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class JaroWinklerDistance { | |
public static void main(String[] args) { | |
String s1 = "MARTHA"; | |
String s2 = "MARHTA"; | |
// String s1 = "MARTHA"; | |
// String s2 = "MARHTA"; | |
// int range = Math.floor(Math.max(s1.length(),s2.length())/2) - 1 = 2; | |
// int isMatched = (s1.charAt(row) == s2.charAt(col) && Math.abs(row - col) <= range) ? 0 : 1; | |
// 0 1 2 3 4 5 t | |
// M A R H T A | |
//0 M 0 -1 -1 -1 -1 -1 0 == 0 | |
//1 A -1 1 -1 -1 -1 -1 1 == 1 | |
//2 R -1 -1 2 -1 -1 -1 2 == 2 | |
//3 T -1 -1 -1 -1 4 -1 4 != 3 | |
//4 H -1 -1 -1 3 -1 -1 3 != 4 | |
//5 A -1 -1 -1 -1 -1 5 5 == 5 | |
System.out.println(MessageFormat | |
.format("Jaro Distance of ({0} , {1}) is : {2}", s1, s2, | |
jaroDist(s1, s2))); | |
System.out.println(MessageFormat.format( | |
"Jaro Winkler Distance of ({0} , {1}) is : {2}", s1, s2, | |
jaroWinklerDist(s1, s2))); | |
} | |
private static double jaroDist(String s1, String s2) { | |
int len1 = s1.length(); | |
int len2 = s2.length(); | |
int[] array = new int[len2]; | |
for (int j = 0; j < len2; j++) { | |
array[j] = -1; | |
} | |
int max = Math.max(s1.length(), s2.length()); | |
int range = (int) Math.floor((double) max / 2) - 1; | |
double m = 0; | |
for (int i = 0; i < len1; i++) { | |
boolean isMatched = false; | |
for (int j = 0; j < len2; j++) { | |
if (s1.charAt(i) == s2.charAt(j) && Math.abs(j - i) <= range) { | |
array[j] = (int) m++; | |
isMatched = true; | |
} | |
if (isMatched) { | |
break; | |
} | |
} | |
} | |
if (m == 0) { | |
return 0; | |
} | |
int i = 0; | |
double t = 0; | |
for (int j = 0; j < len2; j++) { | |
if (array[j] != -1) { | |
if (array[j] != i++) { | |
t++; | |
} | |
} | |
} | |
t = t / 2; | |
return (m / len1 + m / len2 + (m - t) / m) / 3; | |
} | |
private static double jaroWinklerDist(String s1, String s2) { | |
double p = 0.1; | |
int l = 0; | |
int minL = Math.min(Math.min(s1.length(), s2.length()), 4); | |
for (int i = 0; i < minL; i++) { | |
if (s1.charAt(i) != s2.charAt(i)) { | |
break; | |
} | |
l++; | |
} | |
double dj = jaroDist(s1, s2); | |
return dj + p * l * (1 - dj); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment