Skip to content

Instantly share code, notes, and snippets.

@hydra1983
Last active December 28, 2015 15:39
Show Gist options
  • Save hydra1983/7523550 to your computer and use it in GitHub Desktop.
Save hydra1983/7523550 to your computer and use it in GitHub Desktop.
Jaro-Winkler Distance
public class JaroWinklerDistance {
public static void main(String[] args) {
String s1 = "MARTHA";
String s2 = "MARHTA";
// String s1 = "MARTHA";
// String s2 = "MARHTA";
// int range = Math.floor(Math.max(s1.length(),s2.length())/2) - 1 = 2;
// int isMatched = (s1.charAt(row) == s2.charAt(col) && Math.abs(row - col) <= range) ? 0 : 1;
// 0 1 2 3 4 5 t
// M A R H T A
//0 M 0 -1 -1 -1 -1 -1 0 == 0
//1 A -1 1 -1 -1 -1 -1 1 == 1
//2 R -1 -1 2 -1 -1 -1 2 == 2
//3 T -1 -1 -1 -1 4 -1 4 != 3
//4 H -1 -1 -1 3 -1 -1 3 != 4
//5 A -1 -1 -1 -1 -1 5 5 == 5
System.out.println(MessageFormat
.format("Jaro Distance of ({0} , {1}) is : {2}", s1, s2,
jaroDist(s1, s2)));
System.out.println(MessageFormat.format(
"Jaro Winkler Distance of ({0} , {1}) is : {2}", s1, s2,
jaroWinklerDist(s1, s2)));
}
private static double jaroDist(String s1, String s2) {
int len1 = s1.length();
int len2 = s2.length();
int[] array = new int[len2];
for (int j = 0; j < len2; j++) {
array[j] = -1;
}
int max = Math.max(s1.length(), s2.length());
int range = (int) Math.floor((double) max / 2) - 1;
double m = 0;
for (int i = 0; i < len1; i++) {
boolean isMatched = false;
for (int j = 0; j < len2; j++) {
if (s1.charAt(i) == s2.charAt(j) && Math.abs(j - i) <= range) {
array[j] = (int) m++;
isMatched = true;
}
if (isMatched) {
break;
}
}
}
if (m == 0) {
return 0;
}
int i = 0;
double t = 0;
for (int j = 0; j < len2; j++) {
if (array[j] != -1) {
if (array[j] != i++) {
t++;
}
}
}
t = t / 2;
return (m / len1 + m / len2 + (m - t) / m) / 3;
}
private static double jaroWinklerDist(String s1, String s2) {
double p = 0.1;
int l = 0;
int minL = Math.min(Math.min(s1.length(), s2.length()), 4);
for (int i = 0; i < minL; i++) {
if (s1.charAt(i) != s2.charAt(i)) {
break;
}
l++;
}
double dj = jaroDist(s1, s2);
return dj + p * l * (1 - dj);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment