Skip to content

Instantly share code, notes, and snippets.

@cordje
Created January 28, 2016 13:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cordje/bd13c781337d8ffc30bf to your computer and use it in GitHub Desktop.
Save cordje/bd13c781337d8ffc30bf to your computer and use it in GitHub Desktop.
Damerau Levenshtein Distance With Threshold
public class DamerauLevenshteinDistanceWithThreshold {
public static int distance(String source, String target, int threshold) {
//this code was ported to Java from http://stackoverflow.com/questions/9453731/how-to-calculate-distance-similarity-measure-of-given-2-strings/9454016#9454016
int length1 = source.length();
int length2 = target.length();
// Return trivial case - difference in string lengths exceeds threshhold
if (Math.abs(length1 - length2) > threshold) { return 2147483647; }
// Ensure arrays [i] / length1 use shorter length
if (length1 > length2) {
int tempLength = length1;
length1 = length2;
length2 = tempLength;
String tempArg = target;
target = source;
source = tempArg;
}
int maxi = length1;
int maxj = length2;
int[] dCurrent = new int[maxi + 1];
int[] dMinus1 = new int[maxi + 1];
int[] dMinus2 = new int[maxi + 1];
int[] dSwap;
for (int i = 0; i <= maxi; i++) { dCurrent[i] = i; }
int jm1 = 0, im1 = 0, im2 = -1;
for (int j = 1; j <= maxj; j++) {
// Rotate
dSwap = dMinus2;
dMinus2 = dMinus1;
dMinus1 = dCurrent;
dCurrent = dSwap;
// Initialize
int minDistance = 2147483647;
dCurrent[0] = j;
im1 = 0;
im2 = -1;
for (int i = 1; i <= maxi; i++) {
int cost = source.charAt(im1) == target.charAt(jm1) ? 0 : 1;
int del = dCurrent[im1] + 1;
int ins = dMinus1[i] + 1;
int sub = dMinus1[im1] + cost;
//Fastest execution for min value of 3 integers
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del);
if (i > 1 && j > 1 && source.charAt(im2) == target.charAt(jm1) && source.charAt(im1) == target.charAt(j - 2))
min = Math.min(min, dMinus2[im2] + cost);
dCurrent[i] = min;
if (min < minDistance) { minDistance = min; }
im1++;
im2++;
}
jm1++;
if (minDistance > threshold) { return 2147483647; }
}
int result = dCurrent[maxi];
return (result > threshold) ? 2147483647 : result;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment