Created
January 28, 2016 13:32
-
-
Save cordje/bd13c781337d8ffc30bf to your computer and use it in GitHub Desktop.
Damerau Levenshtein Distance With Threshold
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class DamerauLevenshteinDistanceWithThreshold { | |
public static int distance(String source, String target, int threshold) { | |
//this code was ported to Java from http://stackoverflow.com/questions/9453731/how-to-calculate-distance-similarity-measure-of-given-2-strings/9454016#9454016 | |
int length1 = source.length(); | |
int length2 = target.length(); | |
// Return trivial case - difference in string lengths exceeds threshhold | |
if (Math.abs(length1 - length2) > threshold) { return 2147483647; } | |
// Ensure arrays [i] / length1 use shorter length | |
if (length1 > length2) { | |
int tempLength = length1; | |
length1 = length2; | |
length2 = tempLength; | |
String tempArg = target; | |
target = source; | |
source = tempArg; | |
} | |
int maxi = length1; | |
int maxj = length2; | |
int[] dCurrent = new int[maxi + 1]; | |
int[] dMinus1 = new int[maxi + 1]; | |
int[] dMinus2 = new int[maxi + 1]; | |
int[] dSwap; | |
for (int i = 0; i <= maxi; i++) { dCurrent[i] = i; } | |
int jm1 = 0, im1 = 0, im2 = -1; | |
for (int j = 1; j <= maxj; j++) { | |
// Rotate | |
dSwap = dMinus2; | |
dMinus2 = dMinus1; | |
dMinus1 = dCurrent; | |
dCurrent = dSwap; | |
// Initialize | |
int minDistance = 2147483647; | |
dCurrent[0] = j; | |
im1 = 0; | |
im2 = -1; | |
for (int i = 1; i <= maxi; i++) { | |
int cost = source.charAt(im1) == target.charAt(jm1) ? 0 : 1; | |
int del = dCurrent[im1] + 1; | |
int ins = dMinus1[i] + 1; | |
int sub = dMinus1[im1] + cost; | |
//Fastest execution for min value of 3 integers | |
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del); | |
if (i > 1 && j > 1 && source.charAt(im2) == target.charAt(jm1) && source.charAt(im1) == target.charAt(j - 2)) | |
min = Math.min(min, dMinus2[im2] + cost); | |
dCurrent[i] = min; | |
if (min < minDistance) { minDistance = min; } | |
im1++; | |
im2++; | |
} | |
jm1++; | |
if (minDistance > threshold) { return 2147483647; } | |
} | |
int result = dCurrent[maxi]; | |
return (result > threshold) ? 2147483647 : result; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment