Skip to content

Instantly share code, notes, and snippets.

@richardkundl
Created November 14, 2013 15:15
Show Gist options
  • Save richardkundl/7468541 to your computer and use it in GitHub Desktop.
Save richardkundl/7468541 to your computer and use it in GitHub Desktop.
Get Sift3 distance from two words. Simmilar to the Levenshtein distance, but it's four time faster. Verifying that(http://norvig.com/ngrams/) data sets.
/// <summary>
/// Get Sift3 distance from two words.
/// (eg: spell checking)
/// </summary>
/// <param name="source"> source string </param>
/// <param name="actual"> actual string </param>
/// <param name="maxOffset"> max offset </param>
/// <returns> The distance of the two words. </returns>
public static float GetSift3Distance(string source, string actual, int maxOffset)
{
if (string.IsNullOrEmpty(source))
{
return string.IsNullOrEmpty(actual) ? 0 : actual.Length;
}
if (string.IsNullOrEmpty(actual))
{
return source.Length;
}
int c = 0;
int offset1 = 0;
int offset2 = 0;
int lcs = 0;
while ((c + offset1 < source.Length) && (c + offset2 < actual.Length))
{
if (source[c + offset1] == actual[c + offset2])
{
lcs++;
}
else
{
offset1 = 0;
offset2 = 0;
for (int i = 0; i < maxOffset; i++)
{
if ((c + i < source.Length) && (source[c + i] == actual[c]))
{
offset1 = i;
break;
}
if ((c + i < actual.Length) && (source[c] == actual[c + i]))
{
offset2 = i;
break;
}
}
}
c++;
}
return ((source.Length + actual.Length) / 2) - lcs;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment