Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save thedom85/a8bf25cb9fd609fe88cd to your computer and use it in GitHub Desktop.
Save thedom85/a8bf25cb9fd609fe88cd to your computer and use it in GitHub Desktop.
CSharp_ JaroWinklerDistance_distance_ MeasureOfSimilarityBetweenTwoStrings
void Main()
{
//Inspired By: http://stackoverflow.com/questions/19123506/jaro-winkler-distance-algorithm-in-c-sharp
//Inspired By: http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
Console.Write(string.Format("distance(apple,mela): {0} \n",JaroWinklerDistance.distance("apple","apple")));
Console.Write(string.Format("distance(aa,ab): {0} \n",JaroWinklerDistance.distance("aa","ab")));
Console.Write(string.Format("distance(apple,aplpe): {0} \n",JaroWinklerDistance.distance("apple","aplpe")));
Console.Write(string.Format("distance(apple,pear): {0} \n",JaroWinklerDistance.distance("apple","pear")));
Console.Write(string.Format("distance(apple,Banana): {0} \n",JaroWinklerDistance.distance("apple","Banana")));
Console.Write(string.Format("distance(apple,Kiwi): {0} \n",JaroWinklerDistance.distance("apple","Kiwi")));
//Out:
//distance(apple,mela): 0
//distance(aa,ab): 0,333333333333333
//distance(apple,aplpe): 0,0533333333333335
//distance(apple,pear): 0,516666666666667
//distance(apple,Banana): 0,544444444444445
//distance(apple,Kiwi): 1
}
public static class JaroWinklerDistance
{
/* The Winkler modification will not be applied unless the
* percent match was at or above the mWeightThreshold percent
* without the modification.
* Winkler's paper used a default value of 0.7
*/
private static readonly double mWeightThreshold = 0.7;
/* Size of the prefix to be concidered by the Winkler modification.
* Winkler's paper used a default value of 4
*/
private static readonly int mNumChars = 4;
/// <summary>
/// Returns the Jaro-Winkler distance between the specified
/// strings. The distance is symmetric and will fall in the
/// range 0 (perfect match) to 1 (no match).
/// </summary>
/// <param name="aString1">First String</param>
/// <param name="aString2">Second String</param>
/// <returns></returns>
public static double distance(string aString1, string aString2) {
return 1.0 - proximity(aString1,aString2);
}
/// <summary>
/// Returns the Jaro-Winkler distance between the specified
/// strings. The distance is symmetric and will fall in the
/// range 0 (no match) to 1 (perfect match).
/// </summary>
/// <param name="aString1">First String</param>
/// <param name="aString2">Second String</param>
/// <returns></returns>
public static double proximity(string aString1, string aString2)
{
int lLen1 = aString1.Length;
int lLen2 = aString2.Length;
if (lLen1 == 0)
return lLen2 == 0 ? 1.0 : 0.0;
int lSearchRange = Math.Max(0,Math.Max(lLen1,lLen2)/2 - 1);
bool[] lMatched1 = new bool[lLen1];
for (int i = 0; i < lMatched1.Length; i++)
{
lMatched1[i] = false;
}
bool[] lMatched2 = new bool[lLen2];
for (int i = 0; i < lMatched2.Length; i++)
{
lMatched2[i] = false;
}
int lNumCommon = 0;
for (int i = 0; i < lLen1; ++i) {
int lStart = Math.Max(0,i-lSearchRange);
int lEnd = Math.Min(i+lSearchRange+1,lLen2);
for (int j = lStart; j < lEnd; ++j) {
if (lMatched2[j]) continue;
if (aString1[i] != aString2[j])
continue;
lMatched1[i] = true;
lMatched2[j] = true;
++lNumCommon;
break;
}
}
if (lNumCommon == 0) return 0.0;
int lNumHalfTransposed = 0;
int k = 0;
for (int i = 0; i < lLen1; ++i) {
if (!lMatched1[i]) continue;
while (!lMatched2[k]) ++k;
if (aString1[i] != aString2[k])
++lNumHalfTransposed;
++k;
}
// System.Diagnostics.Debug.WriteLine("numHalfTransposed=" + numHalfTransposed);
int lNumTransposed = lNumHalfTransposed/2;
// System.Diagnostics.Debug.WriteLine("numCommon=" + numCommon + " numTransposed=" + numTransposed);
double lNumCommonD = lNumCommon;
double lWeight = (lNumCommonD/lLen1
+ lNumCommonD/lLen2
+ (lNumCommon - lNumTransposed)/lNumCommonD)/3.0;
if (lWeight <= mWeightThreshold) return lWeight;
int lMax = Math.Min(mNumChars,Math.Min(aString1.Length,aString2.Length));
int lPos = 0;
while (lPos < lMax && aString1[lPos] == aString2[lPos])
++lPos;
if (lPos == 0) return lWeight;
return lWeight + 0.1 * lPos * (1.0 - lWeight);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment