Created
February 1, 2015 21:24
-
-
Save MatthewVines/e7f00481abad5b00326e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static class LevenshteinDistance | |
{ | |
/// <summary> | |
/// Compute the distance between two strings. | |
/// The distance between the strings is defined as the minimum | |
/// number of edits needed to transform one string into the other, | |
/// with the allowable edit operations being insertion, deletion, | |
/// or substitution of a single character. | |
/// </summary> | |
public static int CalculateDistance(string s, string t) | |
{ | |
int n = s.Length; | |
int m = t.Length; | |
int[,] d = new int[n + 1, m + 1]; | |
if (s == t) { return 0; } | |
if (n == 0) { return m; } | |
if (m == 0) { return n; } | |
for (int i = 0; i <= n; d[i, 0] = i++) {} | |
for (int i = 0; i <= m; d[0, i] = i++) {} | |
for (int i = 1; i <= n; i++) | |
{ | |
for (int j = 1; j <= m; j++) | |
{ | |
int cost = (t[j - 1] == s[i - 1]) ? 0 : 1; | |
d[i, j] = Math.Min(Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1), d[i - 1, j - 1] + cost); | |
} | |
} | |
return d[n, m]; | |
} | |
/// <summary> | |
/// Gets the similarity between 2 strings as a fractional value between [0, 1]. | |
/// </summary> | |
/// <returns> | |
/// Returns relation scores as a percentage in the [0, 1] range, | |
/// which means that if the score gets a maximum value (equal to 1) | |
/// then the two string are absolutely similar | |
/// </returns> | |
public static double GetSimilarity(String s1, String s2) | |
{ | |
if (s1 == null || s2 == null) | |
{ | |
return 0d; | |
} | |
double distance = CalculateDistance(s1, s2); | |
double maxLength = Math.Max(s1.Length, s2.Length); | |
if (maxLength == 0d) | |
{ | |
return 1d; | |
} | |
return 1d - (distance / maxLength); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment