Skip to content

Instantly share code, notes, and snippets.

@whyrusleeping
Created December 1, 2012 21:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save whyrusleeping/4185399 to your computer and use it in GitHub Desktop.
Save whyrusleeping/4185399 to your computer and use it in GitHub Desktop.
fuzzy string comp
public const float SCALE = 1.5;
public double compStr(string a, string b)
{
return (double)lcSubString2(a, b)/ Math.Pow((a.Length > b.Length ? a.Length : b.Length), SCALE);
}
public int lcSubString2(string _a, string _b)
{
string max = "";
int maxai = 0;
int maxbi = 0;
string substr = "";
for (int i = 0; i < _a.Length; i++)
{
for (int j = 0; j < _b.Length; j++)
{
substr = string.Empty;
for (int k = 0; i + k < _a.Length && j + k < _b.Length; k++)
{
if (_a[i + k] == ' ' || _b[j + k] == ' ')
break;
if (_a[i + k] == _b[j + k])
{
substr += _a[i + k];
}
else
{
break;
}
if (substr.Length > max.Length)
{
max = substr;
maxai = i;
maxbi = j;
}
}
}
}
string string1_part1 = _a.Substring(0, _a.IndexOf(max));
string string1_part2 = _a.Substring(_a.IndexOf(max) + max.Length, _a.Length - (_a.IndexOf(max) + max.Length));
string minusa = string1_part1 + " " + string1_part2;
string1_part1 = _b.Substring(0, _b.IndexOf(max));
string1_part2 = _b.Substring(_b.IndexOf(max) + max.Length, _b.Length - (_b.IndexOf(max) + max.Length));
string minusb = string1_part1 + " " + string1_part2;
int retval = Math.Pow( max.Length, SCALE);
if (minusa.Length > 1 && minusb.Length > 1 && max.Length > 0)
retval += lcSubString2(minusa, minusb);
return retval;
}
//Compare Strings
//Returns a percentage likelyhood that strings a and b are the same
double compStr(string a, string b)
{
return (double)lcSubStr(a,b) / (a.length() > b.length() ? a.length() : b.length());
}
//Longest Common Substrings
//returns the number of characters in all substrings that a and b share
int lcSubStr(string a, string b)
{
string max = "";
int maxai = 0;
int maxbi = 0;
string substr = "";
for(int i = 0; i < a.length(); i++)
{
for(int j = 0; j < b.length(); j++)
{
substr = "";
for(int k = 0; k + i < a.length() && k + j < b.length(); k++)
{
if(a[i+k] == b[j+k])
substr += a[i+k];
else
break;
}
if(substr.length() > max.length())
{
max = substr;
maxai = i;
maxbi = j;
}
}
}
int retval = 0;
if(max.length() > 0)
{
//the left portion of the string 'a'
string aleft = a.substr(0,maxai);
//the right portion of the string 'a'
string aright = a.substr(maxai + max.length());
string bleft = b.substr(0,maxbi);
string bright = b.substr(maxbi + max.length());
//if there are characters to compare on the left side, do so
if(aleft.length() > 0 && bleft.length() > 0)
retval += lcSubStr(aleft, bleft);
//same for the right side
if(aright.length() > 0 && bright.length() > 0)
retval += lcSubStr(aright, bright);
}
//return the total number of shared characters
return retval + max.length();
}
double compStr(string a, string b)
{
return (double)lcSubStr(a,b) / (a.length() > b.length() ? a.length() : b.length());
}
int lcSubStr(string a, string b)
{
string max = "";
int maxai = 0;
int maxbi = 0;
string substr = "";
for(int i = 0; i < a.length(); i++)
{
for(int j = 0; j < b.length(); j++)
{
substr = "";
for(int k = 0; k + i < a.length() && k + j < b.length(); k++)
{
if(a[i+k] == b[j+k])
substr += a[i+k];
else
break;
}
if(substr.length() > max.length())
{
max = substr;
maxai = i;
maxbi = j;
}
}
}
int retval = 0;
if(max.length() > 0)
{
string aleft = a.substr(0,maxai);
string aright = a.substr(maxai + max.length());
string bleft = b.substr(0,maxbi);
string bright = b.substr(maxbi + max.length());
if(aleft.length() > 0 && bleft.length() > 0)
retval += lcSubStr(aleft, bleft);
if(aright.length() > 0 && bright.length() > 0)
retval += lcSubStr(aright, bright);
}
return retval + max.length();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment