Skip to content

Instantly share code, notes, and snippets.

@RomanN
Created August 13, 2009 11:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RomanN/167120 to your computer and use it in GitHub Desktop.
Save RomanN/167120 to your computer and use it in GitHub Desktop.
/// <summary>
/// Replaces typographic look-alike characters (curly quotes, primes, special
/// spaces, dashes, ...) with their plain ASCII equivalents.
/// </summary>
/// <remarks>
/// Every substitution is one UTF-16 code unit for one code unit, so the result
/// always has the same length as the input and all other characters keep their
/// positions. Because each pattern is a single character, a single pass with a
/// switch is sufficient; no multi-pattern search library is needed.
/// </remarks>
/// <param name="badString">Text to normalize. May be null or empty.</param>
/// <returns>
/// The normalized text; <paramref name="badString"/> itself when it is null or empty.
/// </returns>
private static string CleanInput(string badString)
{
    // Nothing to normalize; also avoids dereferencing a null input.
    if (string.IsNullOrEmpty(badString))
    {
        return badString;
    }

    char[] chars = badString.ToCharArray();
    for (int i = 0; i < chars.Length; i++)
    {
        switch (chars[i])
        {
            // Apostrophe look-alikes -> ' (U+0027)
            case '\u0060': // grave accent
            case '\u2018': // left single quotation mark
            case '\u2019': // right single quotation mark
            case '\u2032': // prime
            case '\u0301': // combining acute accent
            case '\u0300': // combining grave accent
                chars[i] = '\'';
                break;

            // Double-quote look-alikes -> " (U+0022)
            case '\u201C': // left double quotation mark
            case '\u201D': // right double quotation mark
            case '\u2033': // double prime
            case '\u3003': // ditto mark
                chars[i] = '"';
                break;

            // Special spaces -> regular space (U+0020)
            case '\u00A0': // no-break space
            case '\u2003': // em space
            case '\u2002': // en space
                chars[i] = ' ';
                break;

            // Dash and hyphen look-alikes -> hyphen-minus (U+002D)
            case '\u2010': // hyphen
            case '\u2013': // en dash
            case '\u2014': // em dash
            case '\u2212': // minus sign
            case '\u00AD': // soft hyphen
            case '\u2011': // non-breaking hyphen
            case '\u2043': // hyphen bullet
                chars[i] = '-';
                break;
        }
    }

    return new string(chars);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment