Skip to content

Instantly share code, notes, and snippets.

@musicm122
Created July 12, 2014 22:02
Show Gist options
  • Save musicm122/58a1313b95b9d577df84 to your computer and use it in GitHub Desktop.
Save musicm122/58a1313b95b9d577df84 to your computer and use it in GitHub Desktop.
TheTokenizer
/// <summary>
/// Splits a search expression into tokens: the contents of double-quoted
/// strings plus the <c>not</c> / <c>and</c> / <c>or</c> keywords around them.
/// </summary>
/// <param name="expression">
/// The raw expression, e.g. <c>not "foo" and "bar baz"</c>.
/// </param>
/// <returns>
/// The trimmed, non-blank tokens in source order (for the example above:
/// <c>not</c>, <c>foo</c>, <c>and</c>, <c>bar baz</c>). Tokens are returned in
/// their XML-escaped form, so <c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c> and
/// <c>'</c> inside a quoted string appear as entities. Quoted strings that are
/// empty or contain only whitespace produce no token.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="expression"/> is <c>null</c>.
/// </exception>
public static List<string> Tokenize(string expression)
{
    // Fail fast with the caller's parameter name; otherwise the null would only
    // surface later from Regex.Split as a complaint about its own "input" argument.
    if (expression == null)
    {
        throw new ArgumentNullException("expression");
    }

    // Escaping rewrites every double quote as &quot; (and apostrophes as &apos;),
    // which is easier to search for in the pattern than a literal quote character.
    // A literal "&quot;" typed by the user cannot be confused with a real quote,
    // because its ampersand is itself escaped to "&amp;".
    string escaped = System.Security.SecurityElement.Escape(expression);

    // Group 1: optional leading "not" (with its trailing whitespace).
    // Group 2: the text between a pair of escaped quotes. This is ".*?" rather
    //          than ".+?" so that an empty string ("") closes on its own quote
    //          instead of pairing with the opening quote of the next string.
    // Group 3: an "and"/"or" that directly follows the closing quote. Operators
    //          separated from the quote by whitespace are not matched here; they
    //          come back from Split as the text between two matches.
    const string pattern = "(not\\s+)?&quot;(.*?)&quot;(and|or)?";

    // Regex.Split returns the text between matches interleaved with every group
    // that participated in a match. The static overload goes through the
    // framework's regex cache, so the pattern is not re-parsed on every call.
    return Regex.Split(escaped, pattern)
        .Select(token => token.Trim())
        // Drop the empty segments Split emits around matches, plus any token
        // that was blank to begin with.
        .Where(token => !String.IsNullOrWhiteSpace(token))
        .ToList();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment