Created
July 12, 2014 22:02
-
-
Save musicm122/58a1313b95b9d577df84 to your computer and use it in GitHub Desktop.
TheTokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Splits a boolean search expression built from double-quoted terms into tokens:
/// an optional leading "not", each quoted term (without its quotes), and the
/// "and"/"or" operators that join terms.
/// </summary>
/// <param name="expression">Expression text, e.g. <c>not "a" and "b"</c>.</param>
/// <returns>
/// The trimmed, non-blank tokens in source order. The input is XML-escaped before
/// splitting and is not unescaped afterwards, so characters that
/// SecurityElement.Escape rewrites (ampersand, apostrophe, angle brackets)
/// appear in the returned tokens in their escaped form.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="expression"/> is null.
/// </exception>
public static List<string> Tokenize(string expression)
{
    if (expression == null)
    {
        throw new ArgumentNullException("expression");
    }

    // Escaping replaces every double quote with &quot; and every apostrophe with
    // &apos;, so the pattern below can look for the entity text instead of having
    // to embed literal quote characters in the regex.
    var tempExpression = System.Security.SecurityElement.Escape(expression);
    const string doubleQuote = "&quot;";

    // Pattern: an optional "not" plus whitespace, then a quoted string (captured
    // lazily, without the quotes), then an optional "and"/"or" placed directly
    // after the closing quote. An operator separated from the term by whitespace
    // is not consumed by the pattern; it is returned by Split as the text lying
    // between two matches.
    var pattern = string.Format("(not\\s+)?{0}(.+?){0}(and|or)?", doubleQuote);
    var splitter = new Regex(pattern);

    // Regex.Split returns the text between matches plus every capture group that
    // took part in a match; each piece is trimmed here.
    var pieces = splitter.Split(tempExpression).Select(piece => piece.Trim());

    // Drop the empty/whitespace-only pieces Split produces around matches and
    // materialize the result as a list.
    return pieces.Where(piece => !String.IsNullOrWhiteSpace(piece)).ToList();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment