Skip to content

Instantly share code, notes, and snippets.

@sonyseng
Last active April 17, 2016 04:23
Show Gist options
  • Save sonyseng/8657ba1f811df6f9b2e26b00f89fe9cd to your computer and use it in GitHub Desktop.
Save sonyseng/8657ba1f811df6f9b2e26b00f89fe9cd to your computer and use it in GitHub Desktop.
Will tokenize quoted and unquoted strings for searching
// Sample search input: a double-quoted phrase, two bare words, a single-quoted
// phrase that itself contains double quotes, two more bare words, and a final
// bare word that ends with a stray single quote.
var str = [
  '"hello world"',
  'arg1',
  'arg2',
  "'multi arg that has nested strings \"haha hehe\"'",
  'arg3',
  'anotherArg',
  "_arg4_'"
].join(' ');
/**
 * Splits a search string into tokens.
 *
 * Tokens are separated by space characters (runs of spaces are skipped).
 * A token that starts with a double or single quote extends to the next
 * occurrence of that same quote character; it may contain spaces and the
 * other kind of quote, and the surrounding quotes are dropped. A quote with
 * no closing partner consumes the rest of the input. Quote characters that
 * appear inside or at the end of an unquoted token are kept literally.
 *
 * @param {string} str - Raw input to tokenize. `null`/`undefined` yields [].
 * @returns {string[]} Tokens in order of appearance (an empty quoted pair
 *   such as `""` produces an empty-string token).
 */
function tokenizer (str) {
  var tokens = [];
  if (str == null) {
    return tokens;
  }

  var strLen = str.length;
  var i = 0;
  var ch, end;

  while (i < strLen) {
    ch = str[i];

    // Separators carry no content; just step over them.
    if (ch === ' ') {
      i++;
      continue;
    }

    if (ch === '"' || ch === '\'') {
      // Quoted token: everything up to the matching quote, or to the end of
      // the input when the quote is never closed.
      end = str.indexOf(ch, i + 1);
      if (end === -1) {
        end = strLen;
      }
      tokens.push(str.slice(i + 1, end));
      // Resume right after the closing quote and re-dispatch from the top of
      // the loop, so a quote that immediately follows (e.g. 'a'"b c") opens a
      // new quoted token instead of being read as a plain character.
      i = end + 1;
      continue;
    }

    // Unquoted token: runs up to the next space or the end of the input.
    end = str.indexOf(' ', i);
    if (end === -1) {
      end = strLen;
    }
    tokens.push(str.slice(i, end));
    i = end;
  }

  return tokens;
}
// Run the tokenizer on the sample input. The return value is not stored here;
// the listing below shows the array the call returns.
tokenizer(str);
/* Output (index:token; the surrounding double quotes only delimit each
string and are not part of the token):
array ==>
0:"hello world"
1:"arg1"
2:"arg2"
3:"multi arg that has nested strings "haha hehe""
4:"arg3"
5:"anotherArg"
6:"_arg4_'"
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment