// CLI Tokenizer - feels like bash
/**
 * Split a shell-like command line into a flat list of tokens.
 *
 * Token types produced:
 *   WORD       run of ordinary characters (whitespace is NOT a separator and
 *              is kept inside the word); `\x` appends `x` literally
 *   STRING     contents between a pair of matching ' or " quotes
 *   AMP / AND  `&` / `&&`
 *   PIPE / OR  `|` / `||`
 *   OUT / IN   `>` / `<`
 *   SEMICOLON  `;`
 *   SUBOPEN / SUBCLOSE  `(` / `)`
 *
 * Backslash escapes are only recognised outside of quotes; inside a quoted
 * string every character up to the matching quote is copied verbatim.
 *
 * @param {string} input - The command line to tokenize.
 * @returns {Array<{type: string, value: string, isOpenUntil: (string|boolean)}>}
 *   Tokens in input order. `isOpenUntil` is `false` for every finished token;
 *   if the input ends inside a quoted string, the last token keeps the quote
 *   character it was still waiting for.
 */
function tokenize (input) {
  function Token(type, isOpenUntil) {
    this.type = type;
    this.value = "";
    // Quote character that terminates this token, or false when not quoted.
    this.isOpenUntil = isOpenUntil || false;
  }

  var tokens = [];
  // Placeholder so next() always has something to push; dropped before return.
  var currentToken = new Token("EMPTY");
  var len = input.length;
  var i;
  var currentChar;

  // Finish the token being built and start a fresh one of the given type.
  function next (type, isOpenUntil) {
    tokens.push(currentToken);
    currentToken = new Token(type, isOpenUntil);
  }

  for (i = 0; i < len; i++) {
    currentChar = input.charAt(i);

    // Inside a quoted string: copy verbatim until the matching quote.
    if (currentToken.isOpenUntil) {
      if (currentChar === currentToken.isOpenUntil) {
        currentToken.isOpenUntil = false;
      } else {
        currentToken.value += currentChar;
      }
      continue;
    }

    switch (currentChar) {
      case "\\":
        if (currentToken.type !== "WORD") next("WORD");
        // Consume the escaped character; charAt() yields "" past the end,
        // so a trailing backslash simply adds nothing.
        i++;
        currentToken.value += input.charAt(i);
        break;
      case "&":
        // A second consecutive `&` upgrades the pending AMP to AND.
        if (currentToken.type === "AMP") currentToken.type = "AND";
        else next("AMP");
        break;
      case "|":
        // A second consecutive `|` upgrades the pending PIPE to OR.
        if (currentToken.type === "PIPE") currentToken.type = "OR";
        else next("PIPE");
        break;
      case ">":
        next("OUT");
        break;
      case "<":
        next("IN");
        break;
      case ";":
        next("SEMICOLON");
        break;
      case "\"":
        next("STRING", "\"");
        break;
      case "'":
        next("STRING", "'");
        break;
      case "(":
        next("SUBOPEN");
        break;
      case ")":
        next("SUBCLOSE");
        break;
      default:
        // Ordinary character: extend the current word or begin a new one.
        if (currentToken.type !== "WORD") next("WORD");
        currentToken.value += currentChar;
    }
  }

  tokens.push(currentToken); // append current token
  tokens.shift(); // remove the initial EMPTY placeholder
  return tokens;
}
// Demo: tokenize a sample command line and print the resulting token list.
var input = '(true||false)&& echo foo bar \\> > baz | world; echo "bla bla" && \'foo\'';
console.log(tokenize(input));
/* result:
[ { type: 'SUBOPEN', value: '', isOpenUntil: false },
  { type: 'WORD', value: 'true', isOpenUntil: false },
  { type: 'OR', value: '', isOpenUntil: false },
  { type: 'WORD', value: 'false', isOpenUntil: false },
  { type: 'SUBCLOSE', value: '', isOpenUntil: false },
  { type: 'AND', value: '', isOpenUntil: false },
  { type: 'WORD', value: ' echo foo bar > ', isOpenUntil: false },
  { type: 'OUT', value: '', isOpenUntil: false },
  { type: 'WORD', value: ' baz ', isOpenUntil: false },
  { type: 'PIPE', value: '', isOpenUntil: false },
  { type: 'WORD', value: ' world', isOpenUntil: false },
  { type: 'SEMICOLON', value: '', isOpenUntil: false },
  { type: 'WORD', value: ' echo ', isOpenUntil: false },
  { type: 'STRING', value: 'bla bla', isOpenUntil: false },
  { type: 'WORD', value: ' ', isOpenUntil: false },
  { type: 'AND', value: '', isOpenUntil: false },
  { type: 'WORD', value: ' ', isOpenUntil: false },
  { type: 'STRING', value: 'foo', isOpenUntil: false } ] */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment