Skip to content

Instantly share code, notes, and snippets.

@trcio
Last active February 16, 2019 03:44
Show Gist options
  • Save trcio/6a9276c7891ba83d6a834f9f2269c340 to your computer and use it in GitHub Desktop.
Save trcio/6a9276c7891ba83d6a834f9f2269c340 to your computer and use it in GitHub Desktop.
SE 3377 CLI Parser and Tokenizer
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <list>
#include <stack>
#include <string>
using namespace std;
// Classification assigned to every token produced by the Parser.
enum class TokenType
{
// First word of the input, and the first word after a '|' or ';'.
Command,
// A word whose first character is '-'.
Option,
// Any other word; the parser falls back to this type after each emitted token.
Argument,
// A single quoting/operator character such as ' " | ; \ $ ! > < ~ ( ) { } [ ].
SpecialCharacter,
// Text from a '#' through the end of the input line.
Comment
};
class Token
{
public:
string Value;
TokenType Type;
Token(string value, TokenType type)
{
Value = value;
Type = type;
};
string TypeToString()
{
switch (Type)
{
case TokenType::Command:
return "Command";
case TokenType::Option:
return "Option";
case TokenType::Argument:
return "Argument";
case TokenType::SpecialCharacter:
return "Special Character";
case TokenType::Comment:
return "Comment";
}
return "";
};
void Print()
{
// max() ensures there is always space between the value and type of token
cout << left << setw(max(50, (int) Value.length() + 5)) << setfill(' ') << Value << TypeToString() << endl;
};
};
class Parser
{
private:
stack<char> CharStack;
string Input, Buffer;
TokenType CurrentType;
bool IsInsideOfPair(char c)
{
return !CharStack.empty() && CharStack.top() == c;
};
bool IsInsideOfQuotes()
{
return IsInsideOfPair('\'') || IsInsideOfPair('\"');
};
bool IsEscaped(int i)
{
// true if there are an odd # of consecutive \'s behind the 'i' index
int count = 0;
for (int j = i - 1; j >= 0; j++)
{
if (Input[j] == '\\')
count++;
else
break;
}
return count % 2 > 0;
};
bool HandleSpecialCharacter(int i)
{
char c = Input[i];
// escape all characters with a backslash behind them, outside of single quotes
if (IsEscaped(i) && !IsInsideOfPair('\''))
return false;
// escape all characters inside of single quotes
if (c != '\'' && IsInsideOfPair('\''))
return false;
if (c == '-' && Buffer.length() < 1)
{
CurrentType = TokenType::Option;
return false;
}
if (c == '\'')
{
if (IsInsideOfPair('\''))
CharStack.pop();
else
CharStack.push('\'');
AddSpecialToken("\'");
}
else if (c == '\"')
{
if (IsInsideOfPair('\"'))
CharStack.pop();
else
CharStack.push('\"');
AddSpecialToken("\"");
}
else if (c == '|')
{
AddSpecialToken("|");
CurrentType = TokenType::Command;
}
else if (c == ';')
{
AddSpecialToken(";");
CurrentType = TokenType::Command;
}
else if (c == '\\')
AddSpecialToken("\\");
else if (c == '$')
AddSpecialToken("$");
else if (c == '!')
AddSpecialToken("!");
else if (c == '>')
AddSpecialToken(">");
else if (c == '<')
AddSpecialToken("<");
else if (c == '~')
AddSpecialToken("~");
else if (c == '(')
AddSpecialToken("(");
else if (c == ')')
AddSpecialToken(")");
else if (c == '{')
AddSpecialToken("{");
else if (c == '}')
AddSpecialToken("}");
else if (c == '[')
AddSpecialToken("[");
else if (c == ']')
AddSpecialToken("]");
else if (c == ' ' && IsInsideOfQuotes())
return false;
else if (c != ' ')
return false;
return true;
};
void AddCurrentToken()
{
Tokens.push_back(Token(Buffer, CurrentType));
CurrentType = TokenType::Argument;
Buffer = "";
};
void AddSpecialToken(string v)
{
if (Buffer.length() > 0)
AddCurrentToken();
Tokens.push_back(Token(v, TokenType::SpecialCharacter));
};
void ParseInput()
{
// loop through each character of the input
for (int i = 0; i < Input.length(); i++)
{
char c = Input[i];
// handle comments
if (c == '#')
{
// add the current token if it exists
if (Buffer.length() > 0)
AddCurrentToken();
// add the comment token that contains the rest of the input string
Tokens.push_back(Token(Input.substr(i), TokenType::Comment));
// break out of the loop because we're done here
break;
}
// a space means the token has ended, unless we're inside of quotes
if (c == ' ' && Buffer.length() > 0 && !IsInsideOfQuotes())
AddCurrentToken();
// if the character wasnt handled, add it to the buffer
if (!HandleSpecialCharacter(i))
Buffer += c;
}
// make sure we add the token that ended with the loop
if (Buffer.length() > 0)
AddCurrentToken();
};
public:
list<Token> Tokens;
Parser(string input)
{
Tokens = list<Token>();
CharStack = stack<char>();
Input = input;
Buffer = "";
CurrentType = TokenType::Command;
ParseInput();
};
};
int main()
{
cout << endl;
while (true)
{
string input;
getline(cin, input);
if (input == "QUIT")
break;
Parser p(input);
cout << string(80, '-') << endl;
cout << "Number of tokens: " << p.Tokens.size() << endl;
cout << "Command name: " << p.Tokens.front().Value << endl;
cout << string(30, '-') << endl;
for (Token t : p.Tokens)
{
t.Print();
}
cout << string(80, '-') << endl << endl;
}
}
using System.Collections.Generic;
namespace CLITokenizer
{
/// <summary>
/// Splits one line of shell-style input into a list of <see cref="Token"/>s.
/// Parsing runs in the constructor; the result is exposed through <see cref="Tokens"/>.
/// </summary>
public class Parser
{
    /// <summary>Tokens produced from the input, in order of appearance.</summary>
    public List<Token> Tokens { get; }

    // Quote characters that are currently open; the top is the innermost one.
    private Stack<char> CharStack { get; }

    // The raw line being parsed.
    private string Input { get; }

    // Accumulates the characters of the word currently being read.
    private string Buffer { get; set; }

    // Type that will be given to the word currently accumulating in Buffer.
    private TokenType CurrentType { get; set; }

    /// <summary>Parses <paramref name="input"/> immediately; the first word is treated as a command.</summary>
    /// <param name="input">The command line to tokenize.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
    public Parser(string input)
    {
        if (input == null)
        {
            throw new System.ArgumentNullException(nameof(input));
        }

        Tokens = new List<Token>();
        CharStack = new Stack<char>();
        Buffer = string.Empty;
        CurrentType = TokenType.Command;
        Input = input;
        Parse();
    }

    // Walks Input once, filling Tokens.
    private void Parse()
    {
        for (var i = 0; i < Input.Length; i++)
        {
            var c = Input[i];

            if (IsCommentStart(i))
            {
                if (Buffer.Length > 0)
                {
                    AddCurrentToken();
                }

                // the comment token contains the rest of the input string
                Tokens.Add(new Token { Value = Input.Substring(i), Type = TokenType.Comment });
                break;
            }

            // a space means the token has ended, unless we're inside of quotes
            if (c == ' ' && Buffer.Length > 0 && !IsInsideOfQuotes())
            {
                AddCurrentToken();
            }

            // if the character wasn't handled, it is ordinary text
            if (!HandleSpecialCharacter(i))
            {
                Buffer += c;
            }
        }

        // make sure we add the token that ended with the loop
        if (Buffer.Length > 0)
        {
            AddCurrentToken();
        }
    }

    // True when the '#' at index i starts a comment: it must be neither inside
    // quotes nor escaped by a backslash, otherwise it is literal text.
    private bool IsCommentStart(int i)
    {
        return Input[i] == '#' && !IsInsideOfQuotes() && !IsEscaped(i);
    }

    // Processes the character at index i. Returns true when the character was
    // consumed (emitted as a special token, or dropped as a separating space)
    // and false when the caller should append it to Buffer.
    private bool HandleSpecialCharacter(int i)
    {
        var c = Input[i];

        // escape all characters with a backslash behind them, outside of single quotes
        if (IsEscaped(i) && !IsInsideOfPair('\''))
        {
            return false;
        }

        // escape all characters inside of single quotes (except the closing quote)
        if (c != '\'' && IsInsideOfPair('\''))
        {
            return false;
        }

        // a '-' at the very start of a word marks that word as an option
        if (c == '-' && Buffer.Length < 1)
        {
            CurrentType = TokenType.Option;
            return false;
        }

        switch (c)
        {
            case '\'':
            case '"':
                // a quote closes the innermost pair if it matches, otherwise opens one
                if (IsInsideOfPair(c))
                {
                    CharStack.Pop();
                }
                else
                {
                    CharStack.Push(c);
                }

                AddSpecialToken(c.ToString());
                return true;

            case '|':
            case ';':
                // the word following a pipe or separator is a new command;
                // set the type AFTER flushing, because flushing resets it
                AddSpecialToken(c.ToString());
                CurrentType = TokenType.Command;
                return true;

            case '\\':
            case '$':
            case '!':
            case '>':
            case '<':
            case '~':
            case '(':
            case ')':
            case '{':
            case '}':
            case '[':
            case ']':
                AddSpecialToken(c.ToString());
                return true;

            case ' ':
                // a space is literal inside quotes and a discarded separator outside
                return !IsInsideOfQuotes();

            default:
                return false;
        }
    }

    // True when the innermost open quote character is c.
    private bool IsInsideOfPair(char c)
    {
        return CharStack.Count > 0 && CharStack.Peek() == c;
    }

    // True when the innermost open quote is a single or a double quote.
    private bool IsInsideOfQuotes()
    {
        return IsInsideOfPair('\'') || IsInsideOfPair('"');
    }

    // Flushes any pending word, then emits t as a SpecialCharacter token.
    private void AddSpecialToken(string t)
    {
        if (Buffer.Length > 0)
        {
            AddCurrentToken();
        }

        Tokens.Add(new Token { Value = t, Type = TokenType.SpecialCharacter });
    }

    // True if an odd number of consecutive backslashes immediately precede index i.
    private bool IsEscaped(int i)
    {
        var count = 0;
        for (var j = i - 1; j >= 0 && Input[j] == '\\'; j--)
        {
            count++;
        }

        return count % 2 != 0;
    }

    // Emits Buffer as a token of CurrentType, then resets the buffer and falls
    // back to Argument for whatever word comes next.
    private void AddCurrentToken()
    {
        Tokens.Add(new Token { Value = Buffer, Type = CurrentType });
        CurrentType = TokenType.Argument;
        Buffer = string.Empty;
    }

    /// <summary>A single lexical unit: its raw text and its classification.</summary>
    public class Token
    {
        /// <summary>Raw text of the token as it appeared in the input.</summary>
        public string Value { get; set; }

        /// <summary>Classification assigned by the parser.</summary>
        public TokenType Type { get; set; }
    }

    /// <summary>Classification assigned to every token.</summary>
    public enum TokenType
    {
        /// <summary>First word of the input, and the first word after a '|' or ';'.</summary>
        Command,

        /// <summary>A word whose first character is '-'.</summary>
        Option,

        /// <summary>Any other word.</summary>
        Argument,

        /// <summary>A single quoting or operator character.</summary>
        SpecialCharacter,

        /// <summary>Text from an unquoted, unescaped '#' through the end of the input.</summary>
        Comment
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment