ngynkvn/parser.cpp

## parser.cpp

enum TOKEN_TYPE
{
  TOK_OPAREN,
  TOK_CPAREN,
  TOK_OPERATOR,
  TOK_FOR,
  TOK_BEGIN,
  TOK_END,
  TOK_DELIMITER,
  TOK_CONSTANT,
  TOK_IDENTIFIER,
  TOK_NULL,
  TOK_BADNAME
};

typedef std::pair<TOKEN_TYPE, std::string> token;
typedef std::vector<token> token_v;

class Parser
{
public:
  Parser(token_v);
  void parseTokens();
  void printOutput();
  void checkDepth();
  void checkBalancedParenthesis();

private:
  int depth = 0;
  std::set<token> keywords;
  std::set<token> identifiers;
  std::set<token> constants;
  std::set<token> operators;
  std::set<token> delimiters;
  std::set<token> syntax_errors;
  token_v tokens;
  Stack<token> depthCheck;
  Stack<token> balancedParenthesis;
  std::string outStr(std::set<token>);
};


/*
 * Parse tokens goes through the vector of tokens and determines what actions need to be taken for further processing.
 * For simple tokens such as identifiers and constants, this is simply just inserting it into a set to be displayed later
 * For keywords or parenthesis, we push onto a stack to later determine the maximum depth of the program and whether
 * parenthesis are well formed.
 */
void Parser::parseTokens()
{
    auto iter = tokens.begin();
    while (iter != tokens.end())
    {
        token t = *iter;
        switch (t.first)
        {
        case TOK_FOR:
        case TOK_BEGIN:
        case TOK_END:
            depthCheck.push(t);
            keywords.insert(t);
            break;
        case TOK_OPAREN:
            balancedParenthesis.push(t);
            break;
        case TOK_CPAREN:
            balancedParenthesis.push(t);
            break;
        case TOK_CONSTANT:
            constants.insert(t);
            break;
        case TOK_DELIMITER:
            delimiters.insert(t);
            break;
        case TOK_IDENTIFIER:
            identifiers.insert(t);
            break;
        case TOK_OPERATOR:
            operators.insert(t);
            break;
        case TOK_BADNAME:
            syntax_errors.insert(t);
            break;
        default:
            throw runtime_error("Not able to parse tokens.");
        }
        ++iter;
    }

    printOutput();
}

/**
 * Extracts the string values from the set of tokens.
 */
string Parser::outStr(set<token> s)
{
    string out;
    if (s.empty())
    {
        return "N/A";
    }
    for (auto iter = s.begin(); iter != s.end(); ++iter)
    {
        out.append((*iter).second + " ");
    }
    return out;
}

void Parser::printOutput()
{
    checkDepth();
    checkBalancedParenthesis();
    cout << "Keywords: " << outStr(keywords) << '\n'
         << "Identifiers: " << outStr(identifiers) << '\n'
         << "Constants: " << outStr(constants) << '\n'
         << "Operators: " << outStr(operators) << '\n'
         << "Delimiters: " << outStr(delimiters) << '\n'
         << "Syntax Error(s): " << outStr(syntax_errors) << '\n';
}

/**
 * We check depth by evaluating the stack and ensuring that it is well formed.
 * We know a for-loop is valid when it contains FOR, BEGIN, and END.
 * Therefore we count the number of necessary fors and begins by incrementing everytime we encounter an end,
 * and decrementing the count when we reach BEGIN.
 * When the FOR token is reached, we check that begins is equal to fors - 1 (since the stack will have already evaluated one begin)
 * if it is, we know that the current depth that for loop is in is equal to begins + 1.
 */
void Parser::checkDepth()
{
    int begins = 0;
    int fors = 0;
    int countFors = 0;
    int countEnds = 0;
    int countBegins = 0;
    while (!depthCheck.empty())
    {
        token t = depthCheck.pop();
        switch (t.first)
        {
        case TOK_FOR:
            countFors++;
            if (begins == fors - 1 && begins >= 0 && fors - 1 >= 0)
            {
                depth = max(depth, begins + 1);
                fors--;
            }
            break;
        case TOK_BEGIN:
            countBegins++;
            begins--;
            break;
        case TOK_END:
            countEnds++;
            begins++;
            fors++;
            break;
        default:
            break;
        }
    }

    cout << "The depth of nested loop(s) is " << depth << endl;
    if (begins < 0 || countFors > countEnds)
    {
        syntax_errors.insert(token(TOK_END, "END"));
    }
    if (begins > 0 || countFors > countBegins)
    {
        syntax_errors.insert(token(TOK_BEGIN, "BEGIN"));
    }
}

void Parser::checkBalancedParenthesis()
{
    int pairs = 0;
    while (!balancedParenthesis.empty())
    {
        token t = balancedParenthesis.pop();
        switch (t.first)
        {
        case TOK_CPAREN:
            pairs--;
            break;
        case TOK_OPAREN:
            pairs++;
            break;
        default:
            break;
        }
    }
    if (pairs > 0)
    {
        syntax_errors.insert(token(TOK_OPAREN, "("));
    }
    else if (pairs < 0)
    {
        syntax_errors.insert(token(TOK_CPAREN, ")"));
    }
}

	enum TOKEN_TYPE
	{
	TOK_OPAREN,
	TOK_CPAREN,
	TOK_OPERATOR,
	TOK_FOR,
	TOK_BEGIN,
	TOK_END,
	TOK_DELIMITER,
	TOK_CONSTANT,
	TOK_IDENTIFIER,
	TOK_NULL,
	TOK_BADNAME
	};

	typedef std::pair<TOKEN_TYPE, std::string> token;
	typedef std::vector<token> token_v;

	class Parser
	{
	public:
	Parser(token_v);
	void parseTokens();
	void printOutput();
	void checkDepth();
	void checkBalancedParenthesis();

	private:
	int depth = 0;
	std::set<token> keywords;
	std::set<token> identifiers;
	std::set<token> constants;
	std::set<token> operators;
	std::set<token> delimiters;
	std::set<token> syntax_errors;
	token_v tokens;
	Stack<token> depthCheck;
	Stack<token> balancedParenthesis;
	std::string outStr(std::set<token>);
	};


	/*
	* Parse tokens goes through the vector of tokens and determines what actions need to be taken for further processing.
	* For simple tokens such as identifiers and constants, this is simply just inserting it into a set to be displayed later
	* For keywords or parenthesis, we push onto a stack to later determine the maximum depth of the program and whether
	* parenthesis are well formed.
	*/
	void Parser::parseTokens()
	{
	auto iter = tokens.begin();
	while (iter != tokens.end())
	{
	token t = *iter;
	switch (t.first)
	{
	case TOK_FOR:
	case TOK_BEGIN:
	case TOK_END:
	depthCheck.push(t);
	keywords.insert(t);
	break;
	case TOK_OPAREN:
	balancedParenthesis.push(t);
	break;
	case TOK_CPAREN:
	balancedParenthesis.push(t);
	break;
	case TOK_CONSTANT:
	constants.insert(t);
	break;
	case TOK_DELIMITER:
	delimiters.insert(t);
	break;
	case TOK_IDENTIFIER:
	identifiers.insert(t);
	break;
	case TOK_OPERATOR:
	operators.insert(t);
	break;
	case TOK_BADNAME:
	syntax_errors.insert(t);
	break;
	default:
	throw runtime_error("Not able to parse tokens.");
	}
	++iter;
	}

	printOutput();
	}

	/**
	* Extracts the string values from the set of tokens.
	*/
	string Parser::outStr(set<token> s)
	{
	string out;
	if (s.empty())
	{
	return "N/A";
	}
	for (auto iter = s.begin(); iter != s.end(); ++iter)
	{
	out.append((*iter).second + " ");
	}
	return out;
	}

	void Parser::printOutput()
	{
	checkDepth();
	checkBalancedParenthesis();
	cout << "Keywords: " << outStr(keywords) << '\n'
	<< "Identifiers: " << outStr(identifiers) << '\n'
	<< "Constants: " << outStr(constants) << '\n'
	<< "Operators: " << outStr(operators) << '\n'
	<< "Delimiters: " << outStr(delimiters) << '\n'
	<< "Syntax Error(s): " << outStr(syntax_errors) << '\n';
	}

	/**
	* We check depth by evaluating the stack and ensuring that it is well formed.
	* We know a for-loop is valid when it contains FOR, BEGIN, and END.
	* Therefore we count the number of necessary fors and begins by incrementing everytime we encounter an end,
	* and decrementing the count when we reach BEGIN.
	* When the FOR token is reached, we check that begins is equal to fors - 1 (since the stack will have already evaluated one begin)
	* if it is, we know that the current depth that for loop is in is equal to begins + 1.
	*/
	void Parser::checkDepth()
	{
	int begins = 0;
	int fors = 0;
	int countFors = 0;
	int countEnds = 0;
	int countBegins = 0;
	while (!depthCheck.empty())
	{
	token t = depthCheck.pop();
	switch (t.first)
	{
	case TOK_FOR:
	countFors++;
	if (begins == fors - 1 && begins >= 0 && fors - 1 >= 0)
	{
	depth = max(depth, begins + 1);
	fors--;
	}
	break;
	case TOK_BEGIN:
	countBegins++;
	begins--;
	break;
	case TOK_END:
	countEnds++;
	begins++;
	fors++;
	break;
	default:
	break;
	}
	}

	cout << "The depth of nested loop(s) is " << depth << endl;
	if (begins < 0 \|\| countFors > countEnds)
	{
	syntax_errors.insert(token(TOK_END, "END"));
	}
	if (begins > 0 \|\| countFors > countBegins)
	{
	syntax_errors.insert(token(TOK_BEGIN, "BEGIN"));
	}
	}

	void Parser::checkBalancedParenthesis()
	{
	int pairs = 0;
	while (!balancedParenthesis.empty())
	{
	token t = balancedParenthesis.pop();
	switch (t.first)
	{
	case TOK_CPAREN:
	pairs--;
	break;
	case TOK_OPAREN:
	pairs++;
	break;
	default:
	break;
	}
	}
	if (pairs > 0)
	{
	syntax_errors.insert(token(TOK_OPAREN, "("));
	}
	else if (pairs < 0)
	{
	syntax_errors.insert(token(TOK_CPAREN, ")"));
	}
	}