tngo0508/lexer.h

## lexer.h
#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>

using namespace std;

#pragma once

/*
One lexical unit produced by lexer(): the matched text together with the
name of its category. Kept as a plain aggregate so it can be brace-initialised.
*/
struct LexTok
{
	std::string lexeme;	// text of the token as it was read from the input
	std::string token;	// category name assigned by lexer()
};

/*
Reports whether the given identifier is one of the reserved keywords.

identifier: candidate lexeme; the comparison is exact and case-sensitive.
Returns true on a match, false otherwise (including for an empty string).

Declared inline because the definition lives in a header; without it, two
translation units including this file would each emit a definition.
*/
inline bool CheckKeywordList(const std::string& identifier)
{
	// Static table of literals: nothing is constructed per call.
	// Both the "elseif" and "elsif" spellings are listed.
	static const char* const keyWords[] = {
		"program", "begin", "end", "if", "else", "elseif", "print", "get",
		"function", "read", "write", "while", "elsif", "do", "until", "return"
	};

	// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
	for (const char* keyword : keyWords)
	{
		if (identifier == keyword)
		{
			return true;
		}
	}

	return false;
}

/*
Reports whether the given identifier names one of the built-in types.

identifier: candidate lexeme; the comparison is exact and case-sensitive.
Returns true for "int", "string" or "real", false for anything else.

Declared inline because the definition lives in a header; without it, two
translation units including this file would each emit a definition.
*/
inline bool CheckTypeList(const std::string& identifier)
{
	// Static table of literals: nothing is constructed per call.
	static const char* const types[] = { "int", "string", "real" };

	// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
	for (const char* type : types)
	{
		if (identifier == type)
		{
			return true;
		}
	}

	return false;
}

/*
Reports whether the given character is one of the single-character
operators / punctuation marks: + - * / % ( ) ; . , :

opercharacter: the character to classify.
Returns true when it is in the list, false otherwise. The relational
characters (< > ! =) are deliberately not part of this list.

Declared inline because the definition lives in a header; without it, two
translation units including this file would each emit a definition.
*/
inline bool CheckOperatorList(char opercharacter)
{
	// Brace-initialised (not a string literal) so the table holds no trailing '\0'.
	static const char operators[] = { '+', '-', '*', '/', '%', '(', ')', ';', '.', ',', ':' };

	// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
	for (char candidate : operators)
	{
		if (candidate == opercharacter)
		{
			return true;
		}
	}

	return false;
}

/*
Reads the next token from `file` and returns it as a lexeme/token pair.

Leading whitespace is skipped first. The `token` field of the result is one of:
  "Keyword"    - identifier accepted by CheckKeywordList
  "Type"       - identifier accepted by CheckTypeList
  "Identifier" - any other letter followed by letters, digits or '_'
  "IntConst"   - one or more digits
  "RealConst"  - digits, '.', one or more digits
  "StrConst"   - text between double quotes (both quotes stay in the lexeme)
  "RelOp"      - <  >  =  <=  >=  ==  !=
  "Operator"   - a character accepted by CheckOperatorList, or ":="
  "error"      - a lone '!' or any unrecognised character
  "eof"        - no more input (lexeme is empty); also returned, with an empty
                 lexeme, when a string literal is still open at end of input

file: stream to read from; it is advanced past the returned token only.
Look-ahead uses peek(), so a character that does not belong to the current
token is never consumed. A stream that is already in a failed state yields "eof".

Declared inline because the definition lives in a header.
*/
inline LexTok lexer(std::ifstream& file)
{
	LexTok returnItem = { "", "" };
	char ch = '\0';

	// Skip whitespace. The result of get() is tested directly so that `ch` is
	// only inspected after a successful read, and so that a stream which failed
	// without reaching end-of-file (e.g. a file that never opened) still ends
	// the scan instead of producing tokens from an unset character.
	// NOTE(review): the previous version also skipped the char value -52 (0xCC);
	// presumably a workaround for inspecting `ch` after a failed read - confirm
	// that no real input relies on that byte being ignored.
	bool found = false;
	while (!found && file.get(ch))
	{
		// Cast first: passing a negative char to the <cctype> classifiers is undefined.
		found = !std::isspace(static_cast<unsigned char>(ch));
	}

	if (!found)
	{
		returnItem.token = "eof";
		return returnItem;
	}

	const unsigned char first = static_cast<unsigned char>(ch);

	if (std::isalpha(first))			//	Identifier, keyword or type name.
	{
		returnItem.lexeme += ch;

		// peek() yields either an unsigned-char value or EOF, both of which the
		// <cctype> classifiers accept, so the loop ends cleanly at end of input.
		for (int next = file.peek(); std::isalnum(next) || next == '_'; next = file.peek())
		{
			returnItem.lexeme += static_cast<char>(file.get());
		}

		if (CheckKeywordList(returnItem.lexeme))
		{
			returnItem.token = "Keyword";
		}
		else if (CheckTypeList(returnItem.lexeme))
		{
			returnItem.token = "Type";
		}
		else
		{
			returnItem.token = "Identifier";
		}
	}
	else if (std::isdigit(first))		//	Integer or real constant.
	{
		returnItem.lexeme += ch;
		while (std::isdigit(file.peek()))
		{
			returnItem.lexeme += static_cast<char>(file.get());
		}
		returnItem.token = "IntConst";

		if (file.peek() == '.')
		{
			file.get(ch);	// tentatively consume the '.'
			if (std::isdigit(file.peek()))
			{
				returnItem.lexeme += '.';
				while (std::isdigit(file.peek()))
				{
					returnItem.lexeme += static_cast<char>(file.get());
				}
				returnItem.token = "RealConst";
			}
			else
			{
				// No fraction digits: the '.' is a separate token, so hand it back.
				// peek() may have set eofbit at end of input; clear it so unget() can run.
				file.clear();
				file.unget();
			}
		}
	}
	else if (ch == '"')					//	String constant.
	{
		returnItem.lexeme += ch;

		bool closed = false;
		while (!closed && file.get(ch))
		{
			returnItem.lexeme += ch;
			closed = (ch == '"');
		}

		if (closed)
		{
			returnItem.token = "StrConst";
		}
		else
		{
			// Input ended inside the literal: the partial text is discarded and
			// end of input is reported, as callers of this function expect.
			returnItem.lexeme = "";
			returnItem.token = "eof";
		}
	}
	else if (ch == '<' || ch == '>' || ch == '!' || ch == '=')
	{
		returnItem.lexeme += ch;
		if (file.peek() == '=')
		{
			returnItem.lexeme += static_cast<char>(file.get());
		}

		// '!' is only valid as the first half of "!=".
		returnItem.token = (returnItem.lexeme == "!") ? "error" : "RelOp";
	}
	else if (CheckOperatorList(ch))
	{
		returnItem.lexeme += ch;
		returnItem.token = "Operator";

		// ':' followed by '=' forms the two-character operator ":=".
		if (ch == ':' && file.peek() == '=')
		{
			returnItem.lexeme += static_cast<char>(file.get());
		}
	}
	else
	{
		returnItem.lexeme += ch;
		returnItem.token = "error";
	}

	return returnItem;
}
	#include<iostream>
	#include<iomanip>
	#include<fstream>
	#include<string>

	using namespace std;

	#pragma once

	/*
	One lexical unit produced by lexer(): the matched text together with the
	name of its category. Kept as a plain aggregate so it can be brace-initialised.
	*/
	struct LexTok
	{
		std::string lexeme;	// text of the token as it was read from the input
		std::string token;	// category name assigned by lexer()
	};

	/*
	Reports whether the given identifier is one of the reserved keywords.

	identifier: candidate lexeme; the comparison is exact and case-sensitive.
	Returns true on a match, false otherwise (including for an empty string).

	Declared inline because the definition lives in a header; without it, two
	translation units including this file would each emit a definition.
	*/
	inline bool CheckKeywordList(const std::string& identifier)
	{
		// Static table of literals: nothing is constructed per call.
		// Both the "elseif" and "elsif" spellings are listed.
		static const char* const keyWords[] = {
			"program", "begin", "end", "if", "else", "elseif", "print", "get",
			"function", "read", "write", "while", "elsif", "do", "until", "return"
		};

		// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
		for (const char* keyword : keyWords)
		{
			if (identifier == keyword)
			{
				return true;
			}
		}

		return false;
	}

	/*
	Reports whether the given identifier names one of the built-in types.

	identifier: candidate lexeme; the comparison is exact and case-sensitive.
	Returns true for "int", "string" or "real", false for anything else.

	Declared inline because the definition lives in a header; without it, two
	translation units including this file would each emit a definition.
	*/
	inline bool CheckTypeList(const std::string& identifier)
	{
		// Static table of literals: nothing is constructed per call.
		static const char* const types[] = { "int", "string", "real" };

		// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
		for (const char* type : types)
		{
			if (identifier == type)
			{
				return true;
			}
		}

		return false;
	}

	/*
	Reports whether the given character is one of the single-character
	operators / punctuation marks: + - * / % ( ) ; . , :

	opercharacter: the character to classify.
	Returns true when it is in the list, false otherwise. The relational
	characters (< > ! =) are deliberately not part of this list.

	Declared inline because the definition lives in a header; without it, two
	translation units including this file would each emit a definition.
	*/
	inline bool CheckOperatorList(char opercharacter)
	{
		// Brace-initialised (not a string literal) so the table holds no trailing '\0'.
		static const char operators[] = { '+', '-', '*', '/', '%', '(', ')', ';', '.', ',', ':' };

		// Standard range-based for; `for each (... in ...)` is an MSVC-only extension.
		for (char candidate : operators)
		{
			if (candidate == opercharacter)
			{
				return true;
			}
		}

		return false;
	}

	/*
	Reads the next token from `file` and returns it as a lexeme/token pair.

	Leading whitespace is skipped first. The `token` field of the result is one of:
	  "Keyword"    - identifier accepted by CheckKeywordList
	  "Type"       - identifier accepted by CheckTypeList
	  "Identifier" - any other letter followed by letters, digits or '_'
	  "IntConst"   - one or more digits
	  "RealConst"  - digits, '.', one or more digits
	  "StrConst"   - text between double quotes (both quotes stay in the lexeme)
	  "RelOp"      - <  >  =  <=  >=  ==  !=
	  "Operator"   - a character accepted by CheckOperatorList, or ":="
	  "error"      - a lone '!' or any unrecognised character
	  "eof"        - no more input (lexeme is empty); also returned, with an empty
	                 lexeme, when a string literal is still open at end of input

	file: stream to read from; it is advanced past the returned token only.
	Look-ahead uses peek(), so a character that does not belong to the current
	token is never consumed. A stream that is already in a failed state yields "eof".

	Declared inline because the definition lives in a header.
	*/
	inline LexTok lexer(std::ifstream& file)
	{
		LexTok returnItem = { "", "" };
		char ch = '\0';

		// Skip whitespace. The result of get() is tested directly so that `ch` is
		// only inspected after a successful read, and so that a stream which failed
		// without reaching end-of-file (e.g. a file that never opened) still ends
		// the scan instead of producing tokens from an unset character.
		// NOTE(review): the previous version also skipped the char value -52 (0xCC);
		// presumably a workaround for inspecting `ch` after a failed read - confirm
		// that no real input relies on that byte being ignored.
		bool found = false;
		while (!found && file.get(ch))
		{
			// Cast first: passing a negative char to the <cctype> classifiers is undefined.
			found = !std::isspace(static_cast<unsigned char>(ch));
		}

		if (!found)
		{
			returnItem.token = "eof";
			return returnItem;
		}

		const unsigned char first = static_cast<unsigned char>(ch);

		if (std::isalpha(first))			//	Identifier, keyword or type name.
		{
			returnItem.lexeme += ch;

			// peek() yields either an unsigned-char value or EOF, both of which the
			// <cctype> classifiers accept, so the loop ends cleanly at end of input.
			for (int next = file.peek(); std::isalnum(next) || next == '_'; next = file.peek())
			{
				returnItem.lexeme += static_cast<char>(file.get());
			}

			if (CheckKeywordList(returnItem.lexeme))
			{
				returnItem.token = "Keyword";
			}
			else if (CheckTypeList(returnItem.lexeme))
			{
				returnItem.token = "Type";
			}
			else
			{
				returnItem.token = "Identifier";
			}
		}
		else if (std::isdigit(first))		//	Integer or real constant.
		{
			returnItem.lexeme += ch;
			while (std::isdigit(file.peek()))
			{
				returnItem.lexeme += static_cast<char>(file.get());
			}
			returnItem.token = "IntConst";

			if (file.peek() == '.')
			{
				file.get(ch);	// tentatively consume the '.'
				if (std::isdigit(file.peek()))
				{
					returnItem.lexeme += '.';
					while (std::isdigit(file.peek()))
					{
						returnItem.lexeme += static_cast<char>(file.get());
					}
					returnItem.token = "RealConst";
				}
				else
				{
					// No fraction digits: the '.' is a separate token, so hand it back.
					// peek() may have set eofbit at end of input; clear it so unget() can run.
					file.clear();
					file.unget();
				}
			}
		}
		else if (ch == '"')					//	String constant.
		{
			returnItem.lexeme += ch;

			bool closed = false;
			while (!closed && file.get(ch))
			{
				returnItem.lexeme += ch;
				closed = (ch == '"');
			}

			if (closed)
			{
				returnItem.token = "StrConst";
			}
			else
			{
				// Input ended inside the literal: the partial text is discarded and
				// end of input is reported, as callers of this function expect.
				returnItem.lexeme = "";
				returnItem.token = "eof";
			}
		}
		else if (ch == '<' || ch == '>' || ch == '!' || ch == '=')
		{
			returnItem.lexeme += ch;
			if (file.peek() == '=')
			{
				returnItem.lexeme += static_cast<char>(file.get());
			}

			// '!' is only valid as the first half of "!=".
			returnItem.token = (returnItem.lexeme == "!") ? "error" : "RelOp";
		}
		else if (CheckOperatorList(ch))
		{
			returnItem.lexeme += ch;
			returnItem.token = "Operator";

			// ':' followed by '=' forms the two-character operator ":=".
			if (ch == ':' && file.peek() == '=')
			{
				returnItem.lexeme += static_cast<char>(file.get());
			}
		}
		else
		{
			returnItem.lexeme += ch;
			returnItem.token = "error";
		}

		return returnItem;
	}