// vijay's lexer code
// Gist tngo0508/44d56799408a51fcb92c57cb8242665b, created September 10, 2017 20:58.
// Page note: save the gist to your computer to use it in GitHub Desktop.
// Page note: this file contains bidirectional Unicode text that may be interpreted or
// compiled differently than what appears below. To review, open the file in an editor
// that reveals hidden Unicode characters.
#pragma once

#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>

using namespace std;
/*
    One token as returned by lexer(): the matched text plus the name of its category.
*/
struct LexTok
{
    // The characters that make up the token. Empty when token is "eof".
    std::string lexeme;
    // Category name assigned by lexer(): "Keyword", "Type", "Identifier", "IntConst",
    // "RealConst", "StrConst", "RelOp", "Operator", "error" or "eof".
    std::string token;
};
/*
    Checks whether the given string is a keyword.

    identifier: the candidate word; compared exactly (case-sensitive) against the table.
    Returns true when identifier equals one of the keywords, false otherwise
    (including for the empty string).
*/
bool CheckKeywordList(std::string identifier)
{
    // Static table of literals: nothing is constructed per call.
    static const char* const keyWords[] = {
        "program", "begin", "end", "if", "else", "elseif", "print", "get",
        "function", "read", "write", "while", "elsif", "do", "until", "return"
    };

    // Standard range-for replaces the MSVC-only `for each (... in ...)` extension.
    for (const char* keyword : keyWords)
    {
        if (identifier == keyword)
        {
            return true;
        }
    }
    return false;
}
/*
    Checks whether the given string is a type name.

    identifier: the candidate word; compared exactly (case-sensitive) against the table.
    Returns true when identifier is "int", "string" or "real", false otherwise.
*/
bool CheckTypeList(std::string identifier)
{
    // Static table of literals: nothing is constructed per call.
    static const char* const types[] = { "int", "string", "real" };

    // Standard range-for replaces the MSVC-only `for each (... in ...)` extension.
    for (const char* type : types)
    {
        if (identifier == type)
        {
            return true;
        }
    }
    return false;
}
/*
    Checks whether the given character is a single-character operator or punctuation mark.

    opercharacter: the character to classify.
    Returns true for one of + - * / % ( ) ; . , : and false for everything else.
    Relational characters (< > ! =) are not part of this table.
*/
bool CheckOperatorList(char opercharacter)
{
    // Brace-initialised (not a string literal) so no terminating '\0' ends up in the table.
    static const char operators[] = { '+', '-', '*', '/', '%', '(', ')', ';', '.', ',', ':' };

    // Standard range-for replaces the MSVC-only `for each (... in ...)` extension.
    for (const char candidate : operators)
    {
        if (candidate == opercharacter)
        {
            return true;
        }
    }
    return false;
}
/*
    Reads the next token from `file` and returns it as a lexeme/token pair.

    file: an open input stream positioned at the start of the next token (or at
          whitespace preceding it). Characters belonging to the token are consumed;
          the character following the token is left in the stream.

    Returned token categories:
      "Keyword" / "Type" / "Identifier" - a letter followed by letters, digits or '_',
                                          classified via CheckKeywordList / CheckTypeList.
      "IntConst"                        - one or more digits.
      "RealConst"                       - digits '.' digits (at least one digit after '.').
      "StrConst"                        - text enclosed in double quotes, quotes included.
      "RelOp"                           - < > = optionally followed by '=', and "!=".
      "Operator"                        - a character accepted by CheckOperatorList, or ":=".
      "error"                           - a lone '!' or any other unrecognised character.
      "eof"                             - end of input (lexeme is empty). An unterminated
                                          string constant is also reported this way.

    Lookahead is done with peek(), so nothing is put back after a failed read and a token
    that ends exactly at end-of-file terminates cleanly.
*/
LexTok lexer(std::ifstream& file)
{
    LexTok returnItem = { "", "" };

    // Classifiers for peek() results. peek() yields either an unsigned-char value or the
    // end-of-file marker, both of which are valid <cctype> arguments (EOF classifies as false).
    const auto isDigit = [](int c) { return std::isdigit(c) != 0; };
    const auto isIdentifierPart = [](int c) {
        return std::isalpha(c) != 0 || std::isdigit(c) != 0 || c == '_';
    };

    // Appends upcoming characters to the lexeme for as long as `accept` approves of them.
    const auto appendWhile = [&file, &returnItem](bool (*accept)(int)) {
        while (accept(file.peek()))
        {
            returnItem.lexeme += static_cast<char>(file.get());
        }
    };

    // Get the next significant character, skipping all whitespace (including '\r').
    // The result of get() is checked directly so `ch` is never examined after a failed read.
    char ch = '\0';
    do
    {
        if (!file.get(ch))
        {
            returnItem.token = "eof";
            return returnItem;
        }
    } while (std::isspace(static_cast<unsigned char>(ch)));

    // <cctype> functions require a value representable as unsigned char.
    const unsigned char first = static_cast<unsigned char>(ch);
    returnItem.lexeme += ch;

    if (std::isalpha(first)) // Word: keyword, type name or identifier.
    {
        appendWhile(isIdentifierPart);
        if (CheckKeywordList(returnItem.lexeme))
        {
            returnItem.token = "Keyword";
        }
        else if (CheckTypeList(returnItem.lexeme))
        {
            returnItem.token = "Type";
        }
        else
        {
            returnItem.token = "Identifier";
        }
    }
    else if (std::isdigit(first)) // Number: integer or real constant.
    {
        appendWhile(isDigit);
        returnItem.token = "IntConst";
        if (file.peek() == '.')
        {
            file.get(ch); // Tentatively take the '.'.
            if (isDigit(file.peek()))
            {
                returnItem.lexeme += '.';
                appendWhile(isDigit);
                returnItem.token = "RealConst";
            }
            else
            {
                // No fraction digits: the '.' is not part of this number. Give it back so
                // it is returned as its own token on the next call instead of being lost.
                file.unget();
            }
        }
    }
    else if (ch == '"') // String constant; runs up to and including the closing quote.
    {
        while (file.get(ch))
        {
            returnItem.lexeme += ch;
            if (ch == '"')
            {
                returnItem.token = "StrConst";
                return returnItem;
            }
        }
        // Input ended before the closing quote: discard the partial text and report eof.
        returnItem.lexeme = "";
        returnItem.token = "eof";
    }
    else if (ch == '<' || ch == '>' || ch == '!' || ch == '=')
    {
        if (file.peek() == '=')
        {
            returnItem.lexeme += static_cast<char>(file.get());
        }
        // '!' is only meaningful as part of "!=".
        if (returnItem.lexeme == "!")
        {
            returnItem.token = "error";
        }
        else
        {
            returnItem.token = "RelOp";
        }
    }
    else if (CheckOperatorList(ch))
    {
        returnItem.token = "Operator";
        // ':' followed by '=' forms the two-character operator ":=".
        if (ch == ':' && file.peek() == '=')
        {
            returnItem.lexeme += static_cast<char>(file.get());
        }
    }
    else
    {
        returnItem.token = "error";
    }
    return returnItem;
}
// Page footer: sign up for free to join this conversation on GitHub.
// Already have an account? Sign in to comment.