Skip to content

Instantly share code, notes, and snippets.

@tngo0508
Created September 10, 2017 20:58
Show Gist options
  • Save tngo0508/44d56799408a51fcb92c57cb8242665b to your computer and use it in GitHub Desktop.
Save tngo0508/44d56799408a51fcb92c57cb8242665b to your computer and use it in GitHub Desktop.
vijay's lexer code
#include<cctype>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<string>
using namespace std;
#pragma once
// One lexical unit as returned by lexer(): the matched text plus its category.
struct LexTok
{
    std::string lexeme; // text of the token; lexer() leaves it empty for "eof"
    std::string token;  // category name assigned by lexer(), e.g. "Keyword", "IntConst", "error", "eof"
};
/*
This function checks whether the given string is one of the reserved keywords.
The match is exact and case-sensitive.
identifier: the candidate lexeme to look up.
Returns true if identifier equals one of the keywords, false otherwise.
*/
bool CheckKeywordList(std::string identifier)
{
    // Table of string literals: constant-initialised once, so no std::string
    // objects are constructed on each call.
    static const char* const keyWords[] = {
        "program", "begin", "end", "if", "else", "elseif", "print", "get",
        "function", "read", "write", "while", "elsif", "do", "until", "return"
    };
    for (const char* keyword : keyWords)
    {
        if (identifier == keyword)
        {
            return true;
        }
    }
    return false;
}
/*
This function checks whether the given string is one of the built-in type names.
The match is exact and case-sensitive.
identifier: the candidate lexeme to look up.
Returns true if identifier equals "int", "string" or "real", false otherwise.
*/
bool CheckTypeList(std::string identifier)
{
    // Table of string literals: constant-initialised once, so no std::string
    // objects are constructed on each call.
    static const char* const types[] = { "int", "string", "real" };
    for (const char* type : types)
    {
        if (identifier == type)
        {
            return true;
        }
    }
    return false;
}
/*
This function checks whether the given character is a single-character operator
or punctuation symbol.
opercharacter: the character to classify.
Returns true for + - * / % ( ) ; . , and :  and false for anything else.
Relational characters (< > ! =) are not in this list.
*/
bool CheckOperatorList(char opercharacter)
{
    static const char operators[] = { '+', '-', '*', '/', '%', '(', ')', ';', '.', ',', ':' };
    for (const char ch : operators)
    {
        if (ch == opercharacter)
        {
            return true;
        }
    }
    return false;
}
/*
This function reads the next token from the given input file.
file: the stream to read from; it is advanced past the token that is returned.
Returns a LexTok whose token field is one of:
  "Keyword", "Type", "Identifier" - a letter followed by letters, digits or '_'
  "IntConst"                      - one or more digits
  "RealConst"                     - digits '.' digits
  "StrConst"                      - text enclosed in double quotes (quotes kept in the lexeme)
  "RelOp"                         - <  >  =  <=  >=  !=  ==
  "Operator"                      - a character accepted by CheckOperatorList, or ":="
  "error"                         - a lone '!' or any other unrecognised character
  "eof"                           - nothing left to read (lexeme is empty)
An unterminated string literal is also reported as "eof" with an empty lexeme.
Lookahead is done with peek(), so a token that ends exactly at end of file is
returned normally and the following call returns "eof".
*/
LexTok lexer(std::ifstream& file)
{
    typedef std::ifstream::traits_type Traits;

    // The <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour, hence the casts.
    const auto isLetter = [](char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; };
    const auto isDigit = [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };
    const auto isBlank = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };
    const auto isIdentifierPart = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    // True when an unread character exists and equals `expected`; consumes nothing.
    const auto nextIs = [&file](char expected) {
        const Traits::int_type next = file.peek();
        return next != Traits::eof() && Traits::to_char_type(next) == expected;
    };

    // Consumes characters into `out` for as long as the next unread character
    // satisfies `accept`. Stops at end of input without consuming anything more.
    const auto appendWhile = [&file](std::string& out, bool (*accept)(char)) {
        for (Traits::int_type next = file.peek();
             next != Traits::eof() && accept(Traits::to_char_type(next));
             next = file.peek())
        {
            out += Traits::to_char_type(next);
            file.get();
        }
    };

    LexTok returnItem = { "", "" };
    char ch = '\0';

    // Skip leading whitespace (including '\r', so CRLF input is handled).
    while (file.get(ch) && isBlank(ch))
    {
    }

    if (!file)
    {
        // Nothing could be read: end of input, or the stream is unusable.
        returnItem.token = "eof";
        return returnItem;
    }

    if (isLetter(ch)) // Keyword, type name or identifier.
    {
        returnItem.lexeme += ch;
        appendWhile(returnItem.lexeme, isIdentifierPart);
        if (CheckKeywordList(returnItem.lexeme))
        {
            returnItem.token = "Keyword";
        }
        else if (CheckTypeList(returnItem.lexeme))
        {
            returnItem.token = "Type";
        }
        else
        {
            returnItem.token = "Identifier";
        }
    }
    else if (isDigit(ch)) // Integer or real constant.
    {
        returnItem.lexeme += ch;
        appendWhile(returnItem.lexeme, isDigit);
        returnItem.token = "IntConst";
        if (nextIs('.'))
        {
            // Tentatively take the '.', then see whether digits follow it.
            file.get();
            std::string fraction;
            appendWhile(fraction, isDigit);
            if (!fraction.empty())
            {
                returnItem.lexeme += '.';
                returnItem.lexeme += fraction;
                returnItem.token = "RealConst";
            }
            else
            {
                // Not a real constant: give the '.' back so the next call
                // returns it as its own token instead of losing it.
                file.unget();
            }
        }
    }
    else if (ch == '"') // String constant.
    {
        returnItem.lexeme += ch;
        while (file.get(ch) && ch != '"')
        {
            returnItem.lexeme += ch;
        }
        if (file)
        {
            returnItem.lexeme += ch; // closing quote
            returnItem.token = "StrConst";
        }
        else
        {
            // Input ended before the closing quote.
            returnItem.lexeme = "";
            returnItem.token = "eof";
        }
    }
    else if (ch == '<' || ch == '>' || ch == '!' || ch == '=') // Relational operator.
    {
        returnItem.lexeme += ch;
        if (nextIs('='))
        {
            file.get();
            returnItem.lexeme += '=';
        }
        // '!' is only meaningful as part of "!=".
        returnItem.token = (returnItem.lexeme == "!") ? "error" : "RelOp";
    }
    else if (CheckOperatorList(ch)) // Operator or punctuation.
    {
        returnItem.lexeme += ch;
        returnItem.token = "Operator";
        if (ch == ':' && nextIs('='))
        {
            file.get();
            returnItem.lexeme += '=';
        }
    }
    else
    {
        returnItem.lexeme += ch;
        returnItem.token = "error";
    }
    return returnItem;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment