Created
October 3, 2021 16:34
-
-
Save nsssayom/bd5babd6de81c157570111e54bdb9a1f to your computer and use it in GitHub Desktop.
A simple lexical analyzer for arithmetic expressions, written in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp> // for detail: https://www.boost.org/

using namespace std;
// One lexical token produced by the analyzer.
struct token
{
    std::string token_type; // "identifier", "constant" or "symbol"
    int position = 0;       // location recorded by whoever created the token
    std::string value;      // text of the token
};
// Characters treated as operator symbols by the analyzer.
const std::string symbols = "=+-*/";
// C-string view of `symbols`; it points into `symbols`' own buffer, so it must
// stay declared after `symbols` in this translation unit.
const char *symbols_arr = symbols.c_str();
// Output structure array | |
struct token out_tokens[100]; | |
int out_tokens_count = 0; | |
// Returns true when `s` is a non-empty string made up only of decimal digits.
bool is_constant(const std::string &s)
{
    // std::isdigit has undefined behaviour for negative char values, so every
    // character is converted to unsigned char before being classified.
    return !s.empty() &&
           std::all_of(s.begin(), s.end(),
                       [](unsigned char c) { return std::isdigit(c) != 0; });
}
// Function to check if a character is a symbol | |
void parse_symbol(string input) | |
{ | |
// Parse for symbols | |
int symbol_pos = 1; | |
for (int i = 0; i < input.length(); i++) | |
{ | |
for (int j = 0; j < 5; j++) | |
{ | |
if (input[i] == symbols_arr[j]) | |
{ | |
out_tokens[symbol_pos].token_type = "symbol"; | |
out_tokens[symbol_pos].position = i; | |
out_tokens[symbol_pos].value = symbols_arr[j]; | |
symbol_pos = symbol_pos + 2; | |
out_tokens_count++; | |
} | |
} | |
} | |
} | |
// Function to print output | |
void print_result(){ | |
// Print out_tokens | |
int id_counter = 0; | |
for (int i = 0; i < out_tokens_count; i++) | |
{ | |
if (out_tokens[i].token_type == "identifier") | |
{ | |
cout << "(id" << id_counter++ << ")"; | |
} | |
else if (out_tokens[i].token_type == "constant") | |
{ | |
cout << out_tokens[i].value; | |
} | |
else if (out_tokens[i].token_type == "symbol") | |
{ | |
cout << "(" << out_tokens[i].value << ")"; | |
} | |
} | |
cout << endl; | |
} | |
int main() | |
{ | |
// Input | |
string input; | |
// Ask for input | |
cout << "Enter a string: "; | |
// Get input | |
getline(cin, input); | |
// Parse for symbols | |
parse_symbol(input); | |
// Tokenize input | |
vector<string> tokens; | |
boost::split(tokens, input, boost::is_any_of(symbols)); | |
// print tokens | |
int non_symbol_pos = 0; | |
// cout << "Tokens: "; | |
for (int i = 0; i < tokens.size(); i++) | |
{ | |
//cout << tokens[i] << " "; | |
if (is_constant(tokens[i])) | |
{ | |
// cout << "constant "; | |
out_tokens[non_symbol_pos].token_type = "constant"; | |
out_tokens[non_symbol_pos].position = non_symbol_pos; | |
out_tokens[non_symbol_pos].value = tokens[i]; | |
non_symbol_pos = non_symbol_pos + 2; | |
out_tokens_count++; | |
} | |
else | |
{ | |
// cout << "identifier"; | |
out_tokens[non_symbol_pos].token_type = "identifier"; | |
out_tokens[non_symbol_pos].position = non_symbol_pos; | |
out_tokens[non_symbol_pos].value = tokens[i]; | |
non_symbol_pos = non_symbol_pos + 2; | |
out_tokens_count++; | |
} | |
} | |
// Print out_tokens | |
print_result(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment