Created
October 31, 2018 00:10
-
-
Save mikeymop/d1e5de6f29e9f41fb74246ce3958ec88 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <map> | |
#include <string.h> | |
#include <cctype> | |
#include "tokens.h" | |
//track all tokentypes from tokens.h | |
static std::map<TokenType, std::string> TokenMap { | |
{PRINT, "PRINT"}, | |
{IF, "IF"}, | |
{THEN, "THEN"}, | |
{TRUE, "TRUE"}, | |
{FALSE, "FALSE"}, | |
{IDENT, "IDENT"}, | |
{ICONST, "ICONST"}, | |
{SCONST, "SCONST"}, | |
{PLUS, "PLUS"}, | |
{MINUS, "MINUS"}, | |
{STAR, "STAR"}, | |
{SLASH, "SLASH"}, | |
{ASSIGN, "ASSIGN"}, | |
{EQ, "EQ"}, | |
{NEQ, "NEQ"}, | |
{LT, "LT"}, | |
{LEQ, "LEQ"}, | |
{GT, "GT"}, | |
{GEQ, "GEQ"}, | |
{LOGICAND, "LOGICAND"}, | |
{LOGICOR, "LOGICOR"}, | |
{LPAREN, "LPAREN"}, | |
{RPAREN, "RPAREN"}, | |
{SC, "SC"}, | |
{ERR, "ERR"}, | |
{DONE, "DONE"}, | |
}; | |
/*Token::TokenType detect_id(std::string const& iStr) { | |
if (iStr == "var") return VAR; | |
if (iStr == "set") return SET; | |
if (iStr == "print") return PRINT; | |
if (iStr == "repeat") return REPEAT; | |
}*/ | |
/*static std::map<std::string, TokenType> identures { | |
//I very inconveniently cannot use the tokenmap | |
{"var", VAR}, | |
{"set", SET}, | |
{"print", PRINT}, | |
{"repeat", REPEAT} | |
};*/ | |
//I have to treat identifiers differently, since I cant fit the ident tokens in the TokenMap | |
/*Token identificator(const string& lexeme, int lineNum) { | |
TokenType tt = IDENT; | |
switch(detect_id(lexeme)) { | |
case VAR: | |
tt = VAR; | |
return VAR; | |
case SET: | |
tt = SET; | |
return SET; | |
case PRINT: | |
tt = PRINT; | |
return PRINT; | |
case REPEAT: | |
tt = REPEAT; | |
return REPEAT; | |
} | |
return Token(tt, lexeme, lineNum); | |
}*/ | |
// Stream a Token in the assignment's display format: the token-type name,
// followed by "(lexeme)" for the token kinds whose lexeme carries information.
ostream& operator<<(ostream& out, const Token &tok) {
    TokenType tt = tok.GetTokenType();
    // Use find() rather than operator[]: operator[] would silently insert a
    // default-constructed (empty) entry into the shared static map for any
    // token type that is somehow missing.
    auto it = TokenMap.find(tt);
    out << (it != TokenMap.end() ? it->second : "UNKNOWN");
    // These token kinds are only meaningful together with their lexeme.
    if (tt == IDENT || tt == SCONST || tt == ICONST || tt == ERR) {
        out << "(" << tok.GetLexeme() << ")";
    }
    return out;
}
// Read characters from *in and return the next Token.
// *lineNum is incremented for every newline consumed; a token reports the
// line number current when the token is completed.
//
// Fixes over the previous version:
//  - two-char operators (==, !=, <=, >=, &&, ||) now consume the second
//    character instead of only peeking at it, and no longer append the
//    peeked char to a one-char lexeme;
//  - <=, >=, &&, || no longer have their token type unconditionally
//    overwritten with LT after being set;
//  - keywords are only recognized once the whole identifier has been read
//    ("printx" is IDENT, not PRINT followed by garbage);
//  - any delimiter (not just whitespace) ends an identifier; the delimiter
//    is put back exactly once, so the old infinite putback loop is gone;
//  - integer constants of any length are read fully before ICONST is
//    returned, and the delimiter is put back;
//  - a token still being built at EOF is returned instead of being dropped.
Token getNextToken(istream *in, int *lineNum) {
    enum LexState { BEGIN, INID, INSTRING, ININT, INCOMMENT };

    string lexeme;          // the lexeme being built
    char ch;                // current character
    LexState state = BEGIN;

    // Map a completed identifier lexeme to its keyword token, or IDENT.
    auto keywordOrIdent = [](const string &lex) -> TokenType {
        if (lex == "print") return PRINT;
        if (lex == "if")    return IF;
        if (lex == "then")  return THEN;
        if (lex == "true")  return TRUE;
        if (lex == "false") return FALSE;
        return IDENT;
    };

    while (in->get(ch)) {
        switch (state) {
        case BEGIN:
            if (ch == '\n') {
                (*lineNum)++;
            }
            if (isspace((unsigned char)ch)) {
                continue;                   // skip whitespace between tokens
            }
            lexeme = ch;
            if (isalpha((unsigned char)ch)) {
                state = INID;
            } else if (isdigit((unsigned char)ch)) {
                state = ININT;
            } else if (ch == '"') {
                state = INSTRING;
            } else if (ch == '#') {
                state = INCOMMENT;
            } else {
                // single- and two-character operators
                TokenType tt = ERR;
                switch (ch) {
                case '+': tt = PLUS;   break;
                case '*': tt = STAR;   break;
                case '/': tt = SLASH;  break;
                case '-': tt = MINUS;  break;
                case '(': tt = LPAREN; break;
                case ')': tt = RPAREN; break;
                case ';': tt = SC;     break;
                case '=':
                    if (in->peek() == '=') { in->get(ch); lexeme += ch; tt = EQ; }
                    else                   { tt = ASSIGN; }
                    break;
                case '!':
                    // '!' is only valid as part of "!="
                    if (in->peek() == '=') { in->get(ch); lexeme += ch; tt = NEQ; }
                    break;
                case '<':
                    if (in->peek() == '=') { in->get(ch); lexeme += ch; tt = LEQ; }
                    else                   { tt = LT; }
                    break;
                case '>':
                    if (in->peek() == '=') { in->get(ch); lexeme += ch; tt = GEQ; }
                    else                   { tt = GT; }
                    break;
                case '&':
                    // '&' is only valid as part of "&&"
                    if (in->peek() == '&') { in->get(ch); lexeme += ch; tt = LOGICAND; }
                    break;
                case '|':
                    // '|' is only valid as part of "||"
                    if (in->peek() == '|') { in->get(ch); lexeme += ch; tt = LOGICOR; }
                    break;
                }
                return Token(tt, lexeme, *lineNum);
            }
            break;

        case INID:
            if (isalpha((unsigned char)ch) || isdigit((unsigned char)ch)) {
                lexeme += ch;
            } else {
                // The delimiter belongs to the next token; put it back and
                // (since it was not consumed) do not count a newline here.
                in->putback(ch);
                return Token(keywordOrIdent(lexeme), lexeme, *lineNum);
            }
            break;

        case ININT:
            if (isdigit((unsigned char)ch)) {
                lexeme += ch;
            } else if (isalpha((unsigned char)ch)) {
                lexeme += ch;   // e.g. "12ab" — malformed numeric token
                return Token(ERR, lexeme, *lineNum);
            } else {
                in->putback(ch);
                return Token(ICONST, lexeme, *lineNum);
            }
            break;

        case INSTRING:
            if (ch == '\n') {
                // string constants may not span lines
                return Token(ERR, lexeme, *lineNum);
            }
            lexeme += ch;
            if (ch == '"') {
                // strip the surrounding quotes from the stored lexeme
                lexeme = lexeme.substr(1, lexeme.length() - 2);
                return Token(SCONST, lexeme, *lineNum);
            }
            break;

        case INCOMMENT:
            // '#' comments run to end of line
            if (ch == '\n') {
                (*lineNum)++;
                state = BEGIN;
            }
            break;
        }
    }

    // EOF: finish any token that was still being built.
    switch (state) {
    case INID:     return Token(keywordOrIdent(lexeme), lexeme, *lineNum);
    case ININT:    return Token(ICONST, lexeme, *lineNum);
    case INSTRING: return Token(ERR, lexeme, *lineNum);  // unterminated string
    default:       break;
    }
    if (in->eof())
        return Token(DONE, "", *lineNum);
    return Token(ERR, lexeme, *lineNum);   // stream failure other than EOF
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <string.h> | |
#include "tokens.h" | |
using namespace std; | |
/* | |
return Token(token type, lexeme, line#) | |
return Token(Done, "", lineNum) | |
-v every token is printed | |
-sum if present, summary information is printed | |
-allids a list of the lexemes for all identifiers printed in | |
alphabetical order | |
filename read from the filename; otherwise read from standard in | |
getNextToken(istream * ...) | |
istream *in; | |
in = & cin; | |
or | |
in = & some ifstream; | |
*/ | |
istream *in = &cin;                 // input stream; main() repoints this at an ifstream when a filename is given
Token tok;                          // most recently read token (default-constructed: ERR, line -1)
TokenType tt = tok.GetTokenType();  // NOTE(review): always ERR at static-init time and never read below — candidate for removal
int main(int argc, char* argv[]) { | |
//args | |
//int numfiles; | |
bool isfile; | |
bool v; | |
bool sum; | |
//bool allids; | |
int lineNum; | |
int stringCt; | |
//int identCt; | |
int tokenCt; | |
string arg1; | |
string source; | |
for(int i=1; i < argc; i++) { | |
//numfiles = 0; | |
arg1 = argv[i]; | |
//Done: arg tester | |
if(in) { | |
; | |
} | |
if(arg1[0] == '-') { | |
if(arg1 == "-v") { | |
//do -v | |
v = true; | |
} else if(arg1 == "-sum") { | |
//do sum | |
sum = true; | |
} else if(arg1 == "-allids") { | |
//do allids | |
; | |
} else { | |
cout << "INVALID FLAG " << arg1 << endl; | |
return 2; | |
} | |
} else { | |
//arg must be a filename test it | |
isfile = true; | |
source = argv[i]; | |
} | |
//handle files if you have them2 | |
ifstream iFile; | |
if(isfile) { | |
iFile.open(source); | |
if(i < argc - 1) { | |
cout << "TOO MANY FILE NAMES" << endl; | |
return 3; | |
} else if(!iFile.is_open()) { | |
cerr << "UNABLE TO OPEN " << source << endl; | |
} else { | |
in = &iFile; | |
//iterate the file and spit out tokens | |
while((tok = getNextToken(&iFile, &lineNum)) != ERR && tok != DONE) { | |
// handle verbose mode | |
if(v) { | |
cout << tok << endl; | |
} | |
tokenCt++; | |
//we can pick out tokens with GetTokenType() | |
if(tok.GetTokenType() == SCONST) { | |
stringCt++; | |
} | |
} //while | |
if(tok.GetTokenType() == ERR) { | |
cout << "Error on line " << lineNum << " ("; | |
cout << tok.GetLexeme() << ")" << endl; | |
} | |
//sum stuff | |
if(sum) { | |
cout << "Total lines: " << endl; | |
cout << "Total tokens: " << endl; | |
cout << "Total identifiers: " << endl; | |
cout << "Total strings: " << endl; | |
} | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* tokens.h | |
* | |
* CS280 | |
* Fall 2018 | |
*/ | |
#ifndef TOKENS_H_ | |
#define TOKENS_H_ | |
#include <string> | |
#include <iostream> | |
using std::string;
using std::istream;
using std::ostream;

// All token categories produced by the lexer (getNextToken).
enum TokenType {
    // keywords
    PRINT,
    IF,
    THEN,
    TRUE,
    FALSE,
    // an identifier
    IDENT,
    // an integer and string constant
    ICONST,
    SCONST,
    // the operators, parens and semicolon
    PLUS,
    MINUS,
    STAR,
    SLASH,
    ASSIGN,
    EQ,
    NEQ,
    LT,
    LEQ,
    GT,
    GEQ,
    LOGICAND,
    LOGICOR,
    LPAREN,
    RPAREN,
    SC,
    // any error returns this token
    ERR,
    // when completed (EOF), return this token
    DONE
};

// A single lexical token: its category, the matched lexeme, and the
// source line it was completed on.
class Token {
    TokenType tt;
    string lexeme;
    int lnum;
public:
    // Default token: an error with an empty lexeme and an invalid line.
    Token() : tt(ERR), lnum(-1) {}
    // Construct a token of the given type and lexeme at the given line.
    // (Member-initializer lists replace assignment in the ctor body.)
    Token(TokenType tt, string lexeme, int line)
        : tt(tt), lexeme(lexeme), lnum(line) {}
    // Allow comparing a Token directly against a TokenType.
    bool operator==(const TokenType tt) const { return this->tt == tt; }
    bool operator!=(const TokenType tt) const { return this->tt != tt; }
    TokenType GetTokenType() const { return tt; }
    string GetLexeme() const { return lexeme; }
    int GetLinenum() const { return lnum; }
};
extern ostream& operator<<(ostream& out, const Token& tok); | |
extern Token getNextToken(istream *in, int *linenum); | |
#endif /* TOKENS_H_ */ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment