Created
October 24, 2018 02:51
-
-
Save mikeymop/2555c7acf53b688d4f1cd553db9fc6f4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <map> | |
#include <string.h> | |
#include <cctype> | |
#include "tokens.h" | |
//track all tokentypes from tokens.h | |
static std::map<TokenType, std::string> TokenMap { | |
{PRINT, "PRINT"}, | |
{IF, "IF"}, | |
{THEN, "THEN"}, | |
{TRUE, "TRUE"}, | |
{FALSE, "FALSE"}, | |
{IDENT, "IDENT"}, | |
{ICONST, "ICONST"}, | |
{SCONST, "SCONST"}, | |
{PLUS, "PLUS"}, | |
{MINUS, "MINUS"}, | |
{STAR, "STAR"}, | |
{SLASH, "SLASH"}, | |
{ASSIGN, "ASSIGN"}, | |
{EQ, "EQ"}, | |
{NEQ, "NEQ"}, | |
{LT, "LT"}, | |
{LEQ, "LEQ"}, | |
{GT, "GT"}, | |
{GEQ, "GEQ"}, | |
{LOGICAND, "LOGICAND"}, | |
{LOGICOR, "LOGICOR"}, | |
{LPAREN, "LPAREN"}, | |
{RPAREN, "RPAREN"}, | |
{SC, "SC"}, | |
{ERR, "ERR"}, | |
{DONE, "DONE"} | |
}; | |
// Stream insertion for tokens.
//
// Writes the token's printable name to `out`. For IDENT, SCONST, ICONST and
// ERR tokens the lexeme follows in parentheses, e.g. IDENT(foo).
// Returns `out` so insertions can be chained.
ostream& operator<<(ostream& out, const Token &tok) {
    const TokenType tt = tok.GetTokenType();

    // Look the name up with find() rather than operator[]: operator[] would
    // insert an empty entry into the shared table for any missing key.
    const auto entry = TokenMap.find(tt);
    if (entry != TokenMap.end()) {
        out << entry->second;
    }

    // Only these token kinds carry text worth showing.
    if (tt == IDENT || tt == SCONST || tt == ICONST || tt == ERR) {
        out << "(" << tok.GetLexeme() << ")";
    }
    return out;
}
Token getNextToken(istream *in, int *lineNum) { | |
//TokenType tt = tok.GetTokenType(); //test tok | |
//from slide | |
enum LexState {BEGIN, INID, INSTRING, INCOMMENT, /* do more later */}; | |
string lexeme; | |
char ch; | |
LexState state = BEGIN; | |
//deref instream, istream::peek didn't work? | |
while(in->get(ch)) { | |
//grab chars / close lines | |
if(ch == '\n') { | |
(*lineNum)++; | |
} | |
//lexemes for strings and declarations | |
switch(state) { | |
//begin case | |
case BEGIN: | |
if(isspace(ch)) { | |
continue; //move on to next word if you see a space | |
} | |
lexeme = ch; //save the char | |
//Identifier stuff | |
if(isalpha(ch)) { | |
state = INID; | |
} else if(isdigit(ch)) { | |
;//state = ININT; | |
} else if( ch == '"' ) { | |
//string stuff | |
state = INSTRING; | |
} else { | |
// operators | |
TokenType tt = ERR; | |
char x; //tmp | |
switch(ch) { | |
case '+': | |
tt = PLUS; | |
case '*': | |
tt = STAR; | |
case '/': | |
tt = SLASH; | |
case '-': | |
tt = MINUS; | |
case '#': | |
state = INCOMMENT; | |
case '=': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '=') { | |
tt = EQ; | |
} else if(' ') { | |
tt = ASSIGN; | |
} else { | |
return Token(ERR, lexeme, *lineNum); | |
} | |
case '!': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '=') { | |
tt = NEQ; | |
} else { | |
//tf is that? | |
return Token(ERR, lexeme, *lineNum); | |
} | |
case '<': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '=') { | |
tt = LEQ; | |
} | |
tt = LT; | |
case '>': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '=') { | |
tt = GEQ; | |
} | |
tt = LT; | |
case '&': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '&') { | |
tt = LOGICAND; | |
} | |
tt = LT; | |
case '|': | |
//two char ops are strange | |
x = in->peek(); | |
if(ch == '|') { | |
tt = LOGICOR; | |
} | |
tt = LT; | |
case '(': | |
//two char ops are strange | |
tt = LPAREN; | |
case ')': | |
//two char ops are strange | |
tt = RPAREN; | |
case ';': | |
tt = SC; | |
} //end opcase | |
//cant return in case statements | |
return Token(tt, lexeme, *lineNum); | |
} //end stringbs | |
break; | |
//end begin case | |
//identifiers | |
case INID: | |
if(isalpha(ch) || isdigit(ch)) { | |
lexeme += ch; | |
} else { | |
if(ch == '\n') | |
in->putback(ch); | |
} | |
return Token(IDENT, lexeme, *lineNum); | |
//break; for later | |
//finish strings | |
case INSTRING: | |
lexeme += ch; | |
if(ch == '\n') { | |
return Token(ERR, lexeme, *lineNum); | |
} | |
if(ch == '"') { | |
lexeme = lexeme.substr(1, lexeme.length()-1); | |
return Token(SCONST, lexeme, *lineNum); | |
} | |
break; //start the case over | |
case INCOMMENT: | |
break; | |
} | |
} | |
if( in->eof() ) | |
return Token(DONE, "", *lineNum); | |
return Token(ERR, lexeme, *lineNum); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <string.h> | |
#include "tokens.h" | |
using namespace std; | |
/*
 * Lexer driver notes.
 *
 * Command-line arguments:
 *   -v        every token is printed
 *   -sum      if present, summary information is printed
 *   -allids   a list of the lexemes for all identifiers is printed in
 *             alphabetical order
 *   filename  read from the named file; otherwise read from standard input
 *
 * getNextToken(istream *in, int *linenum) returns
 *     Token(token type, lexeme, line#)
 * and, at end of input,
 *     Token(DONE, "", lineNum)
 *
 * The stream pointer can refer to either source:
 *     istream *in;
 *     in = &cin;             (standard input)
 *     in = &someIfstream;    (an opened file)
 */
istream *in = &cin; | |
Token tok; | |
TokenType tt = tok.GetTokenType(); | |
int main(int argc, char* argv[]) { | |
//args | |
//int numfiles; | |
bool isfile; | |
bool v; | |
//bool sum; | |
//bool allids; | |
int lineNum; | |
//int stringCt; | |
//int identCt; | |
//int tokenCount; | |
string arg1; | |
string source; | |
for(int i=1; i < argc; i++) { | |
//numfiles = 0; | |
arg1 = argv[i]; | |
//Done: arg tester | |
if(in) { | |
; | |
} | |
if(arg1[0] == '-') { | |
if(arg1 == "-v") { | |
//do -v | |
v = true; | |
} else if(arg1 == "-sum") { | |
//do sum | |
; | |
} else if(arg1 == "-allids") { | |
//do allids | |
; | |
} else { | |
cout << "INVALID FLAG " << arg1 << endl; | |
return 2; | |
} | |
} else { | |
//arg must be a filename test it | |
isfile = true; | |
source = argv[i]; | |
} | |
//handle files if you have them2 | |
ifstream iFile; | |
if(isfile) { | |
iFile.open(source); | |
if(i < argc - 1) { | |
cout << "TOO MANY FILE NAMES" << endl; | |
return 3; | |
} else if(!iFile.is_open()) { | |
cerr << "UNABLE TO OPEN " << source << endl; | |
} else { | |
in = &iFile; | |
while((tok = getNextToken(&iFile, &lineNum)) != ERR && tok != DONE) { | |
// handle verbose mode | |
if(v) { | |
cout << tok << endl; | |
} | |
//TODO: keep statistics for other flags | |
} | |
} | |
} | |
//SLIDE: getNextToken shit | |
/* | |
lineNum = 0; | |
while((tok = getNextToken(in, &lineNum)) != ERR && tok != DONE) { | |
// handle verbose mode | |
if(v) { | |
cout << tok << endl; | |
} | |
//TODO: keep statistics for other flags | |
}*/ | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* tokens.h | |
* | |
* CS280 | |
* Fall 2018 | |
*/ | |
#ifndef TOKENS_H_ | |
#define TOKENS_H_ | |
#include <string> | |
#include <iostream> | |
using std::string; | |
using std::istream; | |
using std::ostream; | |
// Every kind of token the lexer can report.
// Values are assigned implicitly, starting at 0, in the order listed.
enum TokenType {
    // keywords
    PRINT, IF, THEN, TRUE, FALSE,

    // an identifier
    IDENT,

    // integer and string constants
    ICONST, SCONST,

    // arithmetic operators
    PLUS, MINUS, STAR, SLASH,

    // assignment and comparison operators
    ASSIGN, EQ, NEQ, LT, LEQ, GT, GEQ,

    // logical operators
    LOGICAND, LOGICOR,

    // parens and semicolon
    LPAREN, RPAREN, SC,

    // returned for any error
    ERR,

    // returned when input is complete (EOF)
    DONE
};
class Token { | |
TokenType tt; | |
string lexeme; | |
int lnum; | |
public: | |
Token() { | |
tt = ERR; | |
lnum = -1; | |
} | |
Token(TokenType tt, string lexeme, int line) { | |
this->tt = tt; | |
this->lexeme = lexeme; | |
this->lnum = line; | |
} | |
bool operator==(const TokenType tt) const { return this->tt == tt; } | |
bool operator!=(const TokenType tt) const { return this->tt != tt; } | |
TokenType GetTokenType() const { return tt; } | |
string GetLexeme() const { return lexeme; } | |
int GetLinenum() const { return lnum; } | |
}; | |
extern ostream& operator<<(ostream& out, const Token& tok); | |
extern Token getNextToken(istream *in, int *linenum); | |
#endif /* TOKENS_H_ */ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment