Created
March 4, 2016 16:34
-
-
Save VenturaDelMonte/cdcd7d968a18b32304a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* @author Del Monte Ventura - Cesarano Antonio | |
* The scanner definition for COOL. | |
*/ | |
import java_cup.runtime.Symbol; | |
%% | |
%{
    /* Everything between %{ and %} is copied verbatim into the generated
     * lexer class. Helper fields and methods used by the lexer actions
     * live here. Don't remove or modify anything that was there
     * initially. */

    /**
     * Size limit for string constants. The STRING rules stop appending once
     * the buffer holds MAX_STR_CONST - 1 characters.
     */
    static int MAX_STR_CONST = 1025;

    /** Message attached to the ERROR token emitted for an over-long string. */
    private static final String STRING_TOO_LONG_ERROR_MESSAGE = "String constant too long";

    /** Collects the characters of the string constant currently being scanned. */
    StringBuffer string_buff = new StringBuffer();

    /** Nesting depth of the multi-line comment currently being skipped. */
    private int nested_comments = 0;

    /** Line counter maintained by the lexer actions; starts at 1. */
    private int curr_lineno = 1;

    /** Returns the line counter maintained by the lexer actions. */
    int get_curr_lineno() {
        return curr_lineno;
    }

    /** Symbol of the file being scanned, as registered by set_filename. */
    private AbstractSymbol filename;

    /** Registers fname in the string table and remembers the resulting symbol. */
    void set_filename(String fname) {
        filename = AbstractTable.stringtable.addString(fname);
    }

    /** Returns the symbol stored by the last call to set_filename. */
    AbstractSymbol curr_filename() {
        return filename;
    }
%}
/* main character classes */

/* Kept for reference. It is deliberately NOT part of WhiteSpace: a "\r\n"
 * matched as one unit would win over the single-character "\n" rule of
 * YYINITIAL (longest match), and the line would never be counted. */
LineTerminator = \r|\n|\r\n

/* Blank, tab, form feed, carriage return and vertical tab. The newline
 * itself is handled by its own rule so that every line is counted.
 * (The former "\xB" entry was dropped: \x expects two hex digits, and the
 * vertical tab is already covered by \u000b.) */
WhiteSpace = [ \t\f\r\u000b]

/* identifiers */
TypeIdentifier = [A-Z][a-zA-Z0-9_]*
ObjectIdentifier = [a-z][a-zA-Z0-9_]*

/* integer literals */
DecIntegerLiteral = [0-9]+

/* lexer states */
%state STRING, ML_COMM, SL_COMM, ERROR_STRING
%init{
/* Code enclosed in %init{ %init} is copied verbatim into the constructor
 * of the generated lexer class; any extra initialization belongs here.
 * Don't remove or modify anything that was there initially. */
// No extra initialization is needed at the moment.
%init}
%eofval{
    /* Code enclosed in %eofval{ %eofval} runs when end-of-file is reached.
     * Hitting EOF inside a string constant or a multi-line comment is
     * reported once as an ERROR token; the lexer is put back into
     * YYINITIAL first, so the following call yields the EOF symbol.
     * Every other state simply ends the token stream with EOF. */
    final int stateAtEof = zzLexicalState;

    if (stateAtEof == STRING) {
        yybegin(YYINITIAL);
        return new Symbol(TokenConstants.ERROR, "EOF in string constant");
    }

    if (stateAtEof == ML_COMM) {
        yybegin(YYINITIAL);
        return new Symbol(TokenConstants.ERROR, "EOF in comment");
    }

    // YYINITIAL, SL_COMM and any remaining state: plain end of input.
    return new Symbol(TokenConstants.EOF);
%eofval}
// Name of the generated scanner class.
%class CoolLexer
// Generate a scanner compatible with the CUP parser generator
// (tokens are java_cup.runtime.Symbol instances).
%cup
// Scan the full Unicode character set.
%unicode
// Enable JFlex's built-in line counter; note that the actions below keep
// their own counter (curr_lineno) as well.
%line
%% | |
/* Rules of the default state: keywords, operators, identifiers, literals,
 * and the entry points into the string and comment states. Rule order
 * matters: when two rules match the same length the earlier one wins, so
 * keywords must stay ahead of the identifier patterns. */
<YYINITIAL> {
    /* keywords (case-insensitive) */
    [cC][lL][aA][sS][sS]                { return new Symbol(TokenConstants.CLASS); }
    [eE][lL][sS][eE]                    { return new Symbol(TokenConstants.ELSE); }
    [fF][iI]                            { return new Symbol(TokenConstants.FI); }
    [iI][fF]                            { return new Symbol(TokenConstants.IF); }
    [iI][nN]                            { return new Symbol(TokenConstants.IN); }
    [iI][nN][hH][eE][rR][iI][tT][sS]    { return new Symbol(TokenConstants.INHERITS); }
    [iI][sS][vV][oO][iI][dD]            { return new Symbol(TokenConstants.ISVOID); }
    [lL][eE][tT]                        { return new Symbol(TokenConstants.LET); }
    [lL][oO][oO][pP]                    { return new Symbol(TokenConstants.LOOP); }
    [pP][oO][oO][lL]                    { return new Symbol(TokenConstants.POOL); }
    [tT][hH][eE][nN]                    { return new Symbol(TokenConstants.THEN); }
    [wW][hH][iI][lL][eE]                { return new Symbol(TokenConstants.WHILE); }
    [cC][aA][sS][eE]                    { return new Symbol(TokenConstants.CASE); }
    [eE][sS][aA][cC]                    { return new Symbol(TokenConstants.ESAC); }
    [nN][eE][wW]                        { return new Symbol(TokenConstants.NEW); }
    [oO][fF]                            { return new Symbol(TokenConstants.OF); }
    [nN][oO][tT]                        { return new Symbol(TokenConstants.NOT); }

    /* added: for (disabled) */
    /* [fF][oO][rR] { return new Symbol(TokenConstants.POOL); } */
    /* added: mycase..do (disabled) */
    /* [mM][yY][cC][aA][sS][eE] { return new Symbol(TokenConstants.ESAC); } */
    /* [dD][oO] { return new Symbol(TokenConstants.POOL); } */
    /* [dD][eE][fF][aA][uU][lL][tT] { return new Symbol(TokenConstants.FI); } */

    /* added: mapcar n, f, x1, ..., xM */
    /* NOTE(review): mapcar is emitted as the INHERITS token, so the word can
     * no longer be used as an identifier; presumably the parser repurposes
     * that token for this extension -- confirm against the grammar. */
    [mM][aA][pP][cC][aA][rR]            { return new Symbol(TokenConstants.INHERITS); }

    /* booleans: the first letter must be lower case */
    t[rR][uU][eE]                       { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.TRUE); }
    f[aA][lL][sS][eE]                   { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.FALSE); }

    /* operators */
    "+"     { return new Symbol(TokenConstants.PLUS); }
    "-"     { return new Symbol(TokenConstants.MINUS); }
    "*"     { return new Symbol(TokenConstants.MULT); }
    "/"     { return new Symbol(TokenConstants.DIV); }
    "="     { return new Symbol(TokenConstants.EQ); }
    "<"     { return new Symbol(TokenConstants.LT); }
    "<="    { return new Symbol(TokenConstants.LE); }
    "=>"    { return new Symbol(TokenConstants.DARROW); }
    "~"     { return new Symbol(TokenConstants.NEG); }
    "@"     { return new Symbol(TokenConstants.AT); }
    "."     { return new Symbol(TokenConstants.DOT); }
    "<-"    { return new Symbol(TokenConstants.ASSIGN); }
    ","     { return new Symbol(TokenConstants.COMMA); }
    ";"     { return new Symbol(TokenConstants.SEMI); }
    ":"     { return new Symbol(TokenConstants.COLON); }
    "("     { return new Symbol(TokenConstants.LPAREN); }
    ")"     { return new Symbol(TokenConstants.RPAREN); }
    "{"     { return new Symbol(TokenConstants.LBRACE); }
    "}"     { return new Symbol(TokenConstants.RBRACE); }

    /* type identifier pattern */
    {TypeIdentifier}    { return new Symbol(TokenConstants.TYPEID, AbstractTable.idtable.addString(yytext())); }

    /* object identifier pattern */
    {ObjectIdentifier}  { return new Symbol(TokenConstants.OBJECTID, AbstractTable.idtable.addString(yytext())); }

    /* integer pattern */
    {DecIntegerLiteral} { return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(yytext())); }

    /* newline: only counted, no token */
    \n                  { curr_lineno++; }

    /* whitespace: skipped */
    {WhiteSpace}        { }

    /* string opened: reset the buffer and collect characters in STRING */
    \"                  { string_buff.setLength(0); yybegin(STRING); }

    /* multi-line comment opened: nesting depth starts at 1 */
    "(*"                { nested_comments = 1; yybegin(ML_COMM); }

    /* single-line comment */
    "--"                { yybegin(SL_COMM); }

    /* comment terminator without a matching opener */
    "*)"                { return new Symbol(TokenConstants.ERROR, "Unmatched *)"); }

    /* invalid character.
     * [^\n] is used instead of '.' because newer JFlex releases exclude
     * further line-terminator characters (e.g. U+0085, U+2028) from '.',
     * which would leave them unmatched and abort the scan instead of
     * producing an ERROR token. */
    [^\n]               { return new Symbol(TokenConstants.ERROR, yytext()); }
}
/**
 * single line comment handler
 *
 * The comment body and its terminating newline are matched by separate
 * rules so that a comment on the last line of a file (no trailing newline)
 * is still consumed; EOF is then handled by the %eofval code.
 */
<SL_COMM> {
    /* comment text: skipped */
    [^\n]+  { }

    /* end of the comment: count the line and return to the initial state */
    \n      { curr_lineno++; yybegin(YYINITIAL); }
}
/**
 * multiple line comment handler
 *
 * Comments nest: nested_comments holds the current depth and the lexer
 * returns to YYINITIAL only when the outermost comment is closed.
 */
<ML_COMM> {
    /* end of a (possibly nested) comment */
    "*)"    {
                nested_comments--;
                if (nested_comments == 0) { yybegin(YYINITIAL); }
            }

    /* start of a nested comment */
    "(*"    { nested_comments++; }

    /* newline inside the comment: only the line counter changes */
    \n      { curr_lineno++; }

    /* any other character is skipped. [^\n] is used instead of '.' so that
     * '\r' and other characters which newer JFlex releases exclude from
     * '.' cannot be left unmatched inside a comment. */
    [^\n]   { }
}
/**
 * strings handler
 *
 * Characters are collected in string_buff until the closing quote. Each
 * append is guarded by the MAX_STR_CONST - 1 limit; on overflow, or on a
 * null character, an ERROR token is returned and the rest of the string is
 * skipped in ERROR_STRING.
 */
<STRING> {
    /* end of string constant */
    \"  {
            yybegin(YYINITIAL);
            // (i.e. return <40, "hello world">)
            return new Symbol(TokenConstants.STR_CONST,
                              AbstractTable.stringtable.addString(string_buff.toString()));
        }

    /* ordinary character: appended as-is */
    [^\n\\\"\0] {
            if (string_buff.length() >= MAX_STR_CONST - 1) {
                yybegin(ERROR_STRING);
                return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
            }
            string_buff.append(yytext());
        }

    /* escaped newline (LF or CRLF): continues the string on the next line
     * and contributes a single '\n' */
    \\\r?\n {
            curr_lineno++;
            if (string_buff.length() >= MAX_STR_CONST - 1) {
                yybegin(ERROR_STRING);
                return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
            }
            string_buff.append('\n');
        }

    /* unescaped newline: the string is unterminated; resume on the next line */
    \n  {
            curr_lineno++;
            yybegin(YYINITIAL);
            return new Symbol(TokenConstants.ERROR, "Unterminated string constant");
        }

    /* backslash followed by a real null character: an error, not a
     * character to be stored in the constant */
    \\\u0000 {
            yybegin(ERROR_STRING);
            return new Symbol(TokenConstants.ERROR, "String contains escaped null character.");
        }

    /* escape sequence: \n, \t, \f and \b denote control characters, any
     * other escaped character (including \" and \') stands for itself */
    \\[^\n\0] {
            if (string_buff.length() >= MAX_STR_CONST - 1) {
                yybegin(ERROR_STRING);
                return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
            }
            switch (yytext().charAt(1)) {
                case 'n': string_buff.append('\n'); break;
                case 't': string_buff.append('\t'); break;
                case 'f': string_buff.append('\f'); break;
                case 'b': string_buff.append('\b'); break;
                // everything after the backslash is kept verbatim
                default:  string_buff.append(yytext().substring(1)); break;
            }
        }

    /* lone backslash (only possible right before end of input): dropped */
    \\  { }

    /* null character */
    \x00|\u0000 {
            yybegin(ERROR_STRING);
            return new Symbol(TokenConstants.ERROR, "String contains null character.");
        }
}
/* continue lexical analysis after an error inside a string constant:
 * the remainder of the broken string is skipped up to its closing quote
 * or up to the first unescaped newline, whichever comes first */
<ERROR_STRING> {
    /* closing quote: resume normal lexing right after it.
     * Only the FIRST unescaped quote ends the string; a greedy ".*\""
     * would run to the last quote on the line and swallow every token
     * in between. */
    \"          { yybegin(YYINITIAL); }

    /* escaped newline (LF or CRLF): the string continues on the next line */
    \\\r?\n     { curr_lineno++; }

    /* unescaped newline: resume lexing at the next line */
    \n          { curr_lineno++; yybegin(YYINITIAL); }

    /* escaped character, in particular \" which must not end the string */
    \\[^\n]     { }

    /* run of ordinary characters: skipped */
    [^\\\n\"]+  { }

    /* lone backslash right before end of input */
    \\          { }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment