Skip to content

Instantly share code, notes, and snippets.

@VenturaDelMonte
Created March 4, 2016 16:34
Show Gist options
  • Save VenturaDelMonte/cdcd7d968a18b32304a9 to your computer and use it in GitHub Desktop.
Save VenturaDelMonte/cdcd7d968a18b32304a9 to your computer and use it in GitHub Desktop.
/*
* @author Del Monte Ventura - Cesarano Antonio
* The scanner definition for COOL.
*/
import java_cup.runtime.Symbol;
%%
%{
/* Stuff enclosed in %{ %} is copied verbatim to the lexer class
* definition, all the extra variables/functions you want to use in the
* lexer actions should go here. Don't remove or modify anything that
* was there initially. */

/**
 * Size limit for string constants. The STRING rules only append while
 * string_buff.length() < MAX_STR_CONST - 1, so an assembled constant
 * holds at most MAX_STR_CONST - 1 characters.
 */
static int MAX_STR_CONST = 1025;

/** Message carried by the ERROR token emitted when a string constant exceeds the limit. */
private static final String STRING_TOO_LONG_ERROR_MESSAGE = "String constant too long";

/** Accumulates the characters of the string constant currently being scanned. */
StringBuffer string_buff = new StringBuffer();

/** Nesting depth of the multi-line comment being scanned; maintained by the comment rules. */
private int nested_comments = 0;

/** Current line number, starting at 1; incremented by the rule actions that consume a newline. */
private int curr_lineno = 1;

/**
 * Returns the line number the scanner has reached.
 *
 * @return the current value of the manually maintained line counter
 */
int get_curr_lineno() {
return curr_lineno;
}

/** Symbol for the name of the file being scanned; null until set_filename is called. */
private AbstractSymbol filename;

/**
 * Records the name of the file being scanned.
 *
 * @param fname the file name; it is added to AbstractTable.stringtable and
 *              the resulting symbol is stored
 */
void set_filename(String fname) {
filename = AbstractTable.stringtable.addString(fname);
}

/**
 * Returns the symbol stored by the last call to set_filename.
 *
 * @return the file name symbol, or null if set_filename was never called
 */
AbstractSymbol curr_filename() {
return filename;
}
%}
/* main character classes */
/* A line break: CR, LF, or the CRLF pair. */
LineTerminator = \r|\n|\r\n
/* Blank characters skipped between tokens: line breaks, space, tab,
 * form feed and vertical tab (\u000b). The former trailing "\xB" was a
 * one-digit hex escape that at best duplicated \u000b, so it was dropped. */
WhiteSpace = {LineTerminator}|[ \t\f\u000b]
/* identifiers */
/* Type identifiers start with an upper-case letter. */
TypeIdentifier = [A-Z][a-zA-Z0-9_]*
/* Object identifiers start with a lower-case letter. */
ObjectIdentifier = [a-z][a-zA-Z0-9_]*
/* integer literals */
/* One or more decimal digits. */
DecIntegerLiteral = [0-9]+
/* lexer states */
/* STRING:       inside a string constant
 * ML_COMM:      inside a (possibly nested) (* ... *) comment
 * SL_COMM:      inside a -- comment
 * ERROR_STRING: skipping the rest of a string constant after an error */
%state STRING, ML_COMM, SL_COMM, ERROR_STRING
%init{
/* Stuff enclosed in %init{ %init} is copied verbatim to the lexer
* class constructor, all the extra initialization you want to do should
* go here. Don't remove or modify anything that was there initially. */
// Intentionally empty: every field used by the rule actions is
// initialized at its declaration in the %{ %} section.
%init}
%eofval{
/* Stuff enclosed in %eofval{ %eofval} specifies java code that is
* executed when end-of-file is reached. The lexical state active at that
* moment decides what is reported: hitting EOF inside a string constant
* or inside a multi-line comment yields one ERROR token (after switching
* back to YYINITIAL, so the following call returns EOF); every other
* state simply returns the EOF symbol. Ultimately the EOF symbol must be
* returned, or the lexer won't work. */
final int stateAtEof = yystate();
if (stateAtEof == STRING) {
    // Unterminated string constant at end of input.
    yybegin(YYINITIAL);
    return new Symbol(TokenConstants.ERROR, "EOF in string constant");
}
if (stateAtEof == ML_COMM) {
    // Unclosed (* ... comment at end of input.
    yybegin(YYINITIAL);
    return new Symbol(TokenConstants.ERROR, "EOF in comment");
}
// YYINITIAL, SL_COMM and any remaining state: plain end of file.
return new Symbol(TokenConstants.EOF);
%eofval}
/* Name of the generated scanner class. */
%class CoolLexer
/* CUP compatibility: the scanner hands java_cup.runtime.Symbol objects to the parser. */
%cup
/* Scan the full Unicode character set. */
%unicode
/* Enable JFlex's built-in line counting; note that the rule actions
 * additionally maintain curr_lineno by hand. */
%line
%%
<YYINITIAL> {
/* keywords: matched case-insensitively. They are listed before the
 * identifier patterns so that, on an equal-length match, the keyword wins. */
[cC][lL][aA][sS][sS] { return new Symbol(TokenConstants.CLASS); }
[eE][lL][sS][eE] { return new Symbol(TokenConstants.ELSE); }
[fF][iI] { return new Symbol(TokenConstants.FI); }
[iI][fF] { return new Symbol(TokenConstants.IF); }
[iI][nN] { return new Symbol(TokenConstants.IN); }
[iI][nN][hH][eE][rR][iI][tT][sS] { return new Symbol(TokenConstants.INHERITS); }
[iI][sS][vV][oO][iI][dD] { return new Symbol(TokenConstants.ISVOID); }
[lL][eE][tT] { return new Symbol(TokenConstants.LET); }
[lL][oO][oO][pP] { return new Symbol(TokenConstants.LOOP); }
[pP][oO][oO][lL] { return new Symbol(TokenConstants.POOL); }
[tT][hH][eE][nN] { return new Symbol(TokenConstants.THEN); }
[wW][hH][iI][lL][eE] { return new Symbol(TokenConstants.WHILE); }
[cC][aA][sS][eE] { return new Symbol(TokenConstants.CASE); }
[eE][sS][aA][cC] { return new Symbol(TokenConstants.ESAC); }
[nN][eE][wW] { return new Symbol(TokenConstants.NEW); }
[oO][fF] { return new Symbol(TokenConstants.OF); }
[nN][oO][tT] { return new Symbol(TokenConstants.NOT); }
/* added: for (disabled) */
/* [fF][oO][rR] { return new Symbol(TokenConstants.POOL); } */
/* added: mycase..do (disabled) */
/* [mM][yY][cC][aA][sS][eE] { return new Symbol(TokenConstants.ESAC); } */
/* [dD][oO] { return new Symbol(TokenConstants.POOL); } */
/* [dD][eE][fF][aA][uU][lL][tT] { return new Symbol(TokenConstants.FI); } */
/* added: mapcar n, f, x1, ..., xM */
/* NOTE(review): mapcar is emitted with the INHERITS token code, presumably
 * reusing an existing token for a grammar extension - confirm against the parser. */
[mM][aA][pP][cC][aA][rR] { return new Symbol(TokenConstants.INHERITS); }
/* booleans: the first letter must be lower case, the rest is case-insensitive */
t[rR][uU][eE] { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.TRUE); }
f[aA][lL][sS][eE] { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.FALSE); }
/* operators and punctuation */
"+" { return new Symbol(TokenConstants.PLUS); }
"-" { return new Symbol(TokenConstants.MINUS); }
"*" { return new Symbol(TokenConstants.MULT); }
"/" { return new Symbol(TokenConstants.DIV); }
"=" { return new Symbol(TokenConstants.EQ); }
"<" { return new Symbol(TokenConstants.LT); }
"<=" { return new Symbol(TokenConstants.LE); }
"=>" { return new Symbol(TokenConstants.DARROW); }
"~" { return new Symbol(TokenConstants.NEG); }
"@" { return new Symbol(TokenConstants.AT); }
"." { return new Symbol(TokenConstants.DOT); }
"<-" { return new Symbol(TokenConstants.ASSIGN); }
"," { return new Symbol(TokenConstants.COMMA); }
";" { return new Symbol(TokenConstants.SEMI); }
":" { return new Symbol(TokenConstants.COLON); }
"(" { return new Symbol(TokenConstants.LPAREN); }
")" { return new Symbol(TokenConstants.RPAREN); }
"{" { return new Symbol(TokenConstants.LBRACE); }
"}" { return new Symbol(TokenConstants.RBRACE); }
/* type identifier pattern: the lexeme is added to the identifier table */
{TypeIdentifier} { return new Symbol(TokenConstants.TYPEID, AbstractTable.idtable.addString(yytext())); }
/* object identifier pattern: the lexeme is added to the identifier table */
{ObjectIdentifier} { return new Symbol(TokenConstants.OBJECTID, AbstractTable.idtable.addString(yytext())); }
/* integer pattern: the lexeme is added to the integer table */
{DecIntegerLiteral} { return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(yytext())); }
/* newline: count the line. The optional \r makes a CRLF pair count as one
 * line; without it the two-character {WhiteSpace} match would win over a
 * bare \n rule and the line would never be counted. This rule must stay
 * above {WhiteSpace} so that it wins on equal-length matches. */
\r?\n { curr_lineno++; }
/* remaining whitespace (including a lone \r) is skipped */
{WhiteSpace} { }
/* string opened: start with an empty buffer */
\" { string_buff.setLength(0); yybegin(STRING); }
/* multiline comment opened: nesting depth starts at 1 */
"(*" { nested_comments = 1; yybegin(ML_COMM); }
/* inline comment */
"--" { yybegin(SL_COMM); }
/* unbalanced multiline comment terminator */
"*)" { return new Symbol(TokenConstants.ERROR, "Unmatched *)"); }
/* invalid character: reported as an ERROR token carrying the character */
. { return new Symbol(TokenConstants.ERROR, yytext()); }
}
/**
* single line comment handler
*
* The comment body and its terminating newline are matched by two separate
* rules. A single ".*\n" pattern cannot match a comment that ends at
* end-of-file without a newline (the scanner would fail with a "could not
* match input" error instead of reaching the %eofval code), and it also
* fails on characters that "." does not match, such as \r.
*/
<SL_COMM>
{
/* comment text: everything up to, but not including, the newline */
[^\n]+ { }
/* end of the comment: count the line and go back to the initial state */
\n { curr_lineno++; yybegin(YYINITIAL); }
}
/**
* multiple line comment handler
*
* Comments nest: nested_comments holds the current depth (set to 1 by the
* rule that opens the comment) and the scanner leaves this state only when
* the depth drops back to 0.
*/
<ML_COMM>
{
/* end of a (possibly nested) comment */
"*)" {
nested_comments--;
if (nested_comments == 0) { yybegin(YYINITIAL); } // outermost comment closed
}
/* start of a nested comment */
"(*" { nested_comments++; }
/* newline: only the line counter changes */
\n { curr_lineno++; }
/* any other character is skipped. [^\n] is used instead of "." because
 * "." does not match \r, \f, \u000b and other line terminators in current
 * JFlex versions, which would abort the scan inside a comment. */
[^\n] { }
}
/**
* strings handler
*
* Characters are collected in string_buff. Every rule that appends first
* checks that the buffer holds fewer than MAX_STR_CONST - 1 characters;
* otherwise it reports STRING_TOO_LONG_ERROR_MESSAGE and switches to
* ERROR_STRING, which skips the remainder of the constant.
*/
<STRING>
{
/* end of string constant: emit it (i.e. return <40, "hello world">) */
\" {
yybegin(YYINITIAL);
return new Symbol(TokenConstants.STR_CONST, AbstractTable.stringtable.addString(string_buff.toString()));
}
/* ordinary characters are appended unchanged */
[^\n\\\"\0] {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append(yytext());
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* escaped null character. This rule must stay above the generic escape
 * rule below, which would otherwise append the null character to the
 * string constant. */
\\\u0000 {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, "String contains escaped null character.");
}
/* \n escape: newline */
\\n {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append("\n");
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* \t escape: tab */
\\t {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append("\t");
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* \f escape: form feed */
\\f {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append("\f");
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* \b escape: backspace */
\\b {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append("\b");
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* escaped newline character: the string continues on the next line */
\\[\n] {
curr_lineno++;
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append('\n');
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* unescaped newline character: the constant is unterminated */
[\n] {
curr_lineno++;
yybegin(YYINITIAL);
return new Symbol(TokenConstants.ERROR, "Unterminated string constant");
}
/* single backslash: reached only when no two-character escape rule
 * applies (before a literal \r, \f or \b character, or at end of input);
 * the backslash itself is dropped */
\\ { }
/* any other escaped character stands for itself; this also covers the
 * \" and \' escapes, which previously had dedicated but identical rules */
\\[^\r\n\f\b] {
if (string_buff.length() < MAX_STR_CONST - 1) {
string_buff.append(yytext().charAt(1));
} else {
yybegin(ERROR_STRING);
return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
}
}
/* null character */
\x00|\u0000 { yybegin(ERROR_STRING); return new Symbol(TokenConstants.ERROR, "String contains null character."); }
}
/* continue lexical analysis after an error inside a string constant.
 *
 * The STRING rules enter this state after reporting an over-long string or
 * a null character. The rest of the constant is discarded and normal
 * scanning resumes after the first unescaped closing quote or after an
 * unescaped newline, whichever comes first. A greedy ".*\"" pattern must
 * not be used here: it runs to the LAST quote on the line, swallowing any
 * tokens that follow the string, and it does not treat \" as escaped. */
<ERROR_STRING>
{
/* closing quote: resume lexing right after it */
\" { yybegin(YYINITIAL); }
/* escaped newline: the string continues on the next line */
\\\n { curr_lineno++; }
/* unescaped newline: resume lexing at the next line */
\n { curr_lineno++; yybegin(YYINITIAL); }
/* any other escaped character, including \" and \\, is skipped as a pair */
\\[^\n] { }
/* run of ordinary characters: skipped */
[^\\\n\"]+ { }
/* lone backslash (only possible right before end of input): skipped */
\\ { }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment