VenturaDelMonte/cool.lex

## cool.lex
/*
 *  @author Del Monte Ventura - Cesarano Antonio
 *  The scanner definition for COOL.
 */

import java_cup.runtime.Symbol;

%%

%{

/*  Stuff enclosed in %{ %} is copied verbatim to the lexer class
 *  definition, all the extra variables/functions you want to use in the
 *  lexer actions should go here.  Don't remove or modify anything that
 *  was there initially.  */

    // Max size of string constants
    static int MAX_STR_CONST = 1025;

    // Value of the ERROR token returned when a string constant exceeds
    // MAX_STR_CONST - 1 characters. Declared final: it is a constant and
    // must never be reassigned.
    private static final String STRING_TOO_LONG_ERROR_MESSAGE = "String constant too long";

    // For assembling string constants
    StringBuffer string_buff = new StringBuffer();

    // Number of "(*" openers not yet closed; set to 1 when a comment is
    // entered from YYINITIAL and adjusted by the ML_COMM rules.
    private int nested_comments = 0;

    // Line counter, starting at 1; incremented by every rule that consumes '\n'.
    private int curr_lineno = 1;

    /** Returns the line counter maintained by the newline-consuming rules. */
    int get_curr_lineno() {
        return curr_lineno;
    }

    // Interned name of the file being scanned; null until set_filename is called.
    private AbstractSymbol filename;

    /** Interns fname in the string table and remembers it as the current file name. */
    void set_filename(String fname) {
        filename = AbstractTable.stringtable.addString(fname);
    }

    /** Returns the symbol stored by the last call to set_filename. */
    AbstractSymbol curr_filename() {
        return filename;
    }
%}
/* main character classes */
/* WhiteSpace deliberately does NOT contain '\n' (and therefore does not
   contain the two-character sequence \r\n either): every line feed has to be
   matched by the dedicated newline rule of YYINITIAL so that the line counter
   is incremented. With "\r\n" inside WhiteSpace, a CRLF line ending was the
   longest match and the line was never counted. The '\r' of a CRLF pair is
   now skipped as ordinary whitespace and the following '\n' is counted.
   \u000b is the vertical tab. LineTerminator is kept for reference. */
LineTerminator = \r|\n|\r\n
WhiteSpace = [ \t\f\r\u000b]

/* identifiers: type names start with an upper-case letter, object names with
   a lower-case letter */
TypeIdentifier = [A-Z][a-zA-Z0-9_]*
ObjectIdentifier = [a-z][a-zA-Z0-9_]*

/* integer literals */
DecIntegerLiteral = [0-9]+

/* lexer states */
%state STRING, ML_COMM, SL_COMM, ERROR_STRING


%init{

/*  Stuff enclosed in %init{ %init} is copied verbatim to the lexer
 *  class constructor, all the extra initialization you want to do should
 *  go here.  Don't remove or modify anything that was there initially. */

    // empty for now
%init}

%eofval{

/*  Stuff enclosed in %eofval{ %eofval} specifies java code that is
 *  executed when end-of-file is reached.  If you use multiple lexical
 *  states and want to do something special if an EOF is encountered in
 *  one of those states, place your code in the switch statement.
 *  Ultimately, you should return the EOF symbol, or your lexer won't
 *  work.  */

    // yystate() is the public accessor for the active lexical state; the
    // zz-prefixed fields of the generated scanner are internal.
    switch (yystate())
    {
    	case STRING:
    		// Input ended inside a string constant: report it once, then fall
    		// back to YYINITIAL so that the next call returns EOF.
    		yybegin(YYINITIAL);
    		return new Symbol(TokenConstants.ERROR, "EOF in string constant");
    	case ML_COMM:
    		// Input ended inside an unclosed (* ... *) comment.
    		yybegin(YYINITIAL);
    		return new Symbol(TokenConstants.ERROR, "EOF in comment");
    	default:
    		// YYINITIAL, SL_COMM and ERROR_STRING: plain end of input.
    		return new Symbol(TokenConstants.EOF);
    }
%eofval}

%class CoolLexer
%cup
%unicode
%line


%%

<YYINITIAL> {

/* keywords (matched case-insensitively). They are listed before the
   identifier rules: when two rules match the same text, the earlier rule
   is the one that fires. */

[cC][lL][aA][sS][sS]				{ return new Symbol(TokenConstants.CLASS);	}
[eE][lL][sS][eE] 					{ return new Symbol(TokenConstants.ELSE);	}
[fF][iI] 							{ return new Symbol(TokenConstants.FI);		}
[iI][fF] 							{ return new Symbol(TokenConstants.IF);		}
[iI][nN] 							{ return new Symbol(TokenConstants.IN);		}
[iI][nN][hH][eE][rR][iI][tT][sS]	{ return new Symbol(TokenConstants.INHERITS); }
[iI][sS][vV][oO][iI][dD] 			{ return new Symbol(TokenConstants.ISVOID);	}
[lL][eE][tT] 		 				{ return new Symbol(TokenConstants.LET);	}
[lL][oO][oO][pP] 					{ return new Symbol(TokenConstants.LOOP);	}
[pP][oO][oO][lL] 					{ return new Symbol(TokenConstants.POOL);	}
[tT][hH][eE][nN] 					{ return new Symbol(TokenConstants.THEN); 	}
[wW][hH][iI][lL][eE] 				{ return new Symbol(TokenConstants.WHILE);	}
[cC][aA][sS][eE] 					{ return new Symbol(TokenConstants.CASE);	}
[eE][sS][aA][cC]					{ return new Symbol(TokenConstants.ESAC);	}
[nN][eE][wW] 						{ return new Symbol(TokenConstants.NEW);	}
[oO][fF] 							{ return new Symbol(TokenConstants.OF);		}
[nN][oO][tT] 						{ return new Symbol(TokenConstants.NOT);	}

/* addition: for (disabled) */

/* [fF][oO][rR]						{ return new Symbol(TokenConstants.POOL);	} */

/* addition: mycase..do (disabled) */

/* [mM][yY][cC][aA][sS][eE]		   	{ return new Symbol(TokenConstants.ESAC);	} */
/* [dD][oO]							{ return new Symbol(TokenConstants.POOL);	} */
/* [dD][eE][fF][aA][uU][lL][tT]		{ return new Symbol(TokenConstants.FI); 	} */

/* addition: mapcar n, f, x1, ..., xM */
/* NOTE(review): mapcar is reported with the INHERITS token code, so the
   parser cannot tell it apart from "inherits"; presumably a deliberate
   shortcut that reuses an existing token -- confirm against the grammar. */
[mM][aA][pP][cC][aA][rR]			{ return new Symbol(TokenConstants.INHERITS); 	}

/* booleans: the first letter must be lower case, the rest is case-insensitive */

t[rR][uU][eE]	  		 					{ return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.TRUE); 	}
f[aA][lL][sS][eE] 			 			    { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.FALSE);	}

/* operators */

"+"									{ return new Symbol(TokenConstants.PLUS);   }
"-"				 					{ return new Symbol(TokenConstants.MINUS);  }
"*"				 					{ return new Symbol(TokenConstants.MULT);   }
"/"									{ return new Symbol(TokenConstants.DIV);    }
"="				 					{ return new Symbol(TokenConstants.EQ);     }
"<"				 					{ return new Symbol(TokenConstants.LT); 	 }
"<="			 					{ return new Symbol(TokenConstants.LE);     }
"=>"			 					{ return new Symbol(TokenConstants.DARROW); }
"~"				 					{ return new Symbol(TokenConstants.NEG);    }
"@"				 					{ return new Symbol(TokenConstants.AT);     }
"."									{ return new Symbol(TokenConstants.DOT);    }
"<-"		     					{ return new Symbol(TokenConstants.ASSIGN); }

","				 					{ return new Symbol(TokenConstants.COMMA);  }
";"				 					{ return new Symbol(TokenConstants.SEMI);   }
":"				 					{ return new Symbol(TokenConstants.COLON);  }

"("				 					{ return new Symbol(TokenConstants.LPAREN); }
")"				 					{ return new Symbol(TokenConstants.RPAREN); }
"{"				 					{ return new Symbol(TokenConstants.LBRACE); }
"}"				 					{ return new Symbol(TokenConstants.RBRACE); }

/* type identifier: the text is interned in the identifier table */
{TypeIdentifier}					{ return new Symbol(TokenConstants.TYPEID, AbstractTable.idtable.addString(yytext())); }

/* object identifier: the text is interned in the identifier table */
{ObjectIdentifier}					{ return new Symbol(TokenConstants.OBJECTID, AbstractTable.idtable.addString(yytext())); }

/* integer literal: the digits are interned in the integer table */
{DecIntegerLiteral}					{ return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(yytext())); }


/* newline: must stay ahead of the whitespace rule so that it is the one that fires */
\n									{ curr_lineno++;							}

/* whitespace is skipped */
{WhiteSpace}						{ }

/* string opened: start from an empty buffer */
\"									{ string_buff.setLength(0); yybegin(STRING);	}

/* multiline comment opened */
"(*"								{ nested_comments = 1; yybegin(ML_COMM);	}

/* inline comment */
"--"								{ yybegin(SL_COMM);							}

/* comment terminator without a matching opener */
"*)"								{ return new Symbol(TokenConstants.ERROR, "Unmatched *)"); }

/* invalid character. [^\n] is used instead of '.': on JFlex versions where
   '.' excludes every Unicode line terminator, characters such as U+0085 or
   U+2028 matched no rule at all and aborted the scanner instead of
   producing an ERROR token. */
[^\n]								{ return new Symbol(TokenConstants.ERROR, yytext()); }

}


/**
 * single line comment handler
 *
 * The comment text and the terminating line feed are matched by separate
 * rules. A single ".*\n" pattern could not match a comment that ends at
 * end-of-file without a trailing newline (nor, on JFlex versions where '.'
 * excludes '\r', a comment on a CRLF-terminated line), leaving the scanner
 * with unmatched input. With the split rules, a comment at end-of-file is
 * consumed and the EOF handler then returns EOF.
 */
<SL_COMM>
{

	/* comment content is discarded */
	[^\n]+						  { }

	/* the line feed ends the comment: count the line and resume normal scanning */
	\n							  { curr_lineno++;   yybegin(YYINITIAL); }

}


/**
 * multiple line comment handler
 *
 * Comments nest: nested_comments holds the number of "(*" openers that are
 * still unclosed, and scanning returns to YYINITIAL only when it drops to 0.
 */
<ML_COMM>
{
/* end of the innermost open comment */
"*)"						  		  {
											nested_comments--;
											// leave comment mode only once the outermost comment is closed
											if (nested_comments == 0)  {   yybegin(YYINITIAL);  }
							  		  }
/* start of a nested comment */
"(*"					  			  {	    nested_comments++;    }


/* newline inside the comment: only the line counter changes */
\n									{ curr_lineno++; }

/* any other character is skipped. [^\n] is used instead of '.' so that '\r'
   and other Unicode line terminators, which '.' excludes on newer JFlex
   versions, cannot be left unmatched inside a comment. */
[^\n]								{ }

}


/**
 * strings handler
 *
 * Characters are accumulated in string_buff. A string may hold at most
 * MAX_STR_CONST - 1 characters; the character that would exceed the limit
 * triggers the "too long" error and switches to ERROR_STRING, which skips the
 * remainder of the constant.
 */
<STRING>
{
/* end of string constant: intern the assembled text and emit the token */
\"					{
						yybegin(YYINITIAL);
						return new Symbol(TokenConstants.STR_CONST, AbstractTable.stringtable.addString(string_buff.toString()));
					}

/* null character */
\u0000				{ yybegin(ERROR_STRING); return new Symbol(TokenConstants.ERROR, "String contains null character."); }

/* backslash followed by a null character: reported as an error as well,
   instead of silently storing the null character in the constant */
\\\u0000			{ yybegin(ERROR_STRING); return new Symbol(TokenConstants.ERROR, "String contains escaped null character."); }

/* unescaped newline character: the string was not closed on this line */
\n					{
						curr_lineno++;
						yybegin(YYINITIAL);
						return new Symbol(TokenConstants.ERROR, "Unterminated string constant");
					}

/* ordinary character: appended as is */
[^\n\\\"\0]			{
						if (string_buff.length() >= MAX_STR_CONST - 1) {
							yybegin(ERROR_STRING);
							return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
						}
						string_buff.append(yytext());
					}

/* escape sequence: \n \t \f \b denote control characters, a backslash
   followed by a line feed continues the string on the next line (the line
   feed is kept), and a backslash followed by anything else stands for that
   character itself (this covers \" \' and \\) */
\\[^\0]				{
						// everything after the backslash; substring keeps the whole
						// matched character even if it spans two Java chars
						String escaped = yytext().substring(1);
						switch (escaped.charAt(0)) {
							case 'n':  escaped = "\n"; break;
							case 't':  escaped = "\t"; break;
							case 'f':  escaped = "\f"; break;
							case 'b':  escaped = "\b"; break;
							case '\n': curr_lineno++;  break;	// escaped newline still ends a source line
							default:   break;
						}
						if (string_buff.length() >= MAX_STR_CONST - 1) {
							yybegin(ERROR_STRING);
							return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
						}
						string_buff.append(escaped);
					}

/* single backslash: only reachable when the backslash is the last character
   of the input; it is dropped and the EOF handler reports the open string */
\\					{ }
}


/* continue lexical analysis after an error inside a string constant: the
   rest of the broken constant is skipped up to its closing quote or up to
   the first unescaped newline, whichever comes first.
   The quote is matched on its own rather than with a greedy ".*\"" pattern,
   which ran to the LAST quote on the line and so swallowed every token
   between the end of the broken string and a later string on that line. */
<ERROR_STRING>
{

/* closing quote: resume lexing right after it */
\"								{ yybegin(YYINITIAL);			}

/* escaped newline: the string continues on the next line */
\\\n							{ curr_lineno++;				}

/* unescaped newline: resume lexing at the next line */
\n								{ curr_lineno++;
							      yybegin(YYINITIAL);			}

/* any other escaped character, in particular \" which does not close the string */
\\[^\n]							{ /* do nothing */				}

/* run of ordinary characters */
[^\\\n\"]+						{ /* do nothing */				}

/* backslash that is the last character of the input */
\\								{ /* do nothing */				}
}
	/*
	* @author Del Monte Ventura - Cesarano Antonio
	* The scanner definition for COOL.
	*/

	import java_cup.runtime.Symbol;

	%%

	%{

	/* Stuff enclosed in %{ %} is copied verbatim to the lexer class
	* definition, all the extra variables/functions you want to use in the
	* lexer actions should go here. Don't remove or modify anything that
	* was there initially. */

	// Max size of string constants
	static int MAX_STR_CONST = 1025;

	// Value of the ERROR token returned when a string constant exceeds
	// MAX_STR_CONST - 1 characters. Declared final: it is a constant and
	// must never be reassigned.
	private static final String STRING_TOO_LONG_ERROR_MESSAGE = "String constant too long";

	// For assembling string constants
	StringBuffer string_buff = new StringBuffer();

	// Number of "(*" openers not yet closed; set to 1 when a comment is
	// entered from YYINITIAL and adjusted by the ML_COMM rules.
	private int nested_comments = 0;

	// Line counter, starting at 1; incremented by every rule that consumes '\n'.
	private int curr_lineno = 1;

	/** Returns the line counter maintained by the newline-consuming rules. */
	int get_curr_lineno() {
		return curr_lineno;
	}

	// Interned name of the file being scanned; null until set_filename is called.
	private AbstractSymbol filename;

	/** Interns fname in the string table and remembers it as the current file name. */
	void set_filename(String fname) {
		filename = AbstractTable.stringtable.addString(fname);
	}

	/** Returns the symbol stored by the last call to set_filename. */
	AbstractSymbol curr_filename() {
		return filename;
	}
	%}
	/* main character classes */
	/* The alternation bars are plain '|' operators; written as "\|" they were
	   literal pipe characters and the macros matched nothing useful.
	   WhiteSpace deliberately does NOT contain '\n' (nor the sequence \r\n):
	   every line feed has to be matched by the dedicated newline rule of
	   YYINITIAL so that the line counter is incremented, also for CRLF line
	   endings. \u000b is the vertical tab. LineTerminator is kept for
	   reference. */
	LineTerminator = \r|\n|\r\n
	WhiteSpace = [ \t\f\r\u000b]

	/* identifiers: type names start with an upper-case letter, object names
	   with a lower-case letter */
	TypeIdentifier = [A-Z][a-zA-Z0-9_]*
	ObjectIdentifier = [a-z][a-zA-Z0-9_]*

	/* integer literals */
	DecIntegerLiteral = [0-9]+

	/* lexer states */
	%state STRING, ML_COMM, SL_COMM, ERROR_STRING


	%init{

	/* Stuff enclosed in %init{ %init} is copied verbatim to the lexer
	* class constructor, all the extra initialization you want to do should
	* go here. Don't remove or modify anything that was there initially. */

	// empty for now
	%init}

	%eofval{

	/* Stuff enclosed in %eofval{ %eofval} specifies java code that is
	* executed when end-of-file is reached. If you use multiple lexical
	* states and want to do something special if an EOF is encountered in
	* one of those states, place your code in the switch statement.
	* Ultimately, you should return the EOF symbol, or your lexer won't
	* work. */

	// yystate() is the public accessor for the active lexical state; the
	// zz-prefixed fields of the generated scanner are internal.
	switch (yystate())
	{
		case STRING:
			// Input ended inside a string constant: report it once, then fall
			// back to YYINITIAL so that the next call returns EOF.
			yybegin(YYINITIAL);
			return new Symbol(TokenConstants.ERROR, "EOF in string constant");
		case ML_COMM:
			// Input ended inside an unclosed (* ... *) comment.
			yybegin(YYINITIAL);
			return new Symbol(TokenConstants.ERROR, "EOF in comment");
		default:
			// YYINITIAL, SL_COMM and ERROR_STRING: plain end of input.
			return new Symbol(TokenConstants.EOF);
	}
	%eofval}

	%class CoolLexer
	%cup
	%unicode
	%line


	%%

	<YYINITIAL> {

	/* keywords (matched case-insensitively). They are listed before the
	   identifier rules: when two rules match the same text, the earlier rule
	   is the one that fires. */

	[cC][lL][aA][sS][sS] { return new Symbol(TokenConstants.CLASS); }
	[eE][lL][sS][eE] { return new Symbol(TokenConstants.ELSE); }
	[fF][iI] { return new Symbol(TokenConstants.FI); }
	[iI][fF] { return new Symbol(TokenConstants.IF); }
	[iI][nN] { return new Symbol(TokenConstants.IN); }
	[iI][nN][hH][eE][rR][iI][tT][sS] { return new Symbol(TokenConstants.INHERITS); }
	[iI][sS][vV][oO][iI][dD] { return new Symbol(TokenConstants.ISVOID); }
	[lL][eE][tT] { return new Symbol(TokenConstants.LET); }
	[lL][oO][oO][pP] { return new Symbol(TokenConstants.LOOP); }
	[pP][oO][oO][lL] { return new Symbol(TokenConstants.POOL); }
	[tT][hH][eE][nN] { return new Symbol(TokenConstants.THEN); }
	[wW][hH][iI][lL][eE] { return new Symbol(TokenConstants.WHILE); }
	[cC][aA][sS][eE] { return new Symbol(TokenConstants.CASE); }
	[eE][sS][aA][cC] { return new Symbol(TokenConstants.ESAC); }
	[nN][eE][wW] { return new Symbol(TokenConstants.NEW); }
	[oO][fF] { return new Symbol(TokenConstants.OF); }
	[nN][oO][tT] { return new Symbol(TokenConstants.NOT); }

	/* addition: for (disabled) */

	/* [fF][oO][rR] { return new Symbol(TokenConstants.POOL); } */

	/* addition: mycase..do (disabled) */

	/* [mM][yY][cC][aA][sS][eE] { return new Symbol(TokenConstants.ESAC); } */
	/* [dD][oO] { return new Symbol(TokenConstants.POOL); } */
	/* [dD][eE][fF][aA][uU][lL][tT] { return new Symbol(TokenConstants.FI); } */

	/* addition: mapcar n, f, x1, ..., xM */
	/* NOTE(review): mapcar is reported with the INHERITS token code, so the
	   parser cannot tell it apart from "inherits"; presumably a deliberate
	   shortcut that reuses an existing token -- confirm against the grammar. */
	[mM][aA][pP][cC][aA][rR] { return new Symbol(TokenConstants.INHERITS); }

	/* booleans: the first letter must be lower case, the rest is case-insensitive */

	t[rR][uU][eE] { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.TRUE); }
	f[aA][lL][sS][eE] { return new Symbol(TokenConstants.BOOL_CONST, java.lang.Boolean.FALSE); }

	/* operators */

	"+" { return new Symbol(TokenConstants.PLUS); }
	"-" { return new Symbol(TokenConstants.MINUS); }
	"*" { return new Symbol(TokenConstants.MULT); }
	"/" { return new Symbol(TokenConstants.DIV); }
	"=" { return new Symbol(TokenConstants.EQ); }
	"<" { return new Symbol(TokenConstants.LT); }
	"<=" { return new Symbol(TokenConstants.LE); }
	"=>" { return new Symbol(TokenConstants.DARROW); }
	"~" { return new Symbol(TokenConstants.NEG); }
	"@" { return new Symbol(TokenConstants.AT); }
	"." { return new Symbol(TokenConstants.DOT); }
	"<-" { return new Symbol(TokenConstants.ASSIGN); }

	"," { return new Symbol(TokenConstants.COMMA); }
	";" { return new Symbol(TokenConstants.SEMI); }
	":" { return new Symbol(TokenConstants.COLON); }

	"(" { return new Symbol(TokenConstants.LPAREN); }
	")" { return new Symbol(TokenConstants.RPAREN); }
	"{" { return new Symbol(TokenConstants.LBRACE); }
	"}" { return new Symbol(TokenConstants.RBRACE); }

	/* type identifier: the text is interned in the identifier table */
	{TypeIdentifier} { return new Symbol(TokenConstants.TYPEID, AbstractTable.idtable.addString(yytext())); }

	/* object identifier: the text is interned in the identifier table */
	{ObjectIdentifier} { return new Symbol(TokenConstants.OBJECTID, AbstractTable.idtable.addString(yytext())); }

	/* integer literal: the digits are interned in the integer table */
	{DecIntegerLiteral} { return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(yytext())); }


	/* newline: must stay ahead of the whitespace rule so that it is the one that fires */
	\n { curr_lineno++; }

	/* whitespace is skipped */
	{WhiteSpace} { }

	/* string opened: start from an empty buffer */
	\" { string_buff.setLength(0); yybegin(STRING); }

	/* multiline comment opened */
	"(*" { nested_comments = 1; yybegin(ML_COMM); }

	/* inline comment */
	"--" { yybegin(SL_COMM); }

	/* comment terminator without a matching opener. The pattern must be the
	   two-character "*)": a bare ")" here can never fire because the RPAREN
	   rule above matches the same text first. */
	"*)" { return new Symbol(TokenConstants.ERROR, "Unmatched *)"); }

	/* invalid character. [^\n] is used instead of '.': on JFlex versions where
	   '.' excludes every Unicode line terminator, characters such as U+0085 or
	   U+2028 matched no rule at all and aborted the scanner instead of
	   producing an ERROR token. */
	[^\n] { return new Symbol(TokenConstants.ERROR, yytext()); }

	}



	/**
	* single line comment handler
	*
	* The comment text and the terminating line feed are matched by separate
	* rules. A single ".*\n" pattern could not match a comment that ends at
	* end-of-file without a trailing newline, leaving the scanner with
	* unmatched input. With the split rules, a comment at end-of-file is
	* consumed and the EOF handler then returns EOF.
	*/
	<SL_COMM>
	{

	/* comment content is discarded */
	[^\n]+ { }

	/* the line feed ends the comment: count the line and resume normal scanning */
	\n { curr_lineno++; yybegin(YYINITIAL); }

	}



	/**
	* multiple line comment handler
	*
	* Comments nest: nested_comments holds the number of "(*" openers that are
	* still unclosed, and scanning returns to YYINITIAL only when it drops to 0.
	*/
	<ML_COMM>
	{
	/* end of the innermost open comment */
	"*)" {
		nested_comments--;
		// leave comment mode only once the outermost comment is closed
		if (nested_comments == 0) { yybegin(YYINITIAL); }
	}
	/* start of a nested comment */
	"(*" { nested_comments++; }


	/* newline inside the comment: only the line counter changes */
	\n { curr_lineno++; }

	/* any other character is skipped. [^\n] is used instead of '.' so that '\r'
	   and other Unicode line terminators, which '.' excludes on newer JFlex
	   versions, cannot be left unmatched inside a comment. */
	[^\n] { }

	}


	/**
	* strings handler
	*
	* Characters are accumulated in string_buff. A string may hold at most
	* MAX_STR_CONST - 1 characters; the character that would exceed the limit
	* triggers the "too long" error and switches to ERROR_STRING, which skips
	* the remainder of the constant.
	*/
	<STRING>
	{
	/* end of string constant: intern the assembled text and emit the token */
	\" {
		yybegin(YYINITIAL);
		return new Symbol(TokenConstants.STR_CONST, AbstractTable.stringtable.addString(string_buff.toString()));
	}

	/* null character (a single pattern; the former "\x00\|\u0000" contained a
	   literal pipe and therefore required three characters in a row) */
	\u0000 { yybegin(ERROR_STRING); return new Symbol(TokenConstants.ERROR, "String contains null character."); }

	/* backslash followed by a null character: reported as an error as well,
	   instead of silently storing the null character in the constant */
	\\\u0000 { yybegin(ERROR_STRING); return new Symbol(TokenConstants.ERROR, "String contains escaped null character."); }

	/* unescaped newline character: the string was not closed on this line */
	\n {
		curr_lineno++;
		yybegin(YYINITIAL);
		return new Symbol(TokenConstants.ERROR, "Unterminated string constant");
	}

	/* ordinary character: appended as is */
	[^\n\\\"\0] {
		if (string_buff.length() >= MAX_STR_CONST - 1) {
			yybegin(ERROR_STRING);
			return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
		}
		string_buff.append(yytext());
	}

	/* escape sequence: \n \t \f \b denote control characters, a backslash
	   followed by a line feed continues the string on the next line (the line
	   feed is kept), and a backslash followed by anything else stands for that
	   character itself (this covers \" \' and \\) */
	\\[^\0] {
		// everything after the backslash; substring keeps the whole
		// matched character even if it spans two Java chars
		String escaped = yytext().substring(1);
		switch (escaped.charAt(0)) {
			case 'n':  escaped = "\n"; break;
			case 't':  escaped = "\t"; break;
			case 'f':  escaped = "\f"; break;
			case 'b':  escaped = "\b"; break;
			case '\n': curr_lineno++;  break;	// escaped newline still ends a source line
			default:   break;
		}
		if (string_buff.length() >= MAX_STR_CONST - 1) {
			yybegin(ERROR_STRING);
			return new Symbol(TokenConstants.ERROR, STRING_TOO_LONG_ERROR_MESSAGE);
		}
		string_buff.append(escaped);
	}

	/* single backslash: only reachable when the backslash is the last character
	   of the input; it is dropped and the EOF handler reports the open string */
	\\ { }
	}


	/* continue lexical analysis after an error inside a string constant: the
	   rest of the broken constant is skipped up to its closing quote or up to
	   the first unescaped newline, whichever comes first.
	   The quote is matched on its own rather than with a greedy ".*\"" pattern,
	   which ran to the LAST quote on the line and so swallowed every token
	   between the end of the broken string and a later string on that line. */
	<ERROR_STRING>
	{

	/* closing quote: resume lexing right after it */
	\" { yybegin(YYINITIAL); }

	/* escaped newline: the string continues on the next line */
	\\\n { curr_lineno++; }

	/* unescaped newline: resume lexing at the next line */
	\n { curr_lineno++;
	yybegin(YYINITIAL); }

	/* any other escaped character, in particular \" which does not close the string */
	\\[^\n] { /* do nothing */ }

	/* run of ordinary characters */
	[^\\\n\"]+ { /* do nothing */ }

	/* backslash that is the last character of the input */
	\\ { /* do nothing */ }
	}