Created
June 28, 2020 04:31
-
-
Save benmezger/53132801fb01356f4c369bc9f45cc52f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* JavaCC generation options for the LanguageParser grammar. */
options {
    // Lexing / parsing behaviour.
    IGNORE_CASE = true;          // literals such as keywords match regardless of letter case
    JAVA_UNICODE_ESCAPE = true;
    STATIC = false;              // generate an instantiable parser (see "new LanguageParser(...)" below)
    LOOKAHEAD = 1;

    // Tracing switches.
    DEBUG_PARSER = true;
    DEBUG_LOOKAHEAD = true;
}
PARSER_BEGIN(LanguageParser) | |
package compiler.parser; | |
import javax.swing.text.html.parser.Parser; | |
import java.util.List; | |
import java.util.ArrayList; | |
import java.io.InputStream; | |
import java.io.ByteArrayInputStream; | |
import java.lang.StringBuilder; | |
import java.lang.StringBuilder; | |
public class LanguageParser { | |
int contParseError = 0; | |
boolean debugRecovery = true; | |
final static List<ErrorStruct> output = new ArrayList<ErrorStruct>(); | |
boolean eof; | |
public static List<Token> getTokens(String stream){ | |
InputStream target = new ByteArrayInputStream(stream.getBytes()); | |
LanguageParser parser = new LanguageParser(target); | |
return tokenize(parser); | |
} | |
public static ArrayList<ErrorStruct> checkSyntax(String stream) { | |
InputStream target = new ByteArrayInputStream(stream.getBytes()); | |
LanguageParser parser = new LanguageParser(target); | |
try { | |
parser.begin_program(); | |
} | |
catch (ParseException e){ | |
output.add(new ErrorStruct("Error parsing the program.\n", e)); | |
} | |
ArrayList tmp = new ArrayList<ErrorStruct>(output); | |
output.clear(); | |
return tmp; | |
} | |
public static LanguageParser create(String stream){ | |
InputStream target = new ByteArrayInputStream(stream.getBytes()); | |
LanguageParser parser = new LanguageParser(target); | |
return parser; | |
} | |
public static void main(String args[]) throws TokenMgrError, ParseException { | |
LanguageParser parser = null; | |
if (args.length == 0) { | |
parser = new LanguageParser(System.in); | |
} | |
else if (args.length == 1) { | |
try { | |
parser = new LanguageParser(new java.io.FileInputStream(args[0])); | |
} | |
catch (java.io.FileNotFoundException e) { | |
System.out.println("LanguageParser: file " + args[0] + " was not found."); | |
return; | |
} | |
} | |
//for (Token token: tokenize(parser)){ | |
// String name = LanguageParserConstants.tokenImage[token.kind]; | |
// System.out.println("Line " + token.beginLine + " | Column " + token.beginColumn + " | " + token + "\n"); | |
//} | |
} | |
public static List<Token> tokenize(LanguageParser parser){ | |
List<Token> tokens = new ArrayList<Token>(); | |
Token token = parser.getNextToken(); | |
while (token.kind != LanguageParserConstants.EOF){ | |
tokens.add(token); | |
token = parser.getNextToken(); | |
} | |
if (!TokenHandler.isClosed()){ | |
tokens.add(TokenHandler.createToken()); | |
} | |
return tokens; | |
} | |
static public String im(int x){ | |
String s = tokenImage[x]; | |
int k = s.lastIndexOf("\""); | |
try { | |
s = s.substring(1, k); | |
} | |
catch (StringIndexOutOfBoundsException e){} | |
return s; | |
} | |
public void consumeUntil(RecoverySet g, ParseException e, String met) throws ParseException { | |
Token tok; | |
if (g == null){ | |
throw e; | |
} | |
tok = getToken(1); // Current token | |
while (!eof){ | |
/* found a token in set */ | |
if (g.contains(tok.kind)) { | |
break; | |
} | |
getNextToken(); | |
tok = getToken(1); | |
if (tok.kind == EOF && !g.contains(EOF)){ | |
eof = true; | |
} | |
} | |
contParseError++; | |
} | |
} | |
PARSER_END(LanguageParser) | |
// Whitespace, "#" line comments and the opening of a block comment.
SKIP: {
    " "
  // Line terminators: "\r" is accepted too so CRLF input does not yield stray OTHER tokens.
  // Kept as a single entry so the numbering of the following token kinds is unchanged.
  | < ["\n", "\r"] >
  | "\t"
  // Line comment: "#" up to (not including) the end of the line.
  | < "#" (~["\n"])* >
  // Block comment opener: notifies TokenHandler and switches to BLOCK_COMMENT_STATE.
  | < "/*" > { TokenHandler.writeInvalidToOutput(image.toString(), input_stream.getEndLine(), input_stream.getBeginColumn(), jjmatchedKind); } : BLOCK_COMMENT_STATE
}
// Inside a block comment: every character is skipped until the delimiter below
// marks the comment as closed and switches back to the DEFAULT state.
// NOTE(review): the closing delimiter is the same text as the opener rather
// than the reversed form - confirm this is what the language intends.
<BLOCK_COMMENT_STATE> SKIP: {
    < "/*" > { TokenHandler.setClosed(true); } : DEFAULT
  | < ~[] >
}
/*
 * Reserved words.
 * Declared before IDENTIFIER so that, on an equal-length match, the keyword wins.
 * The declaration order is kept because it determines the token kind numbers.
 */
TOKEN: {
    < INTEGER     : "integer" >
  | < REAL        : "real" >
  | < STRING      : "string" >
  | < TRUE        : "true" >
  | < UNTRUE      : "untrue" >
  | < DO          : "do" >
  | < THIS        : "this" >
  | < DESCRIPTION : "description" >
  | < BODY        : "body" >
  | < IS          : "is" >
  | < AS          : "as" >
  | < AND         : "and" >
  | < CONSTANT    : "constant" >
  | < VARIABLE    : "variable" >
  | < DESIGNATE   : "designate" >
  | < READ        : "read" >
  | < WRITE       : "write" >
  | < ALL         : "all" >
  | < AVALIATE    : "avaliate" >
  | < RESULT      : "result" >
  | < REPEAT      : "repeat" >
  | < DECLARATION : "declaration" >
  | < TYPE        : "type" >
  | < ENUM        : "enum" >
}
/* Arithmetic operators, relational operators, assignment and punctuation. */
TOKEN: {
    // Arithmetic
    < PLUS      : "+" >
  | < MINUS     : "-" >
  | < MULTIPLY  : "*" >
  | < DIV       : "/" >
  | < POWER     : "**" >
  | < WHOLE_DIV : "%" >
  | < REST_DIV  : "%%" >
    // Comparison and assignment
  | < EQUAL_TO              : "==" >
  | < ASSIGN                : "=" >
  | < DIFF_THAN             : "!=" >
  | < LESS_THAN             : "<<" >
  | < GREATER_THAN          : ">>" >
  | < LESS_THAN_OR_EQ_TO    : "<<=" >
  | < GREATER_THAN_OR_EQ_TO : ">>=" >
    // Grouping and separators
  | < OPEN_BRACKET         : "[" >
  | < CLOSE_BRACKET        : "]" >
  | < OPEN_PARENTHESIS     : "(" >
  | < CLOSE_PARENTHESIS    : ")" >
  | < OPEN_CURLY_BRACKETS  : "{" >
  | < CLOSE_CURLY_BRACKETS : "}" >
  | < DOT                  : "." >
  | < COMMA                : "," >
}
/* Logical operators and the "logic" type keyword. */
TOKEN: {
    < LOGICAL_AND : "&" >
  | < LOGICAL_OR  : "|" >
  | < LOGICAL_NOT : "!" >
  | < LOGIC       : "logic" >
}
/*
 * Identifiers.
 * An identifier is one or more groups of: a letter, at most one digit, then
 * any run of letters/underscores; optionally followed by more underscores
 * and letters.
 * The underscore literals are written out explicitly here: an empty string
 * literal in their place is not a usable regular expression.
 */
TOKEN: {
    < IDENTIFIER : ( <LETTER> (<DIGITS>)? ( <LETTER> | ("_") )* )+ ( ("_") | <LETTER> )* >
  | < #LETTER    : ["a"-"z", "A"-"Z"] >
  | < #DIGITS    : (["0"-"9"]) >
}
/* Numeric literals: integers and reals, each with an optional leading minus sign. */
TOKEN: {
    < NUM         : (<MINUS>)? (<DIGIT>)+ >
  | < NUMBER_REAL : ( (<MINUS>)? (<DIGIT>)+ <DOT> (<DIGIT>)+ ) >
  | < #DIGIT      : ["0" - "9"] >
}
/*
 * String literals, delimited by double or single quotes.
 * A literal may not contain its own delimiter, a backslash or a line break
 * unescaped; backslash escapes cover n t b r f \ ' " and octal codes.
 */
TOKEN: {
    < STRING_LITERAL :
        (
            "\""
            (
                ~["\"","\\","\n","\r"]
              | "\\" ( ["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] )
            )*
            "\""
        )
      | (
            "\'"
            (
                ~["\'","\\","\n","\r"]
              | "\\" ( ["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] )
            )*
            "\'"
        )
    >
}
/*
 * Catch-all tokens so that the lexer never fails on unexpected input:
 * OTHER matches any single character, INVALID_IDENTIFIER matches
 * identifier-like text that IDENTIFIER rejects.
 * The underscore literals are written out explicitly here: an empty string
 * literal in their place is not a usable regular expression.
 */
TOKEN : {
    < OTHER : ~[] >
  | < INVALID_IDENTIFIER :
        // two or more consecutive digits somewhere after the first letter
        <LETTER> ( <LETTER> | <DIGITS> | ("_") )* <DIGITS> (<DIGITS>)+ ( <LETTER> | ("_") )*
        // ends in a digit
      | <LETTER> ( <LETTER> | <DIGITS> | ("_") )* (<DIGITS>)+
        // digit right after underscores
      | <LETTER> ( <LETTER> | <DIGITS> ) ( ("_") )+ <DIGITS> ( <LETTER> | <DIGITS> | ("_") )
        // starts with a digit
      | (<DIGITS>)+ ( <LETTER> | <DIGITS> | ("_") )*
        // starts with an underscore
      | ("_") ( <LETTER> | <DIGITS> | ("_") )*
    >
}
/** A single enum value: a literal constant or an identifier. */
void enum_values() : {} {
    (
        constant_result()
      | <IDENTIFIER>
    )
}
/**
 * One enum entry: IDENTIFIER "is", a comma-separated list of values, then ".".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void inner_enum_declaration(RecoverySet r) : {} {
    try {
        <IDENTIFIER> <IS>
        enum_values() inner_enum_decla_cont()
        <DOT>
    }
    catch (ParseException error) {
        consumeUntil(r, error, "Error: Invalid enum declaration syntax.");
        output.add(new ErrorStruct("Erro: declaracao de enum interna incorreta.\n", error));
    }
}
/** Zero or more further enum values, each preceded by a comma. */
void inner_enum_decla_cont() : {} {
    // Written as a loop; accepts exactly the same input as the
    // right-recursive optional form.
    ( <COMMA> enum_values() )*
}
/**
 * Enum section: "type" "[" one or more enum entries "]", optionally followed
 * by "declaration" and the constants-and-variables section.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void enum_declaration(RecoverySet r) : {} {
    try {
        <TYPE>
        <OPEN_BRACKET>
            ( inner_enum_declaration(r) )+
        <CLOSE_BRACKET>
        ( <DECLARATION> declaration_constants_and_variables(r) )?
    }
    catch (ParseException error) {
        consumeUntil(r, error, "Error: Invalid enum declaration syntax.");
        output.add(new ErrorStruct("Erro: declaracao de enum incorreta.\n", error));
    }
}
/** An identifier followed by its optional index. */
void identifiers() : {} {
    <IDENTIFIER>
    index()
}
/**
 * Comma-separated list of one or more identifiers.
 * A syntax error is recorded; no tokens are skipped here.
 */
void identifiers_list() : {} {
    try {
        identifiers()
        ( <COMMA> identifiers() )*
    }
    catch (ParseException error) {
        output.add(new ErrorStruct("Erro: Lista de identificadores incorreto.\n", error));
    }
}
/** Type allowed for a variable: any constant type, or "enum". */
void type_declaration() : {} {
    (
        type_constant()   // integer | real | string | logic
      | <ENUM>
    )
}
/** Variable declaration: identifier list, "is", a type, then ".". */
void variable_declaration() : {} {
    identifiers_list()
    <IS> type_declaration()
    <DOT>
}
/** Type allowed for a constant. */
void type_constant() : {} {
    (
        <INTEGER>
      | <REAL>
      | <STRING>
      | <LOGIC>
    )
}
/** Trailing constant section: "as" "constant" followed by one or more constant declarations. */
void end_constant() : {} {
    <AS> <CONSTANT>
    ( constant_declaration() )+
}
/** Variables-first form: "variable", one or more variable declarations, optional constant section. */
void start_variable() : {} {
    <VARIABLE>
    ( variable_declaration() )+
    ( end_constant() )?
}
/** Constant declaration: identifier list, "is", a type, "=", a literal value, then ".". */
void constant_declaration() : {} {
    identifiers_list()
    <IS> type_constant()
    <ASSIGN> constant_result()
    <DOT>
}
/** Trailing variable section: "as" "variable" followed by one or more variable declarations. */
void end_variable() : {} {
    <AS> <VARIABLE>
    ( variable_declaration() )+
}
/** Constants-first form: "constant", one or more constant declarations, optional variable section. */
void start_constant() : {} {
    <CONSTANT>
    ( constant_declaration() )+
    ( end_variable() )?
}
/**
 * Declaration body: either the variables-first or the constants-first form.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void start_declaration(RecoverySet r) : {} {
    try {
        (
            start_variable()
          | start_constant()
        )
    }
    catch (ParseException error) {
        consumeUntil(r, error, "Error: Invalid declaration body.\n");
        output.add(new ErrorStruct("Erro: Declaracão de variavel/constante errado.\n", error));
    }
}
/**
 * Content of the declaration brackets: "as" followed by the declaration body.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void inner_declaration(RecoverySet r) : {} {
    try {
        <AS>
        start_declaration(r)
    }
    catch (ParseException error) {
        consumeUntil(r, error, "Error: Invalid declaration body.\n");
        output.add(new ErrorStruct("Error: Bad inner declaration of as.\n", error));
    }
}
/**
 * Constants-and-variables section: "constant" "and" "variable" "[" ... "]".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void declaration_constants_and_variables(RecoverySet r) : {} {
    try {
        <CONSTANT> <AND> <VARIABLE>
        <OPEN_BRACKET>
            inner_declaration(r)
        <CLOSE_BRACKET>
    }
    catch (ParseException error) {
        consumeUntil(r, error, "declaration_constants_and_variables");
        output.add(new ErrorStruct("Error: Forma geral de declaracão de constante e variaval incorreta.\n", error));
    }
}
/** A literal value: string, integer or real. */
void constant_result() : {} {
    (
        <STRING_LITERAL>
      | <NUM>
      | <NUMBER_REAL>
    )
}
/**
 * Result clauses of an avaliate command: a "true result [ ... ]" block or an
 * "untrue result [ ... ]" block, each followed by its continuation.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void logic_result(RecoverySet r) : {
    // Recovery set handed to the command lists inside the brackets.
    RecoverySet blockEnd = new RecoverySet(CLOSE_BRACKET);
} {
    try {
        (
            <TRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(blockEnd) <CLOSE_BRACKET>
            true_result_cont()
          |
            <UNTRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(blockEnd) <CLOSE_BRACKET>
            untrue_result_cont()
        )
    }
    catch (ParseException error) {
        consumeUntil(r, error, "");
        output.add(new ErrorStruct("Erro: Verificação de resultado logico incorreto.\n", error));
    }
}
/**
 * What may follow a "true result" block: an "untrue result [ ... ]" block and
 * ".", or just ".".
 * On a syntax error, skips ahead to the next "]" and records the error.
 */
void true_result_cont() : {
    RecoverySet blockEnd = new RecoverySet(CLOSE_BRACKET);
} {
    try {
        (
            ( <UNTRUE> <RESULT> <OPEN_BRACKET> list_of_commands(blockEnd) <CLOSE_BRACKET> <DOT> )
          | <DOT>
        )
    }
    catch (ParseException error) {
        consumeUntil(blockEnd, error, "list_of_commands");
        output.add(new ErrorStruct("Erro: Clausula de teste incorreta.\n", error));
    }
}
/**
 * What may follow an "untrue result" block: a "true result [ ... ]" block and
 * ".", or just ".".
 * On a syntax error, skips ahead to the next "]" and records the error.
 */
void untrue_result_cont() : {
    RecoverySet blockEnd = new RecoverySet(CLOSE_BRACKET);
} {
    try {
        (
            ( <TRUE> <RESULT> <OPEN_BRACKET> list_of_commands(blockEnd) <CLOSE_BRACKET> <DOT> )
          | <DOT>
        )
    }
    catch (ParseException error) {
        consumeUntil(blockEnd, error, "");
        output.add(new ErrorStruct("Erro: Clausula de teste incorreta.\n", error));
    }
}
/**
 * Optional declaration part: "declaration" followed by either the enum section
 * or the constants-and-variables section.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void declarations(RecoverySet r) : {} {
    try {
        (
            <DECLARATION>
            (
                enum_declaration(r)
              | declaration_constants_and_variables(r)
            )
        )?
    }
    catch (ParseException error) {
        consumeUntil(r, error, "declarations");
        output.add(new ErrorStruct("Erro de declaração de enum/variable\n", error));
    }
}
/**
 * One or more commands: repeat, avaliate, write / write all, designate or read.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void list_of_commands(RecoverySet r) : {
    // Recovery set handed to every individual command.
    RecoverySet commandSync = First.list_of_commands;
} {
    try {
        (
            repeat(commandSync)
          | avaliate(commandSync)
          | ( <WRITE> ( write(commandSync) | write_all(commandSync) ) )
          | designate(commandSync)
          | read(commandSync)
        )+
    }
    catch (ParseException error) {
        consumeUntil(r, error, "list_of_commands");
        output.add(new ErrorStruct("Erro: Declaração de comando incorreta.\n", error));
    }
}
/**
 * Expression: an arithmetic/logic expression with an optional comparison.
 * On a syntax error, skips ahead to the recovery set g and records the error.
 */
void expression(RecoverySet g) : {} {
    try {
        arithmetic_or_logic_expression(g)
        expression_cont(g)
    }
    catch (ParseException error) {
        consumeUntil(g, error, "list_of_commands");
        output.add(new ErrorStruct("Erro: Expressão incorreta.\n", error));
    }
}
/** A second-level term followed by any number of lowest-priority operations. */
void arithmetic_or_logic_expression(RecoverySet g) : {} {
    second_term(g)
    lesser_priority_operators(g)
}
/** Optional comparison: a relational operator followed by its right-hand operand. */
void expression_cont(RecoverySet g) : {} {
    (
        (
            <EQUAL_TO>
          | <DIFF_THAN>
          | <LESS_THAN>
          | <GREATER_THAN>
          | <LESS_THAN_OR_EQ_TO>
          | <GREATER_THAN_OR_EQ_TO>
        )
        // every operator takes the same kind of right-hand operand
        arithmetic_or_logic_expression(g)
    )?
}
/** An element followed by any number of highest-priority operations. */
void first_term(RecoverySet g) : {} {
    element(g)
    top_priority_operators(g)
}
/** A first-level term followed by any number of medium-priority operations. */
void second_term(RecoverySet g) : {} {
    first_term(g)
    medium_priority_operators(g)
}
/**
 * Operand of an expression: an (optionally indexed) identifier, a literal,
 * "true"/"untrue", a parenthesised expression, or a negated parenthesised
 * expression.
 */
void element(RecoverySet g) : {} {
    (
        ( <IDENTIFIER> index() )
      | <NUM>
      | <NUMBER_REAL>
      | <STRING_LITERAL>
      | <TRUE>
      | <UNTRUE>
      | ( <OPEN_PARENTHESIS> expression(g) <CLOSE_PARENTHESIS> )
      | ( <LOGICAL_NOT> <OPEN_PARENTHESIS> expression(g) <CLOSE_PARENTHESIS> )
    )
}
/** Optional index: an integer literal between curly brackets. */
void index() : {} {
    (
        <OPEN_CURLY_BRACKETS>
        <NUM>
        <CLOSE_CURLY_BRACKETS>
    )?
}
/** Zero or more power operations, each applied to an element. */
void top_priority_operators(RecoverySet g) : {} {
    (
        <POWER>
        element(g)
    )*
}
/** Zero or more multiply, divide, whole-division, remainder or logical-and operations. */
void medium_priority_operators(RecoverySet g) : {} {
    (
        (
            <MULTIPLY>
          | <DIV>
          | <WHOLE_DIV>
          | <REST_DIV>
          | <LOGICAL_AND>
        )
        // every operator takes the same kind of right-hand operand
        first_term(g)
    )*
}
/** Zero or more plus, minus or logical-or operations. */
void lesser_priority_operators(RecoverySet g) : {} {
    (
        (
            <PLUS>
          | <MINUS>
          | <LOGICAL_OR>
        )
        // every operator takes the same kind of right-hand operand
        second_term(g)
    )*
}
/**
 * Repeat command: "repeat" "this" condition "[" commands "]" ".".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void repeat(RecoverySet r) : {
    RecoverySet blockStart = new RecoverySet(OPEN_BRACKET);   // recovery set for the condition
    RecoverySet blockEnd = new RecoverySet(CLOSE_BRACKET);    // recovery set for the command list
} {
    try {
        <REPEAT> <THIS>
        expression(blockStart)
        <OPEN_BRACKET>
            list_of_commands(blockEnd)
        <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException error) {
        consumeUntil(r, error, "");
        output.add(new ErrorStruct("Erro: Declaração do comando repeat incorreta. \n", error));
    }
}
/**
 * Avaliate command: "avaliate" "this" condition, followed by the result clauses.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void avaliate(RecoverySet r) : {
    // Recovery set for the result clauses.
    RecoverySet commandEnd = new RecoverySet(DOT);
} {
    try {
        <AVALIATE> <THIS>
        expression(First.selection_command)
        logic_result(commandEnd)
    }
    catch (ParseException error) {
        consumeUntil(r, error, "");
        output.add(new ErrorStruct("Erro: Declaração do comando avaliate incorreta. \n", error));
    }
}
/**
 * Remainder of a write command after the "write" keyword:
 * "this" "[" items "]" ".".
 * On a syntax error, skips ahead to the recovery set g and records the error.
 */
void write(RecoverySet g) : {} {
    try {
        <THIS>
        <OPEN_BRACKET>
            write_body()
        <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException error) {
        consumeUntil(g, error, "list_of_commands");
        output.add(new ErrorStruct("Erro: Comando write incorreto.\n", error));
    }
}
/**
 * Remainder of a write-all command after the "write" keyword:
 * "all" "this" "[" items "]" ".".
 * On a syntax error, skips ahead to the recovery set g and records the error.
 */
void write_all(RecoverySet g) : {} {
    try {
        <ALL> <THIS>
        <OPEN_BRACKET>
            write_body()
        <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException error) {
        consumeUntil(g, error, "");
        output.add(new ErrorStruct("Erro: no comando write all.\n", error));
    }
}
/**
 * Items of a write command: a literal or identifier, then any number of
 * comma-separated further items.
 * A syntax error is recorded; no tokens are skipped here.
 */
void write_body() : {} {
    try {
        // first item; the tail is the same whichever kind of item came first
        ( constant_result() | identifiers() )
        ( <COMMA> write_body_cont() )*
    }
    catch (ParseException error) {
        output.add(new ErrorStruct("Erro: Erro no corpo do write.\n", error));
    }
}
/**
 * A further write item: a literal or an identifier.
 * A syntax error is recorded; no tokens are skipped here.
 */
void write_body_cont() : {} {
    try {
        (
            constant_result()
          | identifiers()
        )
    }
    catch (ParseException error) {
        output.add(new ErrorStruct("Erro: Erro no corpo do write.\n", error));
    }
}
/**
 * Assignment command: "designate" "this" identifier-list "as" expression ".".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void designate(RecoverySet r) : {
    // Recovery set for the assigned expression.
    RecoverySet commandEnd = new RecoverySet(DOT);
} {
    try {
        <DESIGNATE> <THIS>
        identifiers_list()
        <AS> expression(commandEnd)
        <DOT>
    }
    catch (ParseException error) {
        consumeUntil(r, error, "");
        output.add(new ErrorStruct("Erro: Erro de atribuição (designate).\n", error));
    }
}
/**
 * Read command: "read" "this" "[" identifier-list "]" ".".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void read(RecoverySet r) : {} {
    try {
        <READ> <THIS>
        <OPEN_BRACKET>
            identifiers_list()
        <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException e) {
        consumeUntil(r, e, "read");
        // Record the error like the other command productions do; without this
        // a malformed read command was recovered from without being reported.
        output.add(new ErrorStruct("Erro: Comando read incorreto.\n", e));
    }
}
/**
 * Program header: "do" "this" IDENTIFIER "[" "]".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void header(RecoverySet r) : {} {
    try {
        <DO> <THIS>
        <IDENTIFIER>
        <OPEN_BRACKET> <CLOSE_BRACKET>
    }
    catch (ParseException error) {
        consumeUntil(r, error, "header");
        output.add(new ErrorStruct("Erro: Cabecalho principal incorreto.\n", error));
    }
}
/**
 * Program body: "body" "[" commands "]".
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void body(RecoverySet r) : {
    // The command list must resynchronise on the bracket that CLOSES the body,
    // which is the token expected right after it (same choice as in repeat and
    // logic_result). Resynchronising on "[" skipped past the closing bracket.
    RecoverySet g = new RecoverySet(CLOSE_BRACKET);
} {
    try {
        <BODY>
        <OPEN_BRACKET>
            list_of_commands(g)
        <CLOSE_BRACKET>
    }
    catch (ParseException e) {
        consumeUntil(r, e, "body");
        output.add(new ErrorStruct("Erro: Declaracão do corpo incorreto.\n", e));
    }
}
/**
 * Optional program description: "description" followed by a string literal.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void desc(RecoverySet r) : {} {
    try {
        (
            <DESCRIPTION>
            <STRING_LITERAL>
        )?
    }
    catch (ParseException error) {
        consumeUntil(r, error, "description");
        output.add(new ErrorStruct("Erro: Descricao do programa incorreto\n", error));
    }
}
/**
 * Whole program: header, optional declarations, body, optional description.
 * On a syntax error, skips ahead to the recovery set r and records the error.
 */
void main(RecoverySet r) : {
    RecoverySet h = new RecoverySet(BODY);
    RecoverySet g = new RecoverySet(DECLARATION);
    RecoverySet i = g.union(h);                     // header recovers at "declaration" or "body"
    RecoverySet l = new RecoverySet(DESCRIPTION);
} {
    try {
        header(i)
        declarations(h)
        body(r.union(l))                            // body may also recover at "description"
        desc(r)
    }
    catch (ParseException e) {
        consumeUntil(r, e, "main");
        // Record the error instead of dropping it, as every other production does.
        output.add(new ErrorStruct("Erro: Estrutura principal do programa incorreta.\n", e));
    }
}
/**
 * Start symbol: an optional program followed by end of input.
 * On a syntax error, skips ahead to EOF and records the error.
 */
void begin_program() : {
    RecoverySet endOfInput = new RecoverySet(EOF);
} {
    try {
        [ main(endOfInput) ]
        <EOF>
    }
    catch (ParseException error) {
        consumeUntil(endOfInput, error, "begin_program");
        output.add(new ErrorStruct("Erro: Forma geral do programa incorreto.\n", error));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment