Skip to content

Instantly share code, notes, and snippets.

@benmezger
Created June 28, 2020 04:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benmezger/53132801fb01356f4c369bc9f45cc52f to your computer and use it in GitHub Desktop.
Save benmezger/53132801fb01356f4c369bc9f45cc52f to your computer and use it in GitHub Desktop.
// JavaCC generation options for this grammar.
options {
// String literals in token definitions match regardless of letter case.
IGNORE_CASE = true;
// Java-style \uXXXX escapes in the input are decoded before tokenizing.
JAVA_UNICODE_ESCAPE = true;
// Generate instance (non-static) parser members, so several parsers can coexist.
STATIC = false;
// The generated parser prints a trace of its actions while parsing.
DEBUG_PARSER = true;
// Default number of tokens examined at each choice point.
LOOKAHEAD = 1;
// The trace additionally includes lookahead operations.
DEBUG_LOOKAHEAD = true;
}
PARSER_BEGIN(LanguageParser)
package compiler.parser;
import javax.swing.text.html.parser.Parser;
import java.util.List;
import java.util.ArrayList;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.lang.StringBuilder;
import java.lang.StringBuilder;
/**
 * User-written part of the parser class that JavaCC completes from this
 * grammar. It adds static convenience entry points (tokenizing, syntax
 * checking) and the token-skipping helper the productions use for error
 * recovery.
 */
public class LanguageParser {
    /** Number of recoveries completed by {@link #consumeUntil}. */
    int contParseError = 0;

    /** Recovery debug switch; not read anywhere in this class. */
    boolean debugRecovery = true;

    /**
     * Errors recorded by the grammar productions. The list is static, so it
     * is shared by every parser instance.
     */
    final static List<ErrorStruct> output = new ArrayList<ErrorStruct>();

    /** Set by {@link #consumeUntil} once it runs into end of input while skipping. */
    boolean eof;

    /**
     * Tokenizes the given source text.
     *
     * @param stream the program text
     * @return the tokens produced by {@link #tokenize(LanguageParser)}
     */
    public static List<Token> getTokens(String stream) {
        return tokenize(create(stream));
    }

    /**
     * Parses the given source text from the start production and returns the
     * errors recorded while doing so.
     *
     * @param stream the program text
     * @return a fresh list holding only the errors of this parse
     */
    public static ArrayList<ErrorStruct> checkSyntax(String stream) {
        LanguageParser parser = create(stream);
        // The error list is shared; drop anything an earlier parse left behind
        // so the result describes this input only.
        output.clear();
        try {
            try {
                parser.begin_program();
            }
            catch (ParseException e) {
                output.add(new ErrorStruct("Error parsing the program.\n", e));
            }
            return new ArrayList<ErrorStruct>(output);
        }
        finally {
            // Also runs when an Error escapes, so the shared list never stays dirty.
            output.clear();
        }
    }

    /**
     * Builds a parser that reads the given source text.
     *
     * @param stream the program text
     * @return a new parser positioned at the start of the text
     */
    public static LanguageParser create(String stream) {
        // Read the characters directly: encoding to bytes with the platform
        // charset and decoding again loses characters that charset cannot map.
        return new LanguageParser(new java.io.StringReader(stream));
    }

    /**
     * Command-line entry point. With no argument the parser is built over
     * standard input; with one argument, over the named file. The parser is
     * only constructed here; the token dump below is disabled.
     *
     * @param args optional single path of the file to read
     */
    public static void main(String args[]) throws TokenMgrError, ParseException {
        LanguageParser parser = null;
        if (args.length == 0) {
            parser = new LanguageParser(System.in);
        }
        else if (args.length == 1) {
            try {
                parser = new LanguageParser(new java.io.FileInputStream(args[0]));
            }
            catch (java.io.FileNotFoundException e) {
                System.out.println("LanguageParser: file " + args[0] + " was not found.");
                return;
            }
        }
        //for (Token token: tokenize(parser)){
        //    String name = LanguageParserConstants.tokenImage[token.kind];
        //    System.out.println("Line " + token.beginLine + " | Column " + token.beginColumn + " | " + token + "\n");
        //}
    }

    /**
     * Drains the parser's token stream into a list, stopping at EOF (the EOF
     * token itself is not included).
     *
     * @param parser the parser whose tokens are read
     * @return the tokens in input order; when TokenHandler.isClosed() is false
     *         the token from TokenHandler.createToken() is appended
     *         (presumably a marker for an unclosed block comment; confirm in
     *         TokenHandler)
     */
    public static List<Token> tokenize(LanguageParser parser) {
        List<Token> tokens = new ArrayList<Token>();
        Token token = parser.getNextToken();
        while (token.kind != LanguageParserConstants.EOF) {
            tokens.add(token);
            token = parser.getNextToken();
        }
        if (!TokenHandler.isClosed()) {
            tokens.add(TokenHandler.createToken());
        }
        return tokens;
    }

    /**
     * Returns the display image of a token kind with its surrounding double
     * quotes removed. Images without a usable closing quote (for example
     * "&lt;EOF&gt;") are returned unchanged.
     *
     * @param x index into tokenImage
     * @return the unquoted image
     */
    static public String im(int x) {
        String s = tokenImage[x];
        int k = s.lastIndexOf("\"");
        // substring(1, k) is only valid for k >= 1; check instead of catching
        // the StringIndexOutOfBoundsException.
        if (k >= 1) {
            s = s.substring(1, k);
        }
        return s;
    }

    /**
     * Error recovery: skips tokens until the next unconsumed token belongs to
     * the recovery set, or until end of input is reached.
     *
     * @param g   kinds of tokens at which skipping stops
     * @param e   the parse error being recovered from
     * @param met label supplied by the calling production; not used here
     * @throws ParseException rethrows {@code e} when no recovery set is given
     */
    public void consumeUntil(RecoverySet g, ParseException e, String met) throws ParseException {
        if (g == null) {
            throw e;
        }
        Token tok = getToken(1); // next token, not yet consumed
        while (!eof) {
            if (g.contains(tok.kind)) {
                break; // synchronizing token found
            }
            getNextToken();
            tok = getToken(1);
            // EOF ends the scan even when it is not a synchronizing token.
            if (tok.kind == EOF && !g.contains(EOF)) {
                eof = true;
            }
        }
        contParseError++;
    }
}
PARSER_END(LanguageParser)
/*
 * Input discarded in the DEFAULT state: blanks, line breaks, tabs, "#" line
 * comments (up to, not including, the newline) and the opener of a block
 * comment, which switches the lexer to BLOCK_COMMENT_STATE.
 */
SKIP: {
    " "
  | "\n"
  | "\r"   // carriage return: without this, CRLF input produces OTHER tokens
  | "\t"
  | < "#" (~["\n"])* >
  | < "/*" >
    // Reports the opener's text and position to TokenHandler (presumably so an
    // unclosed comment can be flagged later; confirm in TokenHandler).
    { TokenHandler.writeInvalidToOutput(image.toString(), input_stream.getEndLine(), input_stream.getBeginColumn(), jjmatchedKind); }
    : BLOCK_COMMENT_STATE
}
// Lexical state that is active while inside a block comment.
// NOTE(review): the terminator below is "/*", identical to the opener, not
// "*/". Confirm against the language definition that this is intended.
<BLOCK_COMMENT_STATE> SKIP: {
<"/*"> {TokenHandler.setClosed(true);}: DEFAULT // tell TokenHandler the comment was closed, resume DEFAULT
| <~[]> // any other single character is discarded
}
/*
 * Keywords. The options block sets IGNORE_CASE = true, so they match in any
 * letter case. They are declared before IDENTIFIER; when a keyword and
 * another token match the same text, the earlier declaration wins.
 */
TOKEN: {
<INTEGER: "integer">
| <REAL: "real">
| <STRING: "string">
| <TRUE: "true">
| <UNTRUE: "untrue">
| <DO: "do">
| <THIS: "this">
| <DESCRIPTION: "description">
| <BODY: "body">
| <IS: "is">
| <AS: "as">
| <AND: "and">
| <CONSTANT: "constant">
| <VARIABLE: "variable">
| <DESIGNATE: "designate">
| <READ: "read">
| <WRITE: "write">
| <ALL: "all">
| <AVALIATE: "avaliate">
| <RESULT: "result">
| <REPEAT: "repeat">
| <DECLARATION: "declaration">
| <TYPE: "type">
| <ENUM: "enum">
}
/*
 * Arithmetic operators, assignment, relational operators, brackets and
 * punctuation. Several operators share a prefix with a shorter one ("**" and
 * "*", "%%" and "%", "==" and "=", "<<=" and "<<", ">>=" and ">>"); the
 * token manager always takes the longest match.
 */
TOKEN: {
<PLUS : "+">
| <MINUS : "-">
| <MULTIPLY : "*">
| <DIV : "/">
| <POWER : "**">
| <WHOLE_DIV : "%">
| <REST_DIV: "%%">
| <EQUAL_TO: "==">
| <ASSIGN: "=">
| <DIFF_THAN: "!=">
| <LESS_THAN: "<<">
| <GREATER_THAN: ">>">
| <LESS_THAN_OR_EQ_TO: "<<=">
| <GREATER_THAN_OR_EQ_TO: ">>=">
| <OPEN_BRACKET: "[">
| <CLOSE_BRACKET: "]">
| <OPEN_PARENTHESIS: "(">
| <CLOSE_PARENTHESIS: ")">
| <OPEN_CURLY_BRACKETS: "{">
| <CLOSE_CURLY_BRACKETS: "}">
| <DOT: ".">
| <COMMA: ",">
}
/*
 * Logical operators, plus the "logic" keyword that type_declaration and
 * type_constant accept as a type name.
 */
TOKEN: {
<LOGICAL_AND : "&" >
| <LOGICAL_OR : "|">
| <LOGICAL_NOT: "!">
| <LOGIC: "logic">
}
/*
 * Identifiers: one or more groups made of a letter, at most one digit, and
 * then any run of letters or underscores, followed by an optional tail of
 * underscores and letters.
 *
 * The underscore alternatives are spelled "_" here. An empty string literal
 * in those positions matches nothing, which would leave underscores
 * unsupported and put an empty alternative inside a loop.
 */
TOKEN: {
    <IDENTIFIER: (<LETTER> (<DIGITS>)? (<LETTER> | ("_"))*)+ (("_") | <LETTER>)*>
  | <#LETTER: ["a"-"z","A"-"Z"]>   // private helper: a single ASCII letter
  | <#DIGITS: (["0"-"9"])>         // private helper: a single decimal digit
}
/*
 * Numeric literals. The optional leading minus sign is part of the literal
 * itself, so "-5" is a single NUM token.
 * NOTE(review): because the sign is folded into the literal, input such as
 * "x-1" presumably lexes as IDENTIFIER followed by NUM rather than as a
 * subtraction. Confirm this is intended.
 */
TOKEN: {
<NUM : (<MINUS>)?(<DIGIT>)+>
| <NUMBER_REAL : ((<MINUS>)? (<DIGIT>)+ <DOT> (<DIGIT>)+)> // digits are required on both sides of the dot
| <#DIGIT : ["0" - "9"]> // private helper: a single decimal digit
}
/*
 * String literals, delimited by either double or single quotes. The body may
 * not contain the delimiter, a backslash, or a raw line break unless written
 * as an escape: \n \t \b \r \f \\ \' \" or an octal escape of up to three
 * digits.
 */
TOKEN: {
<STRING_LITERAL: ("\"" (~["\"","\\","\n","\r"] | "\\" (["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"]))* "\"")
| ("\'" (~["\'","\\","\n","\r"] | "\\" (["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"]))* "\'")>
}
/*
 * Catch-all tokens, so malformed input becomes a token instead of a lexical
 * error. OTHER matches any single character that nothing else matches.
 * INVALID_IDENTIFIER matches identifier-like text that IDENTIFIER rejects.
 *
 * The underscore alternatives are spelled "_" throughout. Empty string
 * literals in those positions would match nothing, and the last alternative
 * would then accept any alphanumeric run instead of names that start with
 * an underscore.
 */
TOKEN : {
    <OTHER: ~[]>
  | <INVALID_IDENTIFIER:
        // two or more consecutive digits inside the name
        <LETTER> (<LETTER> | <DIGITS> | ("_"))* <DIGITS> (<DIGITS>)+ (<LETTER> | ("_"))*
        // name ending in one or more digits
      | <LETTER> (<LETTER> | <DIGITS> | ("_"))* (<DIGITS>)+
        // underscore(s) directly followed by a digit
      | <LETTER> (<LETTER> | <DIGITS>) (("_"))+ <DIGITS> (<LETTER> | <DIGITS> | ("_"))
        // name starting with a digit
      | (<DIGITS>)+ (<LETTER> | <DIGITS> | ("_"))*
        // name starting with an underscore
      | ("_") (<LETTER> | <DIGITS> | ("_"))*
    >
}
/** A single enum member value: a literal constant or an identifier. */
void enum_values() :
{}
{
    constant_result()
  | <IDENTIFIER>
}
/**
 * One enum definition: a name, "is", a comma-separated list of values, and
 * a closing dot. On a parse error, skips to a token in r and records the
 * error.
 */
void inner_enum_declaration(RecoverySet r) :
{}
{
    try {
        <IDENTIFIER> <IS>
        enum_values() inner_enum_decla_cont()
        <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "Error: Invalid enum declaration syntax.");
        output.add(new ErrorStruct("Erro: declaracao de enum interna incorreta.\n", parseError));
    }
}
/** Optional rest of an enum value list: a comma, a value, and recursively more. */
void inner_enum_decla_cont() :
{}
{
    [ <COMMA> enum_values() inner_enum_decla_cont() ]
}
/**
 * The enum section: "type [", one or more enum definitions, "]", optionally
 * followed by a "declaration" of constants and variables. On a parse error,
 * skips to a token in r and records the error.
 */
void enum_declaration(RecoverySet r) :
{}
{
    try {
        <TYPE> <OPEN_BRACKET>
            ( inner_enum_declaration(r) )+
        <CLOSE_BRACKET>
        [ <DECLARATION> declaration_constants_and_variables(r) ]
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "Error: Invalid enum declaration syntax.");
        output.add(new ErrorStruct("Erro: declaracao de enum incorreta.\n", parseError));
    }
}
/** An identifier with an optional index suffix. */
void identifiers() :
{}
{
    <IDENTIFIER>
    index()
}
/**
 * A comma-separated list of one or more identifiers. A parse error is
 * recorded, but no tokens are skipped here.
 */
void identifiers_list() :
{}
{
    try {
        identifiers()
        ( <COMMA> identifiers() )*
    }
    catch (ParseException parseError) {
        output.add(new ErrorStruct("Erro: Lista de identificadores incorreto.\n", parseError));
    }
}
/** Type names accepted in a variable declaration. */
void type_declaration() :
{}
{
    <INTEGER>
  | <REAL>
  | <STRING>
  | <LOGIC>
  | <ENUM>
}
/** One variable declaration: identifier list, "is", a type, and a dot. */
void variable_declaration() :
{}
{
    identifiers_list()
    <IS> type_declaration()
    <DOT>
}
/** Type names accepted in a constant declaration (enum is not among them). */
void type_constant() :
{}
{
    <INTEGER>
  | <REAL>
  | <STRING>
  | <LOGIC>
}
/** Constant section that follows a variable section: "as constant" plus one or more declarations. */
void end_constant() :
{}
{
    <AS> <CONSTANT>
    ( constant_declaration() )+
}
/** Declaration body that begins with variables, optionally followed by a constant section. */
void start_variable() :
{}
{
    <VARIABLE>
    ( variable_declaration() )+
    [ end_constant() ]
}
/** One constant declaration: identifier list, "is", a type, "=", a literal value, and a dot. */
void constant_declaration() :
{}
{
    identifiers_list()
    <IS> type_constant()
    <ASSIGN> constant_result()
    <DOT>
}
/** Variable section that follows a constant section: "as variable" plus one or more declarations. */
void end_variable() :
{}
{
    <AS> <VARIABLE>
    ( variable_declaration() )+
}
/** Declaration body that begins with constants, optionally followed by a variable section. */
void start_constant() :
{}
{
    <CONSTANT>
    ( constant_declaration() )+
    [ end_variable() ]
}
/**
 * The declaration body, starting with either variables or constants. On a
 * parse error, skips to a token in r and records the error.
 */
void start_declaration(RecoverySet r) :
{}
{
    try {
        start_variable()
      | start_constant()
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "Error: Invalid declaration body.\n");
        output.add(new ErrorStruct("Erro: Declaracão de variavel/constante errado.\n", parseError));
    }
}
/**
 * Content of the declaration brackets: "as" followed by the declaration
 * body. On a parse error, skips to a token in r and records the error.
 */
void inner_declaration(RecoverySet r) :
{}
{
    try {
        <AS>
        start_declaration(r)
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "Error: Invalid declaration body.\n");
        output.add(new ErrorStruct("Error: Bad inner declaration of as.\n", parseError));
    }
}
/**
 * The "constant and variable [ ... ]" section. On a parse error, skips to a
 * token in r and records the error.
 */
void declaration_constants_and_variables(RecoverySet r) :
{}
{
    try {
        <CONSTANT> <AND> <VARIABLE>
        <OPEN_BRACKET>
            inner_declaration(r)
        <CLOSE_BRACKET>
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "declaration_constants_and_variables");
        output.add(new ErrorStruct("Error: Forma geral de declaracão de constante e variaval incorreta.\n", parseError));
    }
}
/** A literal value: string, integer or real. */
void constant_result() :
{}
{
    <STRING_LITERAL>
  | <NUM>
  | <NUMBER_REAL>
}
/**
 * The result clauses of an avaliate command. The first clause is either
 * "true result [ ... ]" or "untrue result [ ... ]"; the matching
 * continuation production then handles the optional opposite clause and the
 * final dot. Commands inside the brackets recover at CLOSE_BRACKET. An
 * error in this production itself skips to a token in r and is recorded.
 */
void logic_result(RecoverySet r) :
{
    RecoverySet untilCloseBracket = new RecoverySet(CLOSE_BRACKET);
}
{
    try {
        (
            <TRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(untilCloseBracket) <CLOSE_BRACKET>
            true_result_cont()
        )
      | (
            <UNTRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(untilCloseBracket) <CLOSE_BRACKET>
            untrue_result_cont()
        )
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "");
        output.add(new ErrorStruct("Erro: Verificação de resultado logico incorreto.\n", parseError));
    }
}
/**
 * What may follow a "true result" clause: an "untrue result [ ... ]" clause
 * and a dot, or just the dot. On a parse error, skips to CLOSE_BRACKET and
 * records the error.
 */
void true_result_cont() :
{
    RecoverySet untilCloseBracket = new RecoverySet(CLOSE_BRACKET);
}
{
    try {
        (
            <UNTRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(untilCloseBracket) <CLOSE_BRACKET>
            <DOT>
        )
      | <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(untilCloseBracket, parseError, "list_of_commands");
        output.add(new ErrorStruct("Erro: Clausula de teste incorreta.\n", parseError));
    }
}
/**
 * What may follow an "untrue result" clause: a "true result [ ... ]" clause
 * and a dot, or just the dot. On a parse error, skips to CLOSE_BRACKET and
 * records the error.
 */
void untrue_result_cont() :
{
    RecoverySet untilCloseBracket = new RecoverySet(CLOSE_BRACKET);
}
{
    try {
        (
            <TRUE> <RESULT>
            <OPEN_BRACKET> list_of_commands(untilCloseBracket) <CLOSE_BRACKET>
            <DOT>
        )
      | <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(untilCloseBracket, parseError, "");
        output.add(new ErrorStruct("Erro: Clausula de teste incorreta.\n", parseError));
    }
}
/**
 * Optional declarations part of a program: "declaration" followed by either
 * the enum section or the constants-and-variables section. On a parse
 * error, skips to a token in r and records the error.
 */
void declarations(RecoverySet r) :
{}
{
    try {
        [
            <DECLARATION>
            (
                enum_declaration(r)
              | declaration_constants_and_variables(r)
            )
        ]
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "declarations");
        output.add(new ErrorStruct("Erro de declaração de enum/variable\n", parseError));
    }
}
/**
 * One or more commands: repeat, avaliate, write / write all, designate or
 * read. Each command receives First.list_of_commands as its recovery set.
 * An error in this production itself skips to a token in r and is recorded.
 */
void list_of_commands(RecoverySet r) :
{
    RecoverySet commandStart = First.list_of_commands;
}
{
    try {
        (
            repeat(commandStart)
          | avaliate(commandStart)
          | ( <WRITE> ( write(commandStart) | write_all(commandStart) ) )
          | designate(commandStart)
          | read(commandStart)
        )+
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "list_of_commands");
        output.add(new ErrorStruct("Erro: Declaração de comando incorreta.\n", parseError));
    }
}
/**
 * An expression: an arithmetic/logic expression optionally compared with
 * another one. On a parse error, skips to a token in g and records the
 * error.
 */
void expression(RecoverySet g) :
{}
{
    try {
        arithmetic_or_logic_expression(g)
        expression_cont(g)
    }
    catch (ParseException parseError) {
        consumeUntil(g, parseError, "list_of_commands");
        output.add(new ErrorStruct("Erro: Expressão incorreta.\n", parseError));
    }
}
/** A term followed by any number of lowest-priority operations (+, -, |). */
void arithmetic_or_logic_expression(RecoverySet g) :
{}
{
    second_term(g)
    lesser_priority_operators(g)
}
/** Optional comparison: one relational operator followed by its right-hand operand. */
void expression_cont(RecoverySet g) :
{}
{
    [
        (
            <EQUAL_TO>
          | <DIFF_THAN>
          | <LESS_THAN>
          | <GREATER_THAN>
          | <LESS_THAN_OR_EQ_TO>
          | <GREATER_THAN_OR_EQ_TO>
        )
        arithmetic_or_logic_expression(g)
    ]
}
/** An element followed by any number of highest-priority operations (**). */
void first_term(RecoverySet g) :
{}
{
    element(g)
    top_priority_operators(g)
}
/** A first-level term followed by any number of medium-priority operations. */
void second_term(RecoverySet g) :
{}
{
    first_term(g)
    medium_priority_operators(g)
}
/**
 * The atoms of an expression: an optionally indexed identifier, a literal,
 * true / untrue, a parenthesized expression, or a negated parenthesized
 * expression.
 */
void element(RecoverySet g) :
{}
{
    ( <IDENTIFIER> index() )
  | <NUM>
  | <NUMBER_REAL>
  | <STRING_LITERAL>
  | <TRUE>
  | <UNTRUE>
  | ( <OPEN_PARENTHESIS> expression(g) <CLOSE_PARENTHESIS> )
  | ( <LOGICAL_NOT> <OPEN_PARENTHESIS> expression(g) <CLOSE_PARENTHESIS> )
}
/** Optional index suffix: an integer literal inside curly brackets. */
void index() :
{}
{
    [ <OPEN_CURLY_BRACKETS> <NUM> <CLOSE_CURLY_BRACKETS> ]
}
/** Zero or more "**" operations, each followed by an element. */
void top_priority_operators(RecoverySet g) :
{}
{
    (
        <POWER>
        element(g)
    )*
}
/** Zero or more medium-priority operations (*, /, %, %%, &), each followed by a first-level term. */
void medium_priority_operators(RecoverySet g) :
{}
{
    (
        (
            <MULTIPLY>
          | <DIV>
          | <WHOLE_DIV>
          | <REST_DIV>
          | <LOGICAL_AND>
        )
        first_term(g)
    )*
}
/** Zero or more lowest-priority operations (+, -, |), each followed by a second-level term. */
void lesser_priority_operators(RecoverySet g) :
{}
{
    (
        (
            <PLUS>
          | <MINUS>
          | <LOGICAL_OR>
        )
        second_term(g)
    )*
}
/**
 * The repeat command: "repeat this", a condition, a bracketed command list,
 * and a dot. The condition recovers at OPEN_BRACKET and the command list at
 * CLOSE_BRACKET. An error in this production itself skips to a token in r
 * and is recorded.
 */
void repeat(RecoverySet r) :
{
    RecoverySet untilOpenBracket = new RecoverySet(OPEN_BRACKET);
    RecoverySet untilCloseBracket = new RecoverySet(CLOSE_BRACKET);
}
{
    try {
        <REPEAT> <THIS> expression(untilOpenBracket)
        <OPEN_BRACKET>
            list_of_commands(untilCloseBracket)
        <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "");
        output.add(new ErrorStruct("Erro: Declaração do comando repeat incorreta. \n", parseError));
    }
}
/**
 * The avaliate command: "avaliate this", a condition, then the result
 * clauses. The condition recovers at First.selection_command and the result
 * clauses at DOT. An error in this production itself skips to a token in r
 * and is recorded.
 */
void avaliate(RecoverySet r) :
{
    RecoverySet untilDot = new RecoverySet(DOT);
}
{
    try {
        <AVALIATE> <THIS>
        expression(First.selection_command)
        logic_result(untilDot)
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "");
        output.add(new ErrorStruct("Erro: Declaração do comando avaliate incorreta. \n", parseError));
    }
}
/**
 * Rest of a "write this [ ... ]." command (the WRITE keyword is consumed by
 * list_of_commands). On a parse error, skips to a token in g and records
 * the error.
 */
void write(RecoverySet g) :
{}
{
    try {
        <THIS>
        <OPEN_BRACKET> write_body() <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(g, parseError, "list_of_commands");
        output.add(new ErrorStruct("Erro: Comando write incorreto.\n", parseError));
    }
}
/**
 * Rest of a "write all this [ ... ]." command (the WRITE keyword is
 * consumed by list_of_commands). On a parse error, skips to a token in g
 * and records the error.
 */
void write_all(RecoverySet g) :
{}
{
    try {
        <ALL> <THIS>
        <OPEN_BRACKET> write_body() <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(g, parseError, "");
        output.add(new ErrorStruct("Erro: no comando write all.\n", parseError));
    }
}
/**
 * Arguments of a write command: a literal or an identifier, followed by any
 * number of comma-separated further items. A parse error is recorded, but
 * no tokens are skipped here.
 */
void write_body() :
{}
{
    try {
        ( constant_result() | identifiers() )
        ( <COMMA> write_body_cont() )*
    }
    catch (ParseException parseError) {
        output.add(new ErrorStruct("Erro: Erro no corpo do write.\n", parseError));
    }
}
/**
 * One further write argument after a comma: a literal or an identifier. A
 * parse error is recorded, but no tokens are skipped here.
 */
void write_body_cont() :
{}
{
    try {
        constant_result()
      | identifiers()
    }
    catch (ParseException parseError) {
        output.add(new ErrorStruct("Erro: Erro no corpo do write.\n", parseError));
    }
}
/**
 * The designate (assignment) command: "designate this", the target
 * identifiers, "as", an expression, and a dot. The expression recovers at
 * DOT. An error in this production itself skips to a token in r and is
 * recorded.
 */
void designate(RecoverySet r) :
{
    RecoverySet untilDot = new RecoverySet(DOT);
}
{
    try {
        <DESIGNATE> <THIS> identifiers_list()
        <AS> expression(untilDot)
        <DOT>
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "");
        output.add(new ErrorStruct("Erro: Erro de atribuição (designate).\n", parseError));
    }
}
/**
 * The read command: "read this [", an identifier list, "]", and a dot. On a
 * parse error, skips to a token in r and records the error, as the other
 * commands do. Without the record, a malformed read would be recovered from
 * without ever being reported.
 */
void read(RecoverySet r) :
{}
{
    try {
        <READ> <THIS>
        <OPEN_BRACKET> identifiers_list() <CLOSE_BRACKET>
        <DOT>
    }
    catch (ParseException e) {
        consumeUntil(r, e, "read");
        output.add(new ErrorStruct("Erro: Comando read incorreto.\n", e));
    }
}
/**
 * The program header: "do this", the program name, and an empty pair of
 * brackets. On a parse error, skips to a token in r and records the error.
 */
void header(RecoverySet r) :
{}
{
    try {
        <DO> <THIS> <IDENTIFIER>
        <OPEN_BRACKET> <CLOSE_BRACKET>
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "header");
        output.add(new ErrorStruct("Erro: Cabecalho principal incorreto.\n", parseError));
    }
}
/**
 * The program body: "body [", one or more commands, "]". On a parse error
 * in this production itself, skips to a token in r and records the error.
 *
 * The command list recovers at CLOSE_BRACKET, the token that follows it
 * here. Recovering at OPEN_BRACKET would skip past the closing bracket this
 * production needs next.
 */
void body(RecoverySet r) :
{
    RecoverySet g = new RecoverySet(CLOSE_BRACKET);
}
{
    try {
        <BODY> <OPEN_BRACKET>
            list_of_commands(g)
        <CLOSE_BRACKET>
    }
    catch (ParseException e) {
        consumeUntil(r, e, "body");
        output.add(new ErrorStruct("Erro: Declaracão do corpo incorreto.\n", e));
    }
}
/**
 * Optional program description: "description" followed by a string
 * literal. On a parse error, skips to a token in r and records the error.
 */
void desc(RecoverySet r) :
{}
{
    try {
        [ <DESCRIPTION> <STRING_LITERAL> ]
    }
    catch (ParseException parseError) {
        consumeUntil(r, parseError, "description");
        output.add(new ErrorStruct("Erro: Descricao do programa incorreto\n", parseError));
    }
}
/**
 * A whole program: header, optional declarations, body, optional
 * description. Each part gets the start tokens of the parts after it as its
 * recovery set:
 *   header       recovers at DECLARATION or BODY
 *   declarations recovers at BODY
 *   body         recovers at r or DESCRIPTION
 *   desc         recovers at r
 *
 * A parse error that reaches this production skips to a token in r and is
 * recorded. Without the record, the failure would be recovered from without
 * ever being reported.
 */
void main(RecoverySet r) :
{
    RecoverySet h = new RecoverySet(BODY);
    RecoverySet g = new RecoverySet(DECLARATION);
    RecoverySet i = g.union(h);
    RecoverySet l = new RecoverySet(DESCRIPTION);
}
{
    try {
        header(i)
        declarations(h)
        body(r.union(l))
        desc(r)
    }
    catch (ParseException e) {
        consumeUntil(r, e, "main");
        output.add(new ErrorStruct("Erro: Estrutura principal do programa incorreta.\n", e));
    }
}
/**
 * Start production: an optional program followed by end of input. On a
 * parse error, skips to EOF and records the error.
 */
void begin_program() :
{
    RecoverySet untilEof = new RecoverySet(EOF);
}
{
    try {
        ( main(untilEof) )?
        <EOF>
    }
    catch (ParseException parseError) {
        consumeUntil(untilEof, parseError, "begin_program");
        output.add(new ErrorStruct("Erro: Forma geral do programa incorreto.\n", parseError));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment