-
-
Save aborg0/3025801 to your computer and use it in GitHub Desktop.
// Combined ANTLR grammar for Mercury terms; parser rules come first, lexer rules follow.
grammar mercury_alt;
// The backtracking options are kept commented out.
//options {
//    backtrack = true;
//    memoize=true;
//}
// Entry rule: optional leading whitespace/comments, then any number of items.
model
    : s item*
    ;
//http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Terms.html#Terms
// One item: a term closed by TERM_END, then optional trailing whitespace/comments.
item
    : term TERM_END s
    ;
// A term delegates to the "alt" operator chain, the variant in which
// COMMA is accepted as an infix operator (see altPrec1000).
term
    : altFunctor
    ;
// Square-bracketed, comma-separated sequence of functors (may be empty).
listTerm
    : OPEN_LIST s commaSeparatedFunctors CLOSE_LIST
    ;
// Curly-braced, comma-separated sequence of functors (may be empty).
tupleTerm
    : OPEN_CURLY s commaSeparatedFunctors CLOSE_CURLY
    ;
// Skippable filler: any run of comments, whitespace and '#<digits>' LINE tokens.
// These tokens are not hidden by the lexer, so rules reference `s` explicitly.
s
    : (SL_COMMENT | ML_COMMENT | WS | LINE)*
    ;
// Zero or more functors separated by COMMA. Uses the plain `functor` chain,
// in which COMMA is not an operator, so each comma here is a separator.
commaSeparatedFunctors
    : (functor (COMMA s functor)*)?
    ;
// Top of the "alt" operator chain: optional prefix '?-' or ':-'.
altFunctor
    : ('?-' s | ':-' s)? altPrec1200
    ;
// Infix ':-' and '-->'; at most one occurrence, operands one level down.
altPrec1200
    : altPrec1199 ((':-' s | '-->' s) altPrec1199)?
    ;
// Optional declaration keyword (each one is a prefix operator, fx 1199)
// followed by the next level of the "alt" chain.
altPrec1199
    : ( ( 'end_module'      // fx 1199
        | 'import_module'   // fx 1199
        | 'include_module'  // fx 1199
        | 'initialise'      // fx 1199
        | 'initialize'      // fx 1199
        | 'finalise'        // fx 1199
        | 'finalize'        // fx 1199
        | 'inst'            // fx 1199
        | 'instance'        // fx 1199
        | 'mode'            // fx 1199
        | 'module'          // fx 1199
        | 'pragma'          // fx 1199
        | 'promise'         // fx 1199
        | 'rule'            // fx 1199
        | 'typeclass'       // fx 1199
        | 'use_module'      // fx 1199
        ) s
      )? altPrec1181
    ;
// Prefix 'solver' (may repeat through self-recursion), else the next level.
altPrec1181
    : ('solver' s altPrec1181)
    | altPrec1180
    ;
// Prefix 'type' (may repeat through self-recursion), else the next level.
altPrec1180
    : ('type' s altPrec1180)
    | altPrec1179
    ;
// Infix '--->', recursing on the right-hand operand.
altPrec1179
    : altPrec1175 ('--->' s altPrec1179)?
    ;
// Infix '::', '==>' or 'where'; at most one occurrence.
altPrec1175
    : altPrec1170
      (('::' | '==>' | 'where') s altPrec1170)?
    ;
// Infix 'else', recursing on the right-hand operand.
// The `s` after 'else' lets whitespace/comments separate the keyword from its
// operand, as every other operator in this chain already does; WS is a
// visible token, so without it "else X" could not be parsed.
altPrec1170
    : altPrec1160 ('else' s altPrec1170)?
    ;
// Optional prefix 'if'.
altPrec1160
    : ('if' s)? altPrec1150
    ;
// Infix 'then'; at most one occurrence.
altPrec1150
    : altPrec1100 ('then' s altPrec1100)?
    ;
// Infix ';', recursing on the right-hand operand.
altPrec1100
    : altPrec1050 (';' s altPrec1100)?
    ;
// Infix '->', recursing on the right-hand operand.
altPrec1050
    : altPrec1025 ('->' s altPrec1050)?
    ;
// Infix '&', recursing on the right-hand operand.
altPrec1025
    : altPrec1000 ('&' s altPrec1025)?
    ;
// Infix COMMA, recursing on the right-hand operand. This is the point where
// the "alt" chain differs from the plain chain (prec1000 has the comma
// alternative commented out); below this level both chains share prec950.
altPrec1000
    : prec950 (COMMA s altPrec1000)?
    ;
// Top of the plain operator chain (used for comma-separated arguments):
// optional prefix '?-' or ':-'.
functor
    : ('?-' s | ':-' s)? prec1200
    ;
// Infix ':-' and '-->'; at most one occurrence, operands one level down.
prec1200
    : prec1199 ((':-' s | '-->' s) prec1199)?
    ;
// Optional declaration keyword (each one is a prefix operator, fx 1199)
// followed by the next level of the plain chain.
prec1199
    : ( ( 'end_module'      // fx 1199
        | 'import_module'   // fx 1199
        | 'include_module'  // fx 1199
        | 'initialise'      // fx 1199
        | 'initialize'      // fx 1199
        | 'finalise'        // fx 1199
        | 'finalize'        // fx 1199
        | 'inst'            // fx 1199
        | 'instance'        // fx 1199
        | 'mode'            // fx 1199
        | 'module'          // fx 1199
        | 'pragma'          // fx 1199
        | 'promise'         // fx 1199
        | 'rule'            // fx 1199
        | 'typeclass'       // fx 1199
        | 'use_module'      // fx 1199
        ) s
      )? prec1181
    ;
// Prefix 'solver' (may repeat through self-recursion), else the next level.
prec1181
    : ('solver' s prec1181)
    | prec1180
    ;
// Prefix 'type' (may repeat through self-recursion), else the next level.
prec1180
    : ('type' s prec1180)
    | prec1179
    ;
// Infix '--->', recursing on the right-hand operand.
prec1179
    : prec1175 ('--->' s prec1179)?
    ;
// Infix '::', '==>' or 'where'; at most one occurrence.
prec1175
    : prec1170
      (('::' | '==>' | 'where') s prec1170)?
    ;
// Infix 'else', recursing on the right-hand operand.
// The `s` after 'else' lets whitespace/comments separate the keyword from its
// operand, as every other operator in this chain already does; WS is a
// visible token, so without it "else X" could not be parsed.
prec1170
    : prec1160 ('else' s prec1170)?
    ;
// Optional prefix 'if'.
prec1160
    : ('if' s)? prec1150
    ;
// Infix 'then'; at most one occurrence.
prec1150
    : prec1100 ('then' s prec1100)?
    ;
// Infix ';', recursing on the right-hand operand.
prec1100
    : prec1050 (';' s prec1100)?
    ;
// Infix '->', recursing on the right-hand operand.
prec1050
    : prec1025 ('->' s prec1050)?
    ;
// Infix '&', recursing on the right-hand operand.
prec1025
    : prec1000 ('&' s prec1025)?
    ;
// The infix-comma alternative is deliberately commented out in this chain,
// so a COMMA met here is left for commaSeparatedFunctors to consume.
prec1000
    : prec950 /*(',' s prec1000)?*/
    ;
// Prefix goal annotations: the first group takes a prec920 operand, the
// 'promise_exclusive*' group recurses into prec950; otherwise fall through.
prec950
    ://TODO, how to make it work? It fails because of the ambiguity between the prefix + and the infix + in these positions.
//  ((('all' | 'arbitrary' | 'promise_equivalent_solutions' | 'promise_equivalent_solution_sets' | 'require_complete_switch' | 'some' ) s) prec920 s prec950) |
      ((('promise_impure' | 'promise_pure' | 'promise_semipure' | 'require_det' | 'require_semidet' | 'require_multi' | 'require_cc_multi' | 'require_cc_nondet' | 'require_erroneous' | 'require_failure') s) prec920)
    | ((('promise_exclusive' | 'promise_exclusive_exhaustive' | 'promise_exhaustive') s) prec950)
    | prec920
    ;
// Infix '<=', '<=>' or '=>', recursing on the right-hand operand.
prec920
    : prec900 (('<=' | '<=>' | '=>') s prec920)?
    ;
// Prefix negation ('\+', 'not', '~', may repeat), or an optional infix 'when'.
// In an ANTLR literal '\\' denotes ONE backslash, so the '\+' operator is
// written '\\+'; the earlier four-backslash spelling matched "\\+" instead.
prec900
    : ((('\\+' | 'not' | '~') s) prec900)
    | (prec800 ('when' s prec800)?)
    ;
// Prefix 'impure'/'semipure' (may repeat), a bare prec740 term, or a
// 'func'/'pred' prefix applied to a prec740 term.
prec800
    : ((('impure' | 'semipure') s) prec800)
    | prec740
    | (('func' | 'pred') s prec740)
    ;
// Infix 'or', recursing on the right-hand operand.
prec740
    : prec720 ('or' s prec740)?
    ;
// Infix 'and', recursing on the right-hand operand.
prec720
    : prec701 ('and' s prec720)?
    ;
// Infix 'is'; at most one occurrence.
prec701
    : prec700 ('is' s prec700)?
    ;
// Comparison / unification operators; at most one occurrence.
// In an ANTLR literal '\\' denotes ONE backslash, so '=\=', '\=' and '\=='
// are written with a doubled (not quadrupled) backslash.
prec700
    : prec650
      ( ( '<' | '=' | '=..' | '=:=' | '=<' | '==' | '=\\='
        | '>' | '>=' | '@<' | '@=<' | '@>' | '@>='
        | '\\=' | '\\==' | '~='
        ) s prec650
      )?
    ;
// Infix ':=' or '=^'; at most one occurrence.
prec650
    : prec550 ((':=' | '=^') s prec550)?
    ;
// Infix '..'; at most one occurrence.
prec550
    : prec500 ('..' s prec500)?
    ;
// Additive level: either a prefix '+', or a prec400 operand optionally
// followed by a chain of ONE kind of operator ('++' is allowed only once).
// In an ANTLR literal '\\' denotes ONE backslash, so '/\' and '\/' are
// written '/\\' and '\\/'.
prec500
    : ('+' s prec400 /*why not fy? */)
//  | (prec400 (('+' s prec400)* | ('++' s prec400)? | ('-' s prec400)* | ('--' s prec400)* | ('/\\' s prec400)* | ('\\/' s prec400)*))
    | (prec400
        ( ('+' s prec400)+
        | ('++' s prec400)
        | ('-' s prec400)+
        | ('--' s prec400)+
        | ('/\\' s prec400)+
        | ('\\/' s prec400)+
        )?
      )
    ;
// Multiplicative level: a prec200 operand optionally followed by a chain of
// ONE kind of operator ('mod' and 'rem' are allowed only once).
prec400
//  : prec200 (('*' s prec200)* | ('/' s prec200)* | ('//' s prec200)* | ('<<' s prec200)* | ('>>' s prec200)* | ('div' s prec200)* | ('mod' s prec200)? | ('rem' s prec200)?);
    : prec200
      ( ('*' s prec200)+
      | ('/' s prec200)+
      | ('//' s prec200)+
      | ('<<' s prec200)+
      | ('>>' s prec200)+
      | ('div' s prec200)+
      | ('mod' s prec200)
      | ('rem' s prec200)
      )?
    ;
// Optional prefix '-' or '\', then an operand with an optional infix '**'.
// '\\' is the ANTLR spelling of a single backslash. The `s` after '**'
// allows whitespace/comments before the right operand, matching every other
// infix operator in the grammar.
prec200
    : (('-' | '\\') s)? prec120 ('**' s prec120)?
    ;
// Infix ':' or a back-quoted operator (a dotted NAME sequence or a VARIABLE
// between two '`' characters); any number of occurrences.
prec120
    : prec100 ((':' | ('`' (NAME ((END NAME)*) | VARIABLE) '`')) s prec100)*
    ;
// Optional prefix '^'.
prec100
    : ('^' s)? prec99
    ;
// Infix '^', recursing on the right-hand operand.
prec99
    : prec90 ('^' s prec99)?
    ;
// Infix '@'; at most one occurrence.
prec90
    : prec40 ('@' s prec40)?
    ;
// Optional prefix '!', '!.' or '!:'.
prec40
    : (('!' | '!.' | '!:') s)? prec10
    ;
// Infix '.' with no filler after it (e.g. io.nl). The syntactic predicate
// takes the '.' branch only when the token after END is not whitespace,
// a comment or EOF.
prec10
    : prec0 ((END ~(WS | SL_COMMENT | ML_COMMENT | EOF))=>'.' prec0)*
    ;
// Atomic term: a name, number, string, bracketed term or variable, with an
// optional parenthesised argument list, then optional trailing filler.
// NOTE(review): IMPLEMENTATION_DEFINED_LITERAL is lexed but not accepted
// here - confirm whether it should be an alternative.
prec0
    : (NAME | FLOAT | STRING | parenthezedTerm | listTerm | tupleTerm | VARIABLE)
      (OPEN_CT s commaSeparatedFunctors CLOSE)?
      s
    ;
// A full term (comma operator allowed) wrapped in parentheses.
parenthezedTerm
    : OPEN_CT term CLOSE
    ;
//lexer grammar mercury;
//http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Tokens.html#Tokens
// Helper fragment: one decimal digit (never emitted as a token of its own).
fragment DIGIT
    : '0'..'9'
    ;
////TODO handle ints, 0b, 0o, 0x, 0'
// Decimal number: digits with optional fraction, or a bare fraction, then an
// optional exponent. The exponent marker is grouped as ('E'|'e') because
// alternation binds looser than sequence: the ungrouped form meant
// "'E' alone, or 'e' with digits", which accepted "1E" and rejected "1E5".
FLOAT
    : (DIGIT+ ('.' DIGIT+)? | ('.' DIGIT+))
      (('E' | 'e') ('+' | '-')? DIGIT+)?
    ;
// '#' followed by digits and a newline; treated as filler by rule `s`.
LINE
    : '#' /*INT*/DIGIT+ '\n'
    ;
// One or more adjacent double-quoted segments. Inside a segment a backslash
// must be followed by one of the listed escape characters; any character
// other than '"' and '\' is taken literally.
STRING
    : ('"' ( '\\' ('a'|'b'|'t'|'n'|'f'|'r'|'u'|'"'|/*'\''|*/'\\') | ~('"' | '\\') )* '"')+
    ;
// Unquoted name: a lowercase letter followed by letters, digits or '_'.
// The single-quoted name alternative is kept commented out.
NAME
    : (('a'..'z') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*)/* |
      ('\'' ( '\\' ('a'|'b'|'t'|'n'|'f'|'r'|'u'|'"'|'\''|'\\') | ~('\'' | '\\') )* '\'')+*/
    ;
// Variable: an uppercase letter or '_' followed by letters, digits or '_'.
VARIABLE
    : ('A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;
// '$' followed by a lowercase-initial identifier.
IMPLEMENTATION_DEFINED_LITERAL
    : '$' (('a'..'z') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*)
    ;
// Single-character punctuation tokens.
OPEN_CT             : '(' ;
CLOSE               : ')' ;
OPEN_LIST           : '[' ;
CLOSE_LIST          : ']' ;
OPEN_CURLY          : '{' ;
CLOSE_CURLY         : '}' ;
HEAD_TAIL_SEPARATOR : '|' ;
COMMA               : ',' ;
END                 : '.' ;
// A run of spaces, tabs, carriage returns or newlines. It is emitted as a
// normal token (not hidden); the parser consumes it through rule `s`.
WS
    : (' ' | '\t' | '\r' | '\n')+
    ;
// Single-line comment: '%' up to, but not including, the newline.
SL_COMMENT
    : ('%' (~ '\n')*)
    ;
// Block comment; the non-greedy loop stops at the first '*/', so comments
// do not nest. Sending it to the hidden channel is left commented out.
ML_COMMENT
    : '/*' (options {greedy=false;} : .)* '*/' /*{$channel=HIDDEN;}*/
    ;
// Term terminator: a '.' immediately followed by whitespace, a comment or
// end of input (the follower is consumed as part of this token).
TERM_END
    : '.' (WS | SL_COMMENT | ML_COMMENT | EOF)
    ;
And here is the email:
Hello,
Still a newbie... I was trying to implement an LL(*) parser for the
Mercury language, based on this
http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Terms.html#Terms
reference, but I have run into some problems; could you help me understand them?
(Maybe I was looking at the wrong page.)
- The comments are not defined anywhere (I guess this was handled by
referring to ISO Prolog); could you confirm that they are the % single-line
comments and the non-nesting /* ... */ comments (and that there are no others)?
- Do the comments act as whitespace? (I guess yes, but as comments are
not mentioned anywhere, this might be just an implementation choice.)
- How can the built-in , operator be distinguished from the comma that
separates arguments? (I tried to define the arguments as simple terms, but the
precedence rules stop that from working for the basic hello world program.
See: https://gist.github.com/3025801 - you can open it with AntlrWorks for
syntax highlighting and navigation. When I tried to define them as simple
left-associative terms separated by commas, there were ambiguity errors
because of the other alternative.)
- Do the unary +, - operators have the correct specifier? I thought it
would be fy instead of fx.
- The precedence-950 fxy parts do not seem to be separated; maybe there
is a mandatory whitespace there? (With fxy there is an ambiguity error
there in my attempt.)
- I think the . also has a double meaning, but maybe I misunderstand
something:
io.write_string("Hello, ", !IO), io.write_string("World!", !IO), io.nl(!IO). %this dot seems to have a different role from the ones following io
Thanks, gabor
PS.: I have a simpler grammar with builder for eclipse working more or
less, but without a proper grammar the error reports are really awful,
practically useless.
The https://gist.github.com/3025801/d6ca0081f11807618cc1128e40af39aa2234ad3a version seems to be working (except for precedence rule 950 and the comma in precedence rule 1000) for the simplest Hello World (without the syntactic sugar of !, and still requiring backtracking because of the . handling), but it fails for input like:
main(!IO) :-
io.write_string("Hello, ", !IO),
io.write_string("World!", !IO),
io.nl(!IO).
The https://gist.github.com/3025801/2b48f39914721191f0f3cad9ad2c2520a62bfbf5 version uses an alternative path to handle the different usages of , and has better . handling, although precedence rule 950 is still not working well.
The version https://gist.github.com/3025801/3c633639c7acfdaeb655a577d669213263d1c9a7 shows my attempt with backtracking, although that is not supported properly in Xtext, and it might be really slow, so I would prefer not using backtracking. The previous version (https://gist.github.com/3025801/0a74fff25f39f9d9895022c3f8330e27b1eea790) is without backtracking.