Created
June 30, 2012 22:24
-
-
Save aborg0/3025801 to your computer and use it in GitHub Desktop.
Attempt to define Mercury grammar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grammar mercury_alt; | |
//options { | |
// backtrack = true; | |
// memoize=true; | |
//} | |
model: | |
s item*; | |
//http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Terms.html#Terms | |
item: term TERM_END s; | |
term | |
: (altFunctor); | |
listTerm | |
: OPEN_LIST s commaSeparatedFunctors CLOSE_LIST; | |
tupleTerm | |
: OPEN_CURLY s commaSeparatedFunctors CLOSE_CURLY; | |
s : (SL_COMMENT | ML_COMMENT | WS | LINE)*; | |
commaSeparatedFunctors | |
: (functor (COMMA s functor)*)?; | |
altFunctor | |
: ('?-' s| ':-' s)? altPrec1200; | |
altPrec1200 | |
: altPrec1199 ((':-' s| '-->' s) altPrec1199)? | |
; | |
altPrec1199 | |
: (('end_module'// fx 1199 | |
|'import_module'// fx 1199 | |
|'include_module'// fx 1199 | |
|'initialise'// fx 1199 | |
|'initialize'// fx 1199 | |
|'finalise'// fx 1199 | |
|'finalize'// fx 1199 | |
|'inst'// fx 1199 | |
|'instance'// fx 1199 | |
|'mode'// fx 1199 | |
|'module'// fx 1199 | |
|'pragma'// fx 1199 | |
|'promise'// fx 1199 | |
|'rule'// fx 1199 | |
|'typeclass'// fx 1199 | |
|'use_module'// fx 1199 | |
) s)? altPrec1181; | |
altPrec1181 | |
: ('solver' s altPrec1181) | altPrec1180; | |
altPrec1180 | |
: ('type' s altPrec1180) | altPrec1179; | |
altPrec1179 | |
: altPrec1175 ('--->' s altPrec1179)?; | |
altPrec1175 | |
: altPrec1170 | |
(('::' |'==>' | 'where') s altPrec1170)?; | |
// Precedence 1170: optional 'else' branch, right-recursive on altPrec1170.
// The 's' after 'else' consumes whitespace/comments following the keyword, as is done
// after every other operator in this grammar (WS and comments are ordinary tokens here,
// so without it "else <term>" cannot be parsed).
altPrec1170
    : altPrec1160 ('else' s altPrec1170)? ;
altPrec1160 | |
: ('if' s)? altPrec1150; | |
altPrec1150 | |
: altPrec1100 ('then' s altPrec1100)?; | |
altPrec1100 | |
: altPrec1050 (';' s altPrec1100)?; | |
altPrec1050 | |
: altPrec1025 ('->' s altPrec1050)?; | |
altPrec1025 | |
: altPrec1000 ('&' s altPrec1025)?; | |
altPrec1000 | |
: prec950 (COMMA s altPrec1000)?; | |
functor | |
: ('?-' s| ':-' s)? prec1200; | |
prec1200 | |
: prec1199 ((':-' s| '-->' s) prec1199)? | |
; | |
prec1199 | |
: (('end_module'// fx 1199 | |
|'import_module'// fx 1199 | |
|'include_module'// fx 1199 | |
|'initialise'// fx 1199 | |
|'initialize'// fx 1199 | |
|'finalise'// fx 1199 | |
|'finalize'// fx 1199 | |
|'inst'// fx 1199 | |
|'instance'// fx 1199 | |
|'mode'// fx 1199 | |
|'module'// fx 1199 | |
|'pragma'// fx 1199 | |
|'promise'// fx 1199 | |
|'rule'// fx 1199 | |
|'typeclass'// fx 1199 | |
|'use_module'// fx 1199 | |
) s)? prec1181; | |
prec1181 | |
: ('solver' s prec1181) | prec1180; | |
prec1180 | |
: ('type' s prec1180) | prec1179; | |
prec1179 | |
: prec1175 ('--->' s prec1179)?; | |
prec1175 | |
: prec1170 | |
(('::' |'==>' | 'where') s prec1170)?; | |
// Precedence 1170: optional 'else' branch, right-recursive on prec1170.
// The 's' after 'else' consumes whitespace/comments following the keyword, as is done
// after every other operator in this grammar (WS and comments are ordinary tokens here,
// so without it "else <term>" cannot be parsed).
prec1170
    : prec1160 ('else' s prec1170)? ;
prec1160 | |
: ('if' s)? prec1150; | |
prec1150 | |
: prec1100 ('then' s prec1100)?; | |
prec1100 | |
: prec1050 (';' s prec1100)?; | |
prec1050 | |
: prec1025 ('->' s prec1050)?; | |
prec1025 | |
: prec1000 ('&' s prec1025)?; | |
prec1000 | |
: prec950 /*(',' s prec1000)?*/; | |
// Precedence 950: prefix keywords (promise_* / require_*) applied to a prec920 or
// prec950 operand; anything else falls through to prec920.
prec950 | |
://TODO: how to make this work? It fails because of the ambiguity between the prefix + and the infix + in these positions. | |
// ((('all' | 'arbitrary' | 'promise_equivalent_solutions' | 'promise_equivalent_solution_sets' | 'require_complete_switch' | 'some' ) s) prec920 s prec950) | |
((('promise_impure' | 'promise_pure' | 'promise_semipure' | 'require_det' | 'require_semidet' | 'require_multi' | 'require_cc_multi' | 'require_cc_nondet' | 'require_erroneous' | 'require_failure') s) prec920) | |
((('promise_exclusive' | 'promise_exclusive_exhaustive' | 'promise_exhaustive') s) prec950) | |
prec920; | |
prec920 | |
: prec900 (('<=' | '<=>' | '=>') s prec920)?; | |
// Precedence 900: prefix negation operators applied recursively, or a prec800 term
// with an optional 'when' operand.
// '\\+' is the ANTLR spelling of the operator \+ (one backslash); the previous
// '\\\\+' literal matched two backslashes followed by '+'.
prec900
    : ((('\\+' | 'not' | '~') s) prec900) |
      (prec800 ('when' s prec800)?);
prec800 | |
: ((('impure' | 'semipure') s) prec800) | | |
prec740 | | |
(('func' | 'pred') s prec740); | |
prec740 | |
: prec720 ('or' s prec740)?; | |
prec720 | |
: prec701 ('and' s prec720)?; | |
prec701 | |
: prec700 ('is' s prec700)?; | |
// Precedence 700: a single, non-associative comparison/unification operator between
// two prec650 operands.
// Backslash-containing operators are written with one escaped backslash ('\\'), so
// they match =\=, \= and \== rather than the doubled-backslash forms.
prec700
    : prec650 (('<' | '=' | '=..' | '=:=' | '=<' | '==' | '=\\=' | '>' | '>=' | '@<' | '@=<' | '@>' | '@>=' | '\\=' | '\\==' | '~=') s prec650)?;
prec650 | |
: prec550 ((':=' | '=^') s prec550)? | |
; | |
prec550 | |
: prec500 ('..' s prec500)?; | |
// Precedence 500: prefix '+', or a prec400 operand followed by a chain of one
// additive/bitwise operator ('++' is allowed only once).
// '/\\' and '\\/' are the ANTLR spellings of /\ and \/ (one backslash each); the
// previous literals contained two backslashes and so never matched those operators.
prec500
    : ('+' s prec400 /*why not fy? */) |
//    (prec400 (('+' s prec400)* | ('++' s prec400)? | ('-' s prec400)* | ('--' s prec400)* | ('/\\' s prec400)* | ('\\/' s prec400)*));
      (prec400 (('+' s prec400)+ | ('++' s prec400) | ('-' s prec400)+ | ('--' s prec400)+ | ('/\\' s prec400)+ | ('\\/' s prec400)+)?);
prec400 | |
// : prec200 (('*' s prec200)* | ('/' s prec200)* | ('//' s prec200)* | ('<<' s prec200)* | ('>>' s prec200)* | ('div' s prec200)* | ('mod' s prec200)? | ('rem' s prec200)?); | |
: prec200 (('*' s prec200)+ | ('/' s prec200)+ | ('//' s prec200)+ | ('<<' s prec200)+ | ('>>' s prec200)+ | ('div' s prec200)+ | ('mod' s prec200) | ('rem' s prec200))?; | |
// Precedence 200: optional prefix '-' or '\' followed by a prec120 operand and an
// optional '**' with a second prec120 operand.
// Two fixes: the prefix backslash is written '\\' (one backslash, not two), and 's'
// now follows '**' so whitespace/comments after the operator are consumed like after
// every other operator in this grammar.
prec200
    : (('-' | '\\') s)? prec120 ('**' s prec120)?;
prec120 | |
: prec100 ((':' | ('`' (NAME ((END NAME)*)|VARIABLE) '`')) s prec100)*; | |
prec100 | |
: ('^' s)? prec99; | |
prec99 | |
: prec90 ('^' s prec99)?; | |
prec90 | |
: prec40 ('@' s prec40)?; | |
prec40 | |
: (('!' | '!.' | '!:') s)? prec10; | |
prec10 | |
: prec0 ((END ~(WS | SL_COMMENT | ML_COMMENT | EOF))=>'.' prec0)*; | |
// Primary term: a name, number, string, implementation-defined literal, bracketed
// term or variable, optionally followed by a parenthesised argument list, then any
// trailing whitespace/comments.
// IMPLEMENTATION_DEFINED_LITERAL is accepted here because the lexer already emits
// that token but no parser rule consumed it, so input containing one could not parse.
prec0
    : (NAME | FLOAT | STRING | IMPLEMENTATION_DEFINED_LITERAL | parenthezedTerm | listTerm | tupleTerm | VARIABLE) (OPEN_CT s commaSeparatedFunctors CLOSE)? s;
parenthezedTerm: OPEN_CT term CLOSE; | |
//lexer grammar mercury; | |
//http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Tokens.html#Tokens | |
fragment DIGIT: '0'..'9'; | |
////TODO handle ints, 0b, 0o, 0x, 0' | |
// Numeric literal: digits with an optional fraction, or a bare fraction, followed by
// an optional exponent. (Plain digit sequences also match, since both the fraction
// and the exponent are optional.)
// The exponent marker is grouped as ('E'|'e') so that either case is followed by the
// optional sign and the digits. Ungrouped, 'E' on its own was a complete alternative,
// so "1E5" was lexed as "1E" followed by "5".
FLOAT : (DIGIT+ ('.' DIGIT+)? | '.' DIGIT+) (('E'|'e') ('+'|'-')? DIGIT+)?;
LINE: | |
'#' /*INT*/DIGIT+ '\n' | |
; | |
STRING: | |
('"' ( '\\' ('a'|'b'|'t'|'n'|'f'|'r'|'u'|'"'|/*'\''|*/'\\') | ~('"' | '\\') )* '"')+ | |
; | |
NAME: | |
(('a'..'z') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*)/* | | |
('\'' ( '\\' ('a'|'b'|'t'|'n'|'f'|'r'|'u'|'"'|'''|'\\') | ~('\'' | '\\') )* '\'')+*/ | |
; | |
VARIABLE: | |
('A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* | |
; | |
IMPLEMENTATION_DEFINED_LITERAL: | |
'$' (('a'..'z') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*) | |
; | |
OPEN_CT: | |
'(' | |
; | |
CLOSE: ')'; | |
OPEN_LIST:'['; | |
CLOSE_LIST | |
: ']'; | |
OPEN_CURLY:'{'; | |
CLOSE_CURLY:'}'; | |
HEAD_TAIL_SEPARATOR:'|'; | |
COMMA:','; | |
END:'.'; | |
WS : (' '|'\t'|'\r'|'\n')+; | |
SL_COMMENT | |
: ('%' (~ '\n')*); | |
ML_COMMENT | |
: '/*' (options {greedy=false;} : .)* '*/' /*{$channel=HIDDEN;}*/; | |
TERM_END | |
: '.' (WS | SL_COMMENT | ML_COMMENT | EOF); |
The https://gist.github.com/3025801/d6ca0081f11807618cc1128e40af39aa2234ad3a version seems to be working (except for precedence rule 950 and the comma in precedence rule 1000) for the simplest Hello World (without the syntactic sugar of !, and it still requires backtracking because of the . handling), but it fails for input like:
main(!IO) :-
io.write_string("Hello, ", !IO),
io.write_string("World!", !IO),
io.nl(!IO).
The https://gist.github.com/3025801/2b48f39914721191f0f3cad9ad2c2520a62bfbf5 version uses an alternative path to handle the different uses of , and has better . handling, although precedence rule 950 is still not working well.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
And here is the email:
Hello,
Still a newbie... I was trying to implement an LL(*) parser for the
Mercury language, based on this
http://www.mercury.csse.unimelb.edu.au/information/doc-release/mercury_ref/Terms.html#Terms
reference, but I have run into some problems; could you help me understand?
(Maybe I was looking at the wrong page.)
referring to ISO Prolog), could you confirm, that those are the % single
line, and non-nesting /*, */ comments (and there is no more)?
not mentioned anywhere, this might be just an implementation choice.)
comma? (I tried to define the arguments as simple terms, but the
precedence rules make it not working for the basic hello world program.
See: https://gist.github.com/3025801 (you can open it with AntlrWorks for
syntax highlighting and navigation). When I tried to define them as simple
left-associative terms separated by commas, there were ambiguity errors
because of the other alternative.)
would be fy instead of fx.
is a mandatory whitespace there? (With fxy there is an ambiguity error
there in my attempt.)
something: