Skip to content

Instantly share code, notes, and snippets.

@kaby76
Created June 29, 2021 13:20
Show Gist options
  • Save kaby76/8a96f5b79348b5006d020fd9f8255d72 to your computer and use it in GitHub Desktop.
Save kaby76/8a96f5b79348b5006d020fd9f8255d72 to your computer and use it in GitHub Desktop.
Conversion of lark.lark to ANTLR4 syntax (LarkParser.g4 and LarkLexer.g4)
// https://github.com/lark-parser/lark/blob/d676df9b888ead42daffd31c035d95241bff0920/lark/grammars/lark.lark
// LarkParser.g4
//
// Parser half of the ANTLR4 port of the Lark grammar referenced above.
// All token types (including the quoted literals used below) are taken from
// LarkLexer through the tokenVocab option.
parser grammar LarkParser;

options {
    tokenVocab = LarkLexer;
    // Base class for generated context objects; it is not defined in this
    // grammar and has to be supplied by the host project.
    contextSuperClass = AttributedParseTreeNode;
}

// Entry point: newline-separated items (blank lines allowed), then EOF.
start
    : (item? NL)* item? EOF
    ;

// One top-level entry of a grammar file.
item
    : rule_
    | token
    | statement
    ;

// Rule definition: name, optional template parameters, optional priority,
// then the body after ':'.
rule_
    : RULE rule_params priority? ':' expansions
    ;

// Token (terminal) definition; same shape as rule_ but keyed by TOKEN.
token
    : TOKEN token_params priority? ':' expansions
    ;

// Optional '{a, b, ...}' parameter list of RULE names (may match nothing).
rule_params
    : ('{' RULE (',' RULE)* '}')?
    ;

// Optional '{A, B, ...}' parameter list of TOKEN names (may match nothing).
token_params
    : ('{' TOKEN (',' TOKEN)* '}')?
    ;

// '.' NUMBER suffix placed between a definition's name and its ':'.
priority
    : '.' NUMBER
    ;

// '%'-directives.
statement
    : '%ignore' expansions
    | '%import' import_path ('->' name)?
    | '%import' import_path name_list
    | '%override' rule_
    | '%declare' name+
    ;

// Dotted path with an optional leading '.'.
import_path
    : '.'? name ('.' name)*
    ;

// Parenthesised, comma-separated list of names.
name_list
    : '(' name (',' name)* ')'
    ;

// Alternatives separated by VBAR.
expansions
    : alias (VBAR alias)*
    ;

// One alternative, optionally followed by '->' and a RULE name.
alias
    : expansion ('->' RULE)?
    ;

// Sequence of zero or more expressions (an alternative may be empty).
expansion
    : expr*
    ;

// Atom with an optional postfix: an OP, or '~' NUMBER with an optional
// '..' NUMBER upper part.
expr
    : atom (OP | '~' NUMBER ('..' NUMBER)? )?
    ;

// Parenthesised group, bracketed group, or a plain value.
atom
    : '(' expansions ')'
    | '[' expansions ']'
    | value
    ;

// Leaf of an expression: string range, name reference, literal, or a name
// applied to '{...}' arguments.
value
    : STRING '..' STRING
    | name
    | (REGEXP | STRING)
    | name '{' value (',' value)* '}'
    ;

// Reference to a rule or a token.
name
    : RULE
    | TOKEN
    ;
// LarkLexer.g4
//
// Lexer half of the ANTLR4 port of the Lark grammar. The rules below give
// names to the literals that LarkParser spells as quoted strings.
lexer grammar LarkLexer;

// Channel that receives inline whitespace and comments.
channels { OFF_CHANNEL }

// Punctuation.
COLON    : ':' ;
LC       : '{' ;
RC       : '}' ;
LP       : '(' ;
RP       : ')' ;
LB       : '[' ;
RB       : ']' ;
COMMA    : ',' ;
DOT      : '.' ;
ARROW    : '->' ;

// '%'-directive keywords.
IGNORE   : '%ignore' ;
IMPORT   : '%import' ;
OVERRIDE : '%override' ;
DECLARE  : '%declare' ;

// Operators used by ranges ('..') and repetition counts ('~').
DD       : '..' ;
SQ       : '~' ;
// Alternative separator; may absorb a preceding line break so alternatives
// can be continued on the following line.
VBAR: NL? '|' ;
// Postfix repetition operators.
OP: [+*] | '?' ;
// Rule name: lower-case identifier with optional '!' and '_'/'?' prefixes.
RULE: '!'? [_?]? [a-z] [_a-z0-9]* ;
// Token name: upper-case identifier with an optional '_' prefix.
TOKEN: '_'? [A-Z] [_A-Z0-9]* ;
// Double-quoted string with an optional trailing 'i' flag.
STRING: FSTRING 'i'? ;
// Regular-expression literal with optional flag letters.
// The body must contain at least one element ('+?' rather than '*?'):
// otherwise a bare '//' would match as an empty regexp and, because this rule
// precedes COMMENT, win over an empty or flag-letter-only comment such as
// '//' or '//i'. This mirrors the '(?!\/)' lookahead of the Lark original.
REGEXP: '/' ('\\' '/' | '\\' '\\' | ~'/' )+? '/' [imslux]* ;
// One or more line breaks plus any whitespace that follows them.
NL: ('\r'? '\n')+ Space* ;
//
// Strings
//
// One ordinary string character: anything except the closing quote, a
// backslash (which introduces an escape) or a newline.
fragment STRING_INNER: ~["\\\n] ;
// String body: ordinary characters and backslash escapes. Treating
// backslash + character as a unit lets \" appear inside a string instead of
// terminating it, and \\ before the closing quote still ends the string.
fragment STRING_ESC_INNER: ( STRING_INNER | '\\' ~'\n' )* ;
// Complete double-quoted string.
fragment FSTRING : '"' STRING_ESC_INNER '"' ;
//
// Numbers
//
// Single decimal digit.
fragment DIGIT    : [0-9] ;
// Single hexadecimal digit, either case.
fragment HEXDIGIT : [a-fA-F] | DIGIT ;
// Unsigned run of decimal digits.
fragment INT      : DIGIT+ ;
// Decimal integer with an optional sign.
NUMBER            : ('-' | '+')? INT ;
//
// Whitespace
//
// Spaces and tabs within a line; kept off the parser's channel.
WS_INLINE: (' ' | '\t')+ -> channel(OFF_CHANNEL) ;
// '//' comment running to end of line, together with any whitespace that
// precedes it; kept off the parser's channel.
COMMENT: Space* '//' (~'\n')* -> channel(OFF_CHANNEL) ;
// Any single whitespace character: space, tab, LF, CR, form feed, vertical
// tab. The vertical tab is written as the escape '\u000B'; the former
// 'u2B7F' had no backslash and therefore matched the literal text "u2B7F".
fragment Space : (' '| '\t' | '\n' | '\r' | '\f' | '\u000B' );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment