heyqbnk/README.md

## README.md

      
    Raw
  

              README.md
            
          
    TypeLanguage ANTLR v4 grammar

The official TL grammar can be found here:
https://gitlab.com/telekram/telekram/-/blob/master/generator/src/commonMain/antlr/TL.g4
It looks like the official grammar does not work correctly. You can check this by parsing the following TL schema with the grammar mentioned above:
https://github.com/newton-blockchain/ton/blob/master/tl/generate/scheme/tonlib_api.tl
Some problems occur and the resulting parse tree is not correct.
Here is a TL grammar which I wrote myself. It is based on the TL formal description but has a few small modifications to make ANTLR work properly.

  
## TL.g4
grammar TL;

/*
 * This grammar is based on the official TypeLanguage formal description, but
 * has some modifications to make ANTLR work properly.
 *
 * @link https://core.telegram.org/mtproto/TL-formal
 * @link https://core.telegram.org/mtproto/TL-combinators
 */

/*
 * Lexer settings.
 */
/*
 * Character-class helpers. They are declared as fragments, so they only act
 * as building blocks for the token rules and are not emitted as tokens.
 */
fragment DIGIT: [0-9];
fragment HEX_DIGIT: [a-f0-9];
fragment LC_LETTER: [a-z];
fragment UC_LETTER: [A-Z];
fragment LETTER
    : LC_LETTER
    | UC_LETTER
    ;
fragment IDENT_CHAR
    : LETTER
    | DIGIT
    | '_'
    ;

/* A namespace component has the same shape as a lower-case identifier. */
fragment NAMESPACE_IDENT: LC_IDENT;

/* Declaration mode switches. */
FUNCTIONS: '---functions---';
TYPES: '---types---';

/*
 * Keywords used by the final_decl rule.
 *
 * They are declared before UC_IDENT on purpose: when several lexer rules
 * match the same, equally long text, ANTLR picks the rule that is defined
 * first. Declared after UC_IDENT these tokens could never be produced and
 * final_decl was unreachable. Longer names such as `Newton` still lex as
 * UC_IDENT because the longest match wins.
 */
NEW_KW: 'New';
FINAL_KW: 'Final';
EMPTY_KW: 'Empty';

/*
 * Identifier tokens.
 *
 * LC_IDENT_FULL is a combinator name followed by '#' and exactly eight hex
 * digits. ANTLR has no `{n}` repetition operator (braces denote an embedded
 * action), so the eight digits are spelled out explicitly. The namespace
 * prefix is optional, so that names without a namespace, for example
 * `boolTrue#997275b5`, are recognised as a single token as well.
 */
LC_IDENT_FULL
    : (NAMESPACE_IDENT '.')* LC_IDENT '#'
      HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
      HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;
LC_IDENT_NS: (NAMESPACE_IDENT '.')+ LC_IDENT;
UC_IDENT_NS: (NAMESPACE_IDENT '.')+ UC_IDENT;
LC_IDENT: LC_LETTER IDENT_CHAR*;
UC_IDENT: UC_LETTER IDENT_CHAR*;

/* Natural number literal. */
NAT_CONST: DIGIT+;

/* Punctuation used in TL syntax. */
UNDERSCORE: '_';
COLON: ':';
SEMICOLON: ';';
OPEN_PAR: '(';
CLOSE_PAR: ')';
OPEN_BRACKET: '[';
CLOSE_BRACKET: ']';
OPEN_BRACE: '{';
CLOSE_BRACE: '}';
EQUALS: '=';
HASH: '#';
QUESTION_MARK: '?';
PERCENT: '%';
PLUS: '+';
LANGLE: '<';
RANGLE: '>';
COMMA: ',';
DOT: '.';
ASTERISK: '*';
EXCL_MARK: '!';

/* Comments: both forms are skipped and never reach the parser. */
MULTILINE_COMMENT
    : '/*' .*? '*/' -> skip
    ;
LINE_COMMENT
    : '//' .*? ('\n' | EOF) -> skip
    ;

/* Runs of spaces, carriage returns, line feeds and tabs are skipped too. */
WS
    : [ \r\n\t]+ -> skip
    ;

/*
 * Parser settings.
 */
/* Entry point: a complete TL program followed by the end of input. */
tl_file
    : tl_program EOF
    ;

/*
 * A program opens with constructor declarations; each FUNCTIONS or TYPES
 * marker that follows introduces another run of declarations.
 */
tl_program
    : constr_declarations
      ( FUNCTIONS fun_declarations
      | TYPES constr_declarations
      )*
    ;

constr_declarations
    : declaration*
    ;

fun_declarations
    : declaration*
    ;

/* A single declaration of any supported kind. */
declaration
    : combinator_decl
    | partial_app_decl
    | final_decl
    | builtin_combinator_decl
    ;

/* type_expr and nat_expr are plain aliases of expr. */
type_expr
    : expr
    ;
nat_expr
    : expr
    ;

/* An expression is a (possibly empty) sequence of sub-expressions. */
expr
    : subexpr*
    ;

/* A term, optionally combined with natural constants through '+'. */
subexpr
    : term
    | NAT_CONST PLUS subexpr
    | subexpr PLUS NAT_CONST
    ;

/*
 * A term is one of the alternatives below. Each alternative lives in its own
 * rule, so each shows up as a separate node in the parse tree.
 */
term
    : term_pars
    | term_type_ident
    | term_var_ident
    | term_nat_const
    | term_percent
    | term_type_ident_generic
    ;

/* Parenthesised expression. */
term_pars: OPEN_PAR expr CLOSE_PAR ;
term_type_ident: type_ident ;
term_var_ident: var_ident ;
term_nat_const: NAT_CONST ;
/* '%' followed by another term. */
term_percent: PERCENT term ;
/* Type identifier with a comma-separated list of expressions in angle brackets. */
term_type_ident_generic: type_ident LANGLE expr (COMMA expr)* RANGLE ;

/* Type names: boxed (upper-case) names, lower-case names, or '#'. */
type_ident
    : boxed_type_ident
    | LC_IDENT_NS
    | LC_IDENT
    | HASH
    ;

boxed_type_ident
    : UC_IDENT_NS
    | UC_IDENT
    ;

/* Variable names never carry a namespace. */
var_ident
    : LC_IDENT
    | UC_IDENT
    ;

/* type_term and nat_term are plain aliases of term. */
type_term: term ;
nat_term: term ;

/*
 * Combinator declaration: identifier, optional-argument groups in braces,
 * arguments, then '=' result type ';'.
 */
combinator_decl
    : full_combinator_id opt_args* args* EQUALS result_type SEMICOLON
    ;

full_combinator_id
    : LC_IDENT_FULL
    | LC_IDENT_NS
    | LC_IDENT
    | UNDERSCORE
    ;

/* One or more variable names sharing a type expression, wrapped in braces. */
opt_args
    : OPEN_BRACE var_ident+ COLON EXCL_MARK? type_expr CLOSE_BRACE
    ;

/* The four argument forms. */
args
    : args_simple
    | args_optional
    | args_pars
    | args_type_term
    ;

/* name ':' [conditional] ['!'] type */
args_simple
    : var_ident_opt COLON conditional_def? EXCL_MARK? type_term
    ;

/* [name ':'] [multiplicity '*'] '[' nested arguments ']' */
args_optional
    : (var_ident_opt COLON)? (multiplicity ASTERISK)? OPEN_BRACKET args* CLOSE_BRACKET
    ;

/* '(' names ':' ['!'] type ')' */
args_pars
    : OPEN_PAR var_ident_opt+ COLON EXCL_MARK? type_term CLOSE_PAR
    ;

/* Anonymous argument: ['!'] type */
args_type_term
    : EXCL_MARK? type_term
    ;

multiplicity
    : nat_term
    ;

/* A variable name or the '_' placeholder. */
var_ident_opt
    : var_ident
    | UNDERSCORE
    ;

/* name ['.' number] '?' */
conditional_def
    : var_ident (DOT NAT_CONST)? QUESTION_MARK
    ;
/* Result type: a boxed type followed by sub-expressions, or its generic form. */
result_type
    : result_type_subexpr
    | result_type_generic
    ;

result_type_subexpr
    : boxed_type_ident subexpr*
    ;

result_type_generic
    : boxed_type_ident LANGLE subexpr (COMMA subexpr)* RANGLE
    ;

/* Built-in combinator: identifier '?' '=' boxed type ';'. */
builtin_combinator_decl
    : full_combinator_id QUESTION_MARK EQUALS boxed_type_ident SEMICOLON
    ;

/* Partial application of either a type or a combinator. */
partial_app_decl
    : partial_type_app_decl
    | partial_comb_app_decl
    ;

/*
 * Partial type application. Both forms are terminated by ';'; the generic
 * (angle-bracket) form needs the terminator as well, as in the TL formal
 * description.
 */
partial_type_app_decl
    : boxed_type_ident subexpr+ ';'
    | boxed_type_ident '<' expr (',' expr)* '>' ';'
    ;

/* Partial combinator application: combinator name, sub-expressions, ';'. */
partial_comb_app_decl: combinator_id subexpr+ ';';
combinator_id: LC_IDENT_NS | LC_IDENT | '_';

/* Finalization declaration: keyword, boxed type name, ';'. */
final_decl
    : final_decl_keyword boxed_type_ident SEMICOLON
    ;

final_decl_keyword
    : NEW_KW
    | FINAL_KW
    | EMPTY_KW
    ;
	grammar TL;

	/*
	* This grammar is based on the official TypeLanguage formal description, but
	* has some modifications to make ANTLR work properly.
	*
	* @link https://core.telegram.org/mtproto/TL-formal
	* @link https://core.telegram.org/mtproto/TL-combinators
	*/

	/*
	* Lexer settings.
	*/
	/*
	 * Character-class helpers. They are declared as fragments, so they only act
	 * as building blocks for the token rules and are not emitted as tokens.
	 * Alternatives are separated by a plain '|'; a backslash-escaped pipe is
	 * not valid ANTLR syntax.
	 */
	fragment IDENT_CHAR: LETTER | DIGIT | '_';
	fragment LETTER: LC_LETTER | UC_LETTER;
	fragment LC_LETTER: [a-z];
	fragment UC_LETTER: [A-Z];
	fragment DIGIT: [0-9];
	fragment HEX_DIGIT: [a-f0-9];
	/* A namespace component has the same shape as a lower-case identifier. */
	fragment NAMESPACE_IDENT: LC_IDENT;

	/* Declaration mode switches. */
	FUNCTIONS: '---functions---';
	TYPES: '---types---';

	/*
	 * Keywords used by the final_decl rule.
	 *
	 * They are declared before UC_IDENT on purpose: when several lexer rules
	 * match the same, equally long text, ANTLR picks the rule that is defined
	 * first. Declared after UC_IDENT these tokens could never be produced and
	 * final_decl was unreachable. Longer names such as `Newton` still lex as
	 * UC_IDENT because the longest match wins.
	 */
	NEW_KW: 'New';
	FINAL_KW: 'Final';
	EMPTY_KW: 'Empty';

	/*
	 * Identifier tokens.
	 *
	 * LC_IDENT_FULL is a combinator name followed by '#' and exactly eight hex
	 * digits. ANTLR has no `{n}` repetition operator (braces denote an embedded
	 * action), so the eight digits are spelled out explicitly. The namespace
	 * prefix is optional, so that names without a namespace, for example
	 * `boolTrue#997275b5`, are recognised as a single token as well.
	 */
	LC_IDENT_FULL
	    : (NAMESPACE_IDENT '.')* LC_IDENT '#'
	      HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
	      HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
	    ;
	LC_IDENT_NS: (NAMESPACE_IDENT '.')+ LC_IDENT;
	UC_IDENT_NS: (NAMESPACE_IDENT '.')+ UC_IDENT;
	LC_IDENT: LC_LETTER IDENT_CHAR*;
	UC_IDENT: UC_LETTER IDENT_CHAR*;

	/* Natural number literal. */
	NAT_CONST: DIGIT+;

	/* Punctuation used in TL syntax. */
	UNDERSCORE: '_';
	COLON: ':';
	SEMICOLON: ';';
	OPEN_PAR: '(';
	CLOSE_PAR: ')';
	OPEN_BRACKET: '[';
	CLOSE_BRACKET: ']';
	OPEN_BRACE: '{';
	CLOSE_BRACE: '}';
	EQUALS: '=';
	HASH: '#';
	QUESTION_MARK: '?';
	PERCENT: '%';
	PLUS: '+';
	LANGLE: '<';
	RANGLE: '>';
	COMMA: ',';
	DOT: '.';
	ASTERISK: '*';
	EXCL_MARK: '!';

	/* Whitespace: runs of spaces, carriage returns, line feeds and tabs are skipped. */
	WS: [ \r\n\t]+ -> skip;

	/*
	 * Comments: both forms are skipped and never reach the parser.
	 * A line comment runs up to the next line feed or the end of input; a
	 * block comment runs from its slash-star opener to the first star-slash.
	 */
	LINE_COMMENT: '//' .*? ('\n' | EOF) -> skip;
	MULTILINE_COMMENT: '/*' .*? '*/' -> skip;

	/*
	* Parser settings.
	*/
	/* Entry point: a complete TL program followed by the end of input. */
	tl_file
	    : tl_program EOF
	    ;

	/*
	 * A program opens with constructor declarations; each FUNCTIONS or TYPES
	 * marker that follows introduces another run of declarations.
	 */
	tl_program
	    : constr_declarations
	      ( FUNCTIONS fun_declarations
	      | TYPES constr_declarations
	      )*
	    ;

	constr_declarations
	    : declaration*
	    ;

	fun_declarations
	    : declaration*
	    ;

	/* A single declaration of any supported kind. */
	declaration
	    : combinator_decl
	    | partial_app_decl
	    | final_decl
	    | builtin_combinator_decl
	    ;

	/* type_expr and nat_expr are plain aliases of expr. */
	type_expr: expr;
	nat_expr: expr;

	/* An expression is a (possibly empty) sequence of sub-expressions. */
	expr: subexpr*;

	/* A term, optionally combined with natural constants through '+'. */
	subexpr
	    : term
	    | NAT_CONST '+' subexpr
	    | subexpr '+' NAT_CONST
	    ;

	/*
	 * A term is one of the alternatives below. Each alternative lives in its
	 * own rule, so each shows up as a separate node in the parse tree.
	 */
	term
	    : term_pars
	    | term_type_ident
	    | term_var_ident
	    | term_nat_const
	    | term_percent
	    | term_type_ident_generic
	    ;

	/* Parenthesised expression. */
	term_pars: '(' expr ')';
	term_type_ident: type_ident;
	term_var_ident: var_ident;
	term_nat_const: NAT_CONST;
	/* '%' followed by another term. */
	term_percent: '%' term;
	/* Type identifier with a comma-separated list of expressions in angle brackets. */
	term_type_ident_generic: type_ident '<' expr (',' expr)* '>';

	/* Type names: boxed (upper-case) names, lower-case names, or '#'. */
	type_ident: boxed_type_ident | LC_IDENT_NS | LC_IDENT | '#';
	boxed_type_ident: UC_IDENT_NS | UC_IDENT;

	/* Variable names never carry a namespace. */
	var_ident: LC_IDENT | UC_IDENT;

	/* type_term and nat_term are plain aliases of term. */
	type_term: term;
	nat_term: term;

	/*
	 * Combinator declaration: identifier, optional-argument groups in braces,
	 * arguments, then '=' result type ';'.
	 */
	combinator_decl: full_combinator_id opt_args* args* '=' result_type ';';
	full_combinator_id: LC_IDENT_FULL | LC_IDENT_NS | LC_IDENT | '_';

	/* One or more variable names sharing a type expression, wrapped in braces. */
	opt_args: '{' var_ident+ ':' '!'? type_expr '}';

	/* The four argument forms. */
	args
	    : args_simple
	    | args_optional
	    | args_pars
	    | args_type_term
	    ;

	/* name ':' [conditional] ['!'] type */
	args_simple: var_ident_opt ':' conditional_def? '!'? type_term;

	/*
	 * [name ':'] [multiplicity '*'] '[' nested arguments ']'
	 * The multiplicity is followed by an asterisk and the brackets hold zero
	 * or more nested arguments.
	 */
	args_optional: (var_ident_opt ':')? (multiplicity '*')? '[' args* ']';

	/* '(' names ':' ['!'] type ')' */
	args_pars: '(' var_ident_opt+ ':' '!'? type_term ')';

	/* Anonymous argument: ['!'] type */
	args_type_term: '!'? type_term;

	multiplicity: nat_term;

	/* A variable name or the '_' placeholder. */
	var_ident_opt: var_ident | '_';

	/* name ['.' number] '?' */
	conditional_def: var_ident ('.' NAT_CONST)? '?';
	/* Result type: a boxed type followed by sub-expressions, or its generic form. */
	result_type
	    : result_type_subexpr
	    | result_type_generic
	    ;
	result_type_subexpr: boxed_type_ident subexpr*;
	result_type_generic: boxed_type_ident '<' subexpr (',' subexpr)* '>';

	/* Built-in combinator: identifier '?' '=' boxed type ';'. */
	builtin_combinator_decl: full_combinator_id '?' '=' boxed_type_ident ';';

	/* Partial application of either a type or a combinator. */
	partial_app_decl
	    : partial_type_app_decl
	    | partial_comb_app_decl
	    ;

	/*
	 * Partial type application. Both forms are terminated by ';'; the generic
	 * (angle-bracket) form needs the terminator as well, as in the TL formal
	 * description.
	 */
	partial_type_app_decl
	    : boxed_type_ident subexpr+ ';'
	    | boxed_type_ident '<' expr (',' expr)* '>' ';'
	    ;

	/* Partial combinator application: combinator name, sub-expressions, ';'. */
	partial_comb_app_decl: combinator_id subexpr+ ';';
	combinator_id: LC_IDENT_NS | LC_IDENT | '_';

	/* Finalization declaration: keyword, boxed type name, ';'. */
	final_decl: final_decl_keyword boxed_type_ident ';';

	/* One of the three keyword tokens. */
	final_decl_keyword
	    : NEW_KW
	    | FINAL_KW
	    | EMPTY_KW
	    ;