Created
November 17, 2023 15:28
-
-
Save nobodywasishere/b18d196ed06e91d8afb72e41d1c8fdeb to your computer and use it in GitHub Desktop.
Work in progress EBNF language grammar for Crystal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
document = expressions ; | |
one_space = ' ' ; | |
opt_space = { one_space } ; | |
all_space = one_space, { one_space } ; | |
one_newline = '\n' | '\r\n' | '\r' ; | |
opt_newline = { one_newline } ; | |
all_newline = one_newline, { one_newline } ; | |
one_space_or_newline = one_space | one_newline ; | |
opt_space_or_newline = { one_space_or_newline } ; | |
all_space_or_newline = one_space_or_newline, { one_space_or_newline } ; | |
one_semicolon_or_newline = ';' | one_newline ; | |
opt_semicolon_or_newline = { one_semicolon_or_newline } ; | |
all_semicolon_or_newline = one_semicolon_or_newline, { one_semicolon_or_newline } ; | |
statement_end = opt_space, one_semicolon_or_newline ; | |
expressions = { multi_assign, statement_end } ; | |
multi_assign = expression ; | |
(* todo: limit to assign or call *) | |
(* | | |
( | |
{ multi_assign_part, ',' }, opt_space_or_newline, | |
[ '*', multi_assign_part, ',' ], opt_space_or_newline, | |
{ multi_assign_part, ',' }, opt_space_or_newline, | |
[ multi_assign_part ] | |
) ; *) | |
multi_assign_part = opt_space, expression, opt_space ; | |
expression = assign, opt_space, [ expression_suffix ] ; | |
(* todo *) | |
expression_suffix = expression_suffix_if | expression_suffix_unless | expression_suffix_rescue | expression_suffix_ensure ; | |
expression_suffix_if = "if", all_space, assign_no_control ; | |
expression_suffix_unless = "unless", all_space, assign_no_control ; | |
expression_suffix_rescue = "rescue", all_space, assign_no_control ; | |
expression_suffix_ensure = "ensure", all_space, assign_no_control ; | |
(* todo *) | |
(* An assignment-level expression: a ternary-level operand, optionally
   followed by either a compound assignment or a plain '=' assignment.
   The two alternatives are grouped explicitly because ',' binds tighter
   than '|': without the grouping, opt_space would belong only to the first
   alternative and a bare assign_no_control could follow with no '='. *)
assign = question_colon,
  [
    opt_space,
    (
      ( assignment_operator, all_space_or_newline, assign_no_control ) |
      ( '=', all_space_or_newline, ( bare_proc_type | assign_no_control ) ) (* todo: differentiate '[]=' operator *)
    )
  ] ;
(* Compound assignment operators (everything except plain '=').
   '|=' is listed alongside its bitwise siblings '&=' and '^='. *)
assignment_operator =
  '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '|=' | '&=' | '^=' |
  '**=' | '<<=' | '>>=' | '||=' | '&&=' | '&+=' | '&-=' | '&*=' ;
question_colon = range, | |
{ | |
all_space, | |
"?", all_space_or_newline, question_colon, all_space_or_newline, | |
":", all_space_or_newline, question_colon | |
} ; | |
(* todo: the precedence of this may be wrong, as the parser checks manually for end tokens *) | |
range = [ range_operator ], infix_or, { range_operator, infix_or } ; | |
range_operator = '..' | '...' ; | |
(* Binary operator chain, from loosest to tightest binding.
   Every level uses repetition so that chains such as `a + b - c` can be
   derived, and every operator group is an alternation: exactly one operator
   appears between two operands. *)
infix_or = infix_and, { opt_space, "||", opt_space_or_newline, infix_and } ;
infix_and = infix_equality, { opt_space, "&&", opt_space_or_newline, infix_equality } ;
infix_equality = infix_cmp, { opt_space, ( "<" | "<=" | ">" | ">=" | "<=>" ), opt_space_or_newline, infix_cmp } ;
infix_cmp = logical_or, { opt_space, ( "==" | "!=" | "=~" | "!~" | "===" ), opt_space_or_newline, logical_or } ;
logical_or = logical_and, { opt_space, ( "|" | "^" ), opt_space_or_newline, logical_and } ;
logical_and = shift, { opt_space, "&", opt_space_or_newline, shift } ;
shift = add_or_sub, { opt_space, ( "<<" | ">>" ), opt_space_or_newline, add_or_sub } ;
(* todo: some number stuff *)
add_or_sub = mul_or_div, { opt_space, ( "+" | "-" | "&+" | "&-" ), opt_space_or_newline, mul_or_div } ;
mul_or_div = power, { opt_space, ( "*" | "/" | "//" | "%" | "&*" ), opt_space_or_newline, power } ;
power = prefix, { opt_space, ( "**" | "&**" ), opt_space_or_newline, prefix } ;
prefix = ( ( "!" | "+" | "-" | "~" | "&+" | "&-" ), opt_space_or_newline, prefix ) | atomic_with_method ; | |
(* todo: newline chaining can't happen for class/module/enum/fun/def *) | |
atomic_with_method = atomic, [ opt_space, atomic_method_suffix ] ; | |
(* todo: stuff *) | |
atomic_method_suffix = opt_space_or_newline, ( is_a | as | as_question | responds_to | nil_question ) ; | |
(* A single argument: an expression, optionally preceded by a splat marker.
   Only the '*' prefix is optional; the expression itself is required. *)
single_arg = [ '*', opt_space ], assign_no_control ;
(* Pseudo-method suffixes. Each one is the method name followed by its
   argument, written either after whitespace or inside parentheses.
   The two argument forms are grouped so that the method name prefixes both
   of them; ',' binds tighter than '|', so an ungrouped alternation would
   leave the parenthesised form without its method name. *)
is_a = '.is_a?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
as = '.as', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
as_question = '.as?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
responds_to = '.responds_to?', ( ( all_space, symbol ) | ( '(', opt_space_or_newline, symbol, opt_space_or_newline, ')' ) ) ;
nil_question = '.nil?', [ '(', opt_space_or_newline, ')' ] ; | |
union_type = atomic_type_with_suffix, opt_space, { '|', opt_space_or_newline, atomic_type_with_suffix } ; | |
atomic_type_with_suffix = atomic_type, type_suffix ; | |
type_suffix = | |
{ ( | |
'?' | '*' | '**' | | |
( '.', opt_space_or_newline, 'class' ) | | |
( '[', opt_space_or_newline, type_arg, opt_space_or_newline, ']' ) | |
), opt_space } ; | |
type_arg = number | sizeof | instance_sizeof | offsetof | union_type ; | |
atomic = parenthesized_expression | empty_array_literal | array_literal | hash_or_tuple_literal | percent_macro_expression | percent_macro_control | generic_or_global_call | fun_literal | annotation | number | char | delimiter | string_array | symbol_array | symbol | dollar_tilde | dollar_question | global_match_data_index | magic | identifier | constant | instance_var | class_var | underscore ; | |
parenthesized_expression = "(", opt_space_or_newline, { expression, statement_end }, [ expression ], ")" ; | |
empty_array_literal = "[]", opt_space, "of", opt_space_or_newline, bare_proc_type ; | |
array_literal = | |
"[", opt_space_or_newline, | |
{ [ "*" ], opt_space_or_newline, assign_no_control, opt_space, ",", opt_space_or_newline }, | |
[ [ "*" ], opt_space_or_newline, assign_no_control, opt_space_or_newline ], | |
"]" ; | |
hash_or_tuple_literal = "{", opt_space_or_newline, [ named_tuple (* todo: stuff *) ], "}" ; | |
fun_literal = | |
'fun', all_space_or_newline, | |
[ "(", opt_space_or_newline, | |
{ fun_literal_param, ',', opt_space_or_newline }, | |
fun_literal_param, opt_space_or_newline, | |
")" ], opt_space_or_newline, | |
[ ':', all_space_or_newline, bare_proc_type ], opt_space_or_newline, | |
[ do_end_block | curly_block ] ; | |
fun_literal_param = ident, opt_newline, [ all_space_or_newline, ":", all_space_or_newline, bare_proc_type ] ; | |
do_end_block = 'do', statement_end, expressions, opt_space_or_newline, 'end' ; | |
curly_block = '{', opt_space_or_newline, expressions, opt_space_or_newline, '}' ; | |
assign_no_control = assign ; | |
magic = '__LINE__' | '__FILE__' | '__DIR__' ; | |
underscore = '_' ; | |
identifier = keyword | var_or_call ; | |
keyword = begin | nil | true | false | yield | yield_with_scope | abstract | def | macro | require | | |
case | select | if | unless | include | extend | class | struct | module | enum | | |
while | until | return | next | break | lib | fun_def | alias | pointerof | sizeof | | |
instance_sizeof | offsetof | typeof | private | protected | asm | annotation ; | |
(* todo: handle blocks correctly, parse type declarations *) | |
var_or_call = ( "!" | is_a | as | as_question | responds_to | nil_question | "super" | "initialize" | "previous_def" | call_args ) ; | |
(* keywords not allowed in method definitions *) | |
keyword_def_disallowed = abstract | def | macro | require | include | extend | class | | |
struct | module | enum | lib | fun_def | alias | annotation ; | |
constant = generic_or_custom_literal ; | |
atomic_type = "self" | "self?" | typeof | underscore | constant | named_type_args | union_types | proc_type_output (* todo: .op_lparen?? *) ; | |
(* void_expression_keyword = ( break | next | return ), space, ":", space ; *) | |
(* todo: next char not '=' *) | |
symbol = ":", ( | |
"+" | "-" | "*" | "**" | "/" | "//" | | |
"==" | "===" | "=~" | "!=" | "!~" | "!" | | |
"<" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | | |
"&" | "&+" | "&-" | "&*" | "&**" | | |
"|" | "^" | "~" | "%" | "[]=" | "[]?" | "[]" | | |
string | | |
( ident_start, { ident_part }, [ '?' | '!' | '=' ] ) | |
) ; | |
ident = ident_start, { ident_part } ; | |
ident_start = ascii_letter | '_' | ord_0x9F ; | |
ident_part = ident_start | ascii_digit ; | |
global = "$", ident_start, { ident_part } ; | |
ascii_letter = ascii_uppercase | | |
"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | | |
"n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" ; | |
ascii_uppercase = | |
"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | | |
"N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ; | |
ascii_digit = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "0" ; | |
(* Any character whose code point is above 0x9F; written as a special
   sequence because the set cannot be enumerated here. *)
ord_0x9F = ? all characters with ord > 0x9F ? ; (* todo *)
binary_digit = "0" | "1" ; | |
octal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ; | |
hex_digit = ascii_digit | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f" ; | |
(* todo: don't allow consecutive underscores or underscore end *) | |
(* Numeric literal: binary, octal, hexadecimal, or decimal.
   A decimal literal has at most one fractional part, which must start with
   a digit (so `1.2.3` and `1.` are not numbers), an optional exponent, and
   at most one type suffix. *)
number =
  (
    ( "0b", binary_digit, { binary_digit | '_' } ) |
    ( "0o", octal_digit, { octal_digit | '_' } ) |
    ( "0x", hex_digit, { hex_digit | '_' }, [ number_suffix ] ) |
    (
      ascii_digit, { ascii_digit | '_' },
      [ '.', ascii_digit, { ascii_digit | '_' } ],
      [ number_exponent ],
      [ number_suffix ]
    )
  ) ;
(* Exponent part, with an optional sign (as in `1.5e-7`). *)
number_exponent = "e", [ '+' | '-' ], ascii_digit, { ascii_digit } ;
(* Type suffix: integer suffixes carry any width, float suffixes only 32 or
   64. The width alternation is grouped so that 'f' prefixes both widths. *)
number_suffix = ( ( 'i' | 'u' ), ( '8' | '16' | '32' | '64' | '128' ) ) | ( 'f', ( '32' | '64' ) ) ;
begin = "begin", statement_end, expressions, [ rescue, { rescue }, [ else ] ], [ ensure ], 'end' ; | |
rescue = "rescue", all_space, [ identifier, [ all_space, ':', all_space, constant ] ], one_semicolon_or_newline, expressions, statement_end ; | |
else = "else", statement_end, expressions ; | |
ensure = "ensure", statement_end, expressions ; | |
(* An `if` expression: condition, body, any number of `elsif` branches, an
   optional `else` branch, then 'end'.
   Each branch rule carries its own body. A body (`expressions`) already
   terminates every statement with statement_end, so no extra terminator is
   required between a branch and the next keyword. *)
if =
  "if", all_space_or_newline, assign_no_control, statement_end,
  expressions,
  { elsif },
  [ else ],
  'end';
(* One `elsif` branch: its condition followed by the branch body. *)
elsif =
  "elsif", all_space_or_newline,
  assign_no_control, statement_end,
  expressions ;
(* An `unless` expression: condition, body, optional `else` branch, 'end'.
   The `else` rule already ends with `expressions`, whose statements each
   carry their own statement_end, so no extra terminator precedes 'end'. *)
unless =
  "unless", all_space_or_newline, assign_no_control, statement_end,
  expressions,
  [ else ],
  'end';
(* `while` / `until` loops: keyword, condition, body, closing 'end'.
   while_body is the shared condition-plus-body part; its trailing
   `expressions` terminates each statement itself, so 'end' follows
   directly. *)
while = "while", all_space_or_newline, while_body, 'end' ;
until = "until", all_space_or_newline, while_body, 'end' ;
while_body = assign_no_control, statement_end, expressions ;
nil = 'nil' ; | |
true = 'true' ; | |
false = 'false' ; | |
(* `yield` keyword with optional arguments, written like return / next /
   break so that the rule actually requires the keyword itself. *)
yield = "yield", control_expression ;
abstract = "abstract", all_space_or_newline, ( def | class | struct ) ; | |
def = "def", opt_space_or_newline, def_or_macro_name, opt_space, [ ".", def_or_macro_name ], opt_space, | |
[ "(", opt_space_or_newline, { param, ',' }, [ param ], ")" ], | |
[ ":" ] ; | |
def_or_macro_name = def_symbols | path | ( ident - pseudo_methods, [ '=' ] ) ; | |
(* Operator tokens that may be used as a method name in a definition.
   The wrapping power operator '&**' closes the list; '&*' appears once. *)
def_symbols = '`' | '<<' | '<' | '<=' | '==' | '===' | '!=' | '=~' | '!~' |
  '>>' | '>' | '>=' | '+' | '-' | '*' | '/' | '//' | '!' | '~' | '%' | '&' | '|' | '^' | '**' |
  '[]' | '[]?' | '[]=' | '<=>' | '&+' | '&-' | '&*' | '&**' ;
pseudo_methods = 'is_a?' | 'as' | 'as?' | 'responds_to?' | 'nil?' ; | |
def_op_name_disallowed = '!' ; | |
module = "module", all_space_or_newline, path, all_space, [ type_vars ], statement_end, expressions, 'end' ; | |
class = "class", class_body ; | |
struct = "struct", class_body ; | |
class_body = all_space_or_newline, path, opt_space, [ type_vars ], [ "<", all_space_or_newline, ( "self" | generic ) ], statement_end, expressions, 'end' ; | |
annotation = "@[", opt_space, path, opt_space, [ "(", opt_space_or_newline, ( named_args | call_args ), ")" ], "]" ; | |
enum = "enum", all_space_or_newline, path, opt_space, [ bare_proc_type ], statement_end, enum_body, 'end' ; | |
(* Body of an enum: any number of members, each being exactly one of the
   alternatives below. All members are separated by '|' (alternation); a
   ',' would instead force a constant to be followed by a definition.
   A visibility modifier is separated from the definition by whitespace. *)
enum_body =
  {
    ( const, opt_space, [ '=', opt_space_or_newline, logical_or ], statement_end ) |
    ( [ ( 'private' | 'protected' ), all_space ], ( def | macro ) ) |
    ( class_var, opt_space, '=', opt_space_or_newline, assign ) |
    percent_macro_expression |
    percent_macro_control |
    annotation |
    statement_end
  } ;
(* Parenthesised type arguments: either named type arguments or a
   comma-separated list of type arguments. The list may be empty.
   Separators are mandatory between positional arguments: the list is
   "zero or more `arg ,` pairs followed by a final arg". *)
type_args =
  "(",
  opt_space_or_newline,
  [
    named_type_args |
    (
      { type_arg, opt_space, ",", opt_space_or_newline },
      type_arg
    )
  ],
  opt_space_or_newline,
  ")" ;
named_type_args = | |
{ ( ident | const | string ), ':', all_space, bare_proc_type, opt_space, ',' }, | |
( ident | const | string ), ':', all_space, bare_proc_type; | |
(* macro = "macro", space_or_newline, def_name, *) | |
(* A constant path such as `Foo::Bar`. The leading '::' (global lookup) is
   optional; without the brackets a plain `Foo` could not be derived. *)
path = [ "::", opt_space_or_newline ], const, { "::", const } ;
const = ascii_uppercase, { ident_part } ; | |
require = "require", all_space, string ; | |
case = "case", opt_space_or_newline, { ';', opt_space }, [ all_space, assign_no_control, statement_end ], | |
{ ( when | in ), statement_end }, [ else, statement_end ], 'end' ; | |
select = "select", all_space, statement_end, { when, statement_end }, [ else, statement_end ], 'end' ; | |
include = "include", all_space, include_body ; | |
extend = "extend", all_space, include_body ; | |
(* Target of include / extend: either the keyword 'self' or a (possibly
   generic) type path. Exactly one of the two is required. *)
include_body = opt_space_or_newline, ( 'self' | generic ) ;
alias = "alias", all_space_or_newline, path, opt_space, "=", opt_space_or_newline, bare_proc_type ; | |
generic = [ "::" ], path, [ type_args ] ; | |
instance_var = "@", ident; | |
class_var = "@@", ident; | |
return = "return", control_expression ; | |
next = "next", control_expression ; | |
break = "break", control_expression ; | |
control_expression = [ call_args ]; | |
lib = "lib", all_space_or_newline, path, statement_end, lib_body, 'end' ; | |
lib_body = | |
{ | |
annotation | | |
alias | | |
fun_def | | |
type_def | | |
c_struct | | |
c_union | | |
enum | | |
( const, opt_space_or_newline, '=', expression, statement_end ) | | |
( global, [ '=', opt_space, ( ident | const ) ], all_space, ':', all_space, bare_proc_type, statement_end ) | | |
percent_macro_expression | | |
percent_macro_control | |
}; | |
char = "'", ( ascii_letter | ascii_digit | " " ), "'" ; (* todo: improve *) | |
dollar_tilde = "$~" ; | |
dollar_question = "$?" ; | |
private = "private", all_space, assign ; | |
protected = "protected", all_space, assign ; | |
(* todo: asm operands, can't find any examples *) | |
asm = "asm", [ all_space ], | |
"(", | |
[ opt_space_or_newline ], string, [ opt_space_or_newline ], | |
")" ; | |
pointerof = "pointerof", opt_space, "(", opt_space_or_newline, assign, opt_space_or_newline, ")" ; | |
sizeof = "sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ; | |
(* Same shape as sizeof, but introduced by its own keyword; aliasing it to
   the sizeof rule would make it match the text "sizeof" instead. *)
instance_sizeof = "instance_sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ;
offsetof = "offsetof", opt_space, "(", opt_space_or_newline, | |
bare_proc_type, opt_space, ',', opt_space_or_newline, | |
( number | instance_var ), opt_space_or_newline, ")" ; | |
string = '"', ? characters, escaped if necessary ?, '"' ; | |
heredoc = ( "<<-" | "<<~" ), const, ? too complex for EBNF ? ; | |
(* Placeholder rules that are not written yet. Each is a special sequence
   (? ... ?), matching the other special sequences in this grammar; a quoted
   "? TODO ?" would instead be a terminal matching that literal text. *)
yield_with_scope = ? TODO ? ;
macro = ? TODO ? ;
when = ? TODO ? ;
in = ? TODO ? ;
percent_macro_expression = ? TODO ? ;
percent_macro_control = ? TODO ? ;
fun_def = ? TODO ? ;
type_def = ? TODO ? ;
c_struct = ? TODO ? ;
c_union = ? TODO ? ;
call_args = ? TODO ? ;
type_vars = ? TODO ? ;
named_args = ? TODO ? ;
generic_or_custom_literal = ? TODO ? ;
typeof = ? TODO ? ;
bare_proc_type = ? TODO ? ;
named_tuple = ? TODO ? ;
union_types = ? TODO ? ;
proc_type_output = ? TODO ? ;
generic_or_global_call = ? TODO ? ;
delimiter = ? TODO ? ;
string_array = ? TODO ? ;
symbol_array = ? TODO ? ;
global_match_data_index = ? TODO ? ;
param = ? TODO ? ;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment