Skip to content

Instantly share code, notes, and snippets.

@nobodywasishere
Created November 17, 2023 15:28
Show Gist options
  • Save nobodywasishere/b18d196ed06e91d8afb72e41d1c8fdeb to your computer and use it in GitHub Desktop.
Save nobodywasishere/b18d196ed06e91d8afb72e41d1c8fdeb to your computer and use it in GitHub Desktop.
Work in progress EBNF language grammar for Crystal
(* Entry point: a document is whatever `expressions` derives. *)
document = expressions ;

(* Spaces: exactly one, zero or more, one or more. *)
one_space = ' ' ;
opt_space = { one_space } ;
all_space = one_space, opt_space ;

(* Line breaks: LF, CRLF, or a lone CR. *)
one_newline = '\n' | '\r\n' | '\r' ;
opt_newline = { one_newline } ;
all_newline = one_newline, opt_newline ;

(* Any mix of spaces and line breaks. *)
one_space_or_newline = one_space | one_newline ;
opt_space_or_newline = { one_space_or_newline } ;
all_space_or_newline = one_space_or_newline, opt_space_or_newline ;

(* Statement separators: a semicolon or a line break. *)
one_semicolon_or_newline = ';' | one_newline ;
opt_semicolon_or_newline = { one_semicolon_or_newline } ;
all_semicolon_or_newline = one_semicolon_or_newline, opt_semicolon_or_newline ;

(* A statement ends with optional trailing spaces and exactly one separator. *)
statement_end = opt_space, one_semicolon_or_newline ;
(* Zero or more statements, each one closed by statement_end. *)
expressions = { multi_assign, statement_end } ;
(* Currently just an alias for expression; a multiple-assignment form is
   drafted in the commented-out alternative that follows. *)
multi_assign = expression ;
(* todo: limit to assign or call *)
(* |
(
{ multi_assign_part, ',' }, opt_space_or_newline,
[ '*', multi_assign_part, ',' ], opt_space_or_newline,
{ multi_assign_part, ',' }, opt_space_or_newline,
[ multi_assign_part ]
) ; *)
(* An expression padded by optional spaces on both sides; only referenced
   from the commented-out draft of multi_assign. *)
multi_assign_part = opt_space, expression, opt_space ;
(* An assignment-level expression, optional spaces, then an optional
   trailing modifier. *)
expression =
    assign, opt_space,
    [ expression_suffix ] ;
(* todo *)
expression_suffix =
    expression_suffix_if
  | expression_suffix_unless
  | expression_suffix_rescue
  | expression_suffix_ensure ;
(* Every suffix has the same shape: keyword, one or more spaces, operand. *)
expression_suffix_if     = "if",     all_space, assign_no_control ;
expression_suffix_unless = "unless", all_space, assign_no_control ;
expression_suffix_rescue = "rescue", all_space, assign_no_control ;
expression_suffix_ensure = "ensure", all_space, assign_no_control ;
(* todo *)
(* A ternary-level expression, optionally followed by an assignment.
   The two assignment forms are grouped explicitly: in EBNF the alternation
   bar binds more loosely than the concatenation comma, so without the extra
   parentheses opt_space would belong to the first form only and the
   right-hand side of '=' would be split in two. *)
assign = question_colon,
  [
    opt_space,
    (
      ( assignment_operator, all_space_or_newline, assign_no_control ) |
      ( '=', all_space_or_newline, ( bare_proc_type | assign_no_control ) ) (* todo: differentiate '[]=' operator *)
    )
  ] ;
(* Compound assignment operators. The bitwise-or form '|=' was missing
   although its siblings '&=' and '^=' were listed. *)
assignment_operator =
  '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '|=' | '&=' | '^=' |
  '**=' | '<<=' | '>>=' | '||=' | '&&=' | '&+=' | '&-=' | '&*=' ;
(* Ternary conditional: a range-level operand followed by zero or more
   "? then-branch : else-branch" tails. Both branches recurse into
   question_colon. At least one space is required before the question mark,
   and at least one space or newline around the branches. *)
question_colon = range,
{
all_space,
"?", all_space_or_newline, question_colon, all_space_or_newline,
":", all_space_or_newline, question_colon
} ;
(* todo: the precedence of this may be wrong, as the parser checks manually for end tokens *)
(* An optional leading range operator, one operand, then any number of
   operator/operand pairs. No whitespace is allowed around the operator
   as written. *)
range = [ range_operator ], infix_or, { range_operator, infix_or } ;
(* The two-dot and three-dot range operators. *)
range_operator = '..' | '...' ;
(* Binary operator levels, loosest first. Each level is an operand followed
   by zero or more operator/operand pairs; repetition is used instead of a
   single optional tail so that chains such as a || b || c can be derived. *)
infix_or = infix_and, { opt_space, "||", opt_space_or_newline, infix_and } ;
infix_and = infix_equality, { opt_space, "&&", opt_space_or_newline, infix_equality } ;
infix_equality = infix_cmp, { opt_space, ( "<" | "<=" | ">" | ">=" | "<=>" ), opt_space_or_newline, infix_cmp } ;
infix_cmp = logical_or, { opt_space, ( "==" | "!=" | "=~" | "!~" | "===" ), opt_space_or_newline, logical_or } ;
(* Bitwise or / xor level. The operators are alternatives; separating them
   with a comma would demand both in sequence. Repetition allows chains. *)
logical_or = logical_and, { opt_space, ( "|" | "^" ), opt_space_or_newline, logical_and } ;
(* Bitwise-and and shift levels; repetition allows chains such as a & b & c. *)
logical_and = shift, { opt_space, "&", opt_space_or_newline, shift } ;
shift = add_or_sub, { opt_space, ( "<<" | ">>" ), opt_space_or_newline, add_or_sub } ;
(* todo: some number stuff *)
(* Additive level. The four operators are alternatives; separating them
   with commas would demand all of them in sequence. Repetition allows
   chains. *)
add_or_sub = mul_or_div, { opt_space, ( "+" | "-" | "&+" | "&-" ), opt_space_or_newline, mul_or_div } ;
(* Multiplicative level. The operators are alternatives, not a sequence;
   repetition allows chains. *)
mul_or_div = power, { opt_space, ( "*" | "/" | "//" | "%" | "&*" ), opt_space_or_newline, power } ;
(* Exponentiation level. The two operators are alternatives, not a
   sequence; repetition allows chains. *)
power = prefix, { opt_space, ( "**" | "&**" ), opt_space_or_newline, prefix } ;
(* Either a prefix operator applied to another prefix expression (so prefix
   operators can stack), or an atomic expression with its method suffix. *)
prefix =
    (
      ( "!" | "+" | "-" | "~" | "&+" | "&-" ),
      opt_space_or_newline,
      prefix
    )
  | atomic_with_method ;
(* todo: newline chaining can't happen for class/module/enum/fun/def *)
(* An atomic expression optionally followed by a single method-style suffix. *)
atomic_with_method = atomic, [ opt_space, atomic_method_suffix ] ;
(* todo: stuff *)
(* The suffix may start on a following line; only the pseudo-methods
   defined in this grammar are covered so far. *)
atomic_method_suffix = opt_space_or_newline, ( is_a | as | as_question | responds_to | nil_question ) ;
(* A single argument with an optional leading splat. Only the splat and its
   trailing spaces are optional; the argument itself is required. As first
   written the whole rule was optional and the splat mandatory. *)
single_arg = [ '*', opt_space ], assign_no_control ;
(* Pseudo-method suffixes. Each takes its argument either after at least one
   space or inside parentheses. The two argument forms are grouped so that
   the method name prefixes both; without the grouping the alternation bar
   would split the rule and leave the parenthesised form with no method
   name. *)
is_a = '.is_a?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
as = '.as', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
as_question = '.as?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ;
responds_to = '.responds_to?', ( ( all_space, symbol ) | ( '(', opt_space_or_newline, symbol, opt_space_or_newline, ')' ) ) ;
(* Takes no argument; an empty pair of parentheses is optional. *)
nil_question = '.nil?', [ '(', opt_space_or_newline, ')' ] ;
(* One or more suffixed atomic types separated by a vertical bar. *)
union_type = atomic_type_with_suffix, opt_space, { '|', opt_space_or_newline, atomic_type_with_suffix } ;
(* An atomic type followed by its (possibly empty) run of suffixes. *)
atomic_type_with_suffix = atomic_type, type_suffix ;
(* Zero or more suffixes, each optionally followed by spaces: a question
   mark, a single or double star, a ".class" access, or a bracketed
   type_arg. *)
type_suffix =
{ (
'?' | '*' | '**' |
( '.', opt_space_or_newline, 'class' ) |
( '[', opt_space_or_newline, type_arg, opt_space_or_newline, ']' )
), opt_space } ;
(* What may appear as a type argument: a number, one of the size/offset
   pseudo-calls, or a union type. *)
type_arg = number | sizeof | instance_sizeof | offsetof | union_type ;
(* The smallest self-contained expressions; one alternative per line. *)
atomic =
    parenthesized_expression
  | empty_array_literal
  | array_literal
  | hash_or_tuple_literal
  | percent_macro_expression
  | percent_macro_control
  | generic_or_global_call
  | fun_literal
  | annotation
  | number
  | char
  | delimiter
  | string_array
  | symbol_array
  | symbol
  | dollar_tilde
  | dollar_question
  | global_match_data_index
  | magic
  | identifier
  | constant
  | instance_var
  | class_var
  | underscore ;
(* Parentheses around zero or more terminated expressions plus an optional
   final unterminated one. *)
parenthesized_expression = "(", opt_space_or_newline, { expression, statement_end }, [ expression ], ")" ;
(* An empty array literal followed by "of" and an element type. *)
empty_array_literal = "[]", opt_space, "of", opt_space_or_newline, bare_proc_type ;
(* Bracketed, comma-separated elements; each element may be splatted and
   the last element may omit its comma. *)
array_literal =
"[", opt_space_or_newline,
{ [ "*" ], opt_space_or_newline, assign_no_control, opt_space, ",", opt_space_or_newline },
[ [ "*" ], opt_space_or_newline, assign_no_control, opt_space_or_newline ],
"]" ;
(* Braces around an optional named tuple; hash and plain tuple contents are
   not described yet. *)
hash_or_tuple_literal = "{", opt_space_or_newline, [ named_tuple (* todo: stuff *) ], "}" ;
(* A function literal: the 'fun' keyword, an optional parenthesised
   parameter list (at least one parameter when the parentheses are present),
   an optional return type after a colon, and an optional body block. *)
fun_literal =
'fun', all_space_or_newline,
[ "(", opt_space_or_newline,
{ fun_literal_param, ',', opt_space_or_newline },
fun_literal_param, opt_space_or_newline,
")" ], opt_space_or_newline,
[ ':', all_space_or_newline, bare_proc_type ], opt_space_or_newline,
[ do_end_block | curly_block ] ;
(* A parameter name with an optional type restriction after a colon. *)
fun_literal_param = ident, opt_newline, [ all_space_or_newline, ":", all_space_or_newline, bare_proc_type ] ;
(* A block delimited by 'do' and 'end'. *)
do_end_block = 'do', statement_end, expressions, opt_space_or_newline, 'end' ;
(* A block delimited by curly braces. *)
curly_block = '{', opt_space_or_newline, expressions, opt_space_or_newline, '}' ;
(* Currently identical to assign. NOTE(review): the name suggests control
   expressions should be excluded here; nothing in this grammar enforces
   that yet. *)
assign_no_control = assign ;
(* Magic constants for the current line, file and directory. *)
magic = '__LINE__' | '__FILE__' | '__DIR__' ;
(* The lone underscore. *)
underscore = '_' ;
(* An identifier position holds either a keyword construct or a variable
   or call. *)
identifier = keyword | var_or_call ;
(* Every keyword-introduced construct known to this grammar. *)
keyword =
    begin | nil | true | false
  | yield | yield_with_scope
  | abstract | def | macro | require
  | case | select | if | unless
  | include | extend
  | class | struct | module | enum
  | while | until
  | return | next | break
  | lib | fun_def | alias
  | pointerof | sizeof | instance_sizeof | offsetof | typeof
  | private | protected
  | asm | annotation ;
(* todo: handle blocks correctly, parse type declarations *)
var_or_call =
    "!"
  | is_a | as | as_question | responds_to | nil_question
  | "super" | "initialize" | "previous_def"
  | call_args ;
(* keywords not allowed in method definitions *)
keyword_def_disallowed =
    abstract | def | macro | require
  | include | extend
  | class | struct | module | enum
  | lib | fun_def | alias | annotation ;
(* Alias for generic_or_custom_literal, which is still a placeholder in
   this grammar. *)
constant = generic_or_custom_literal ;
(* The base forms a type can take before any suffix is applied. *)
atomic_type = "self" | "self?" | typeof | underscore | constant | named_type_args | union_types | proc_type_output (* todo: .op_lparen?? *) ;
(* void_expression_keyword = ( break | next | return ), space, ":", space ; *)
(* todo: next char not '=' *)
(* A symbol literal: a colon followed by an operator name, a quoted string,
   or an identifier that may end in a question mark, exclamation mark or
   equals sign. *)
symbol = ":", (
"+" | "-" | "*" | "**" | "/" | "//" |
"==" | "===" | "=~" | "!=" | "!~" | "!" |
"<" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" |
"&" | "&+" | "&-" | "&*" | "&**" |
"|" | "^" | "~" | "%" | "[]=" | "[]?" | "[]" |
string |
( ident_start, { ident_part }, [ '?' | '!' | '=' ] )
) ;
(* Identifiers start with a letter, an underscore or a character matched by
   ord_0x9F, and continue with the same characters or digits. *)
ident = ident_start, { ident_part } ;
ident_start = ascii_letter | '_' | ord_0x9F ;
ident_part = ident_start | ascii_digit ;
(* A global is a dollar sign followed by an identifier. *)
global = "$", ident ;
(* ASCII letters: the uppercase set plus the lowercase alphabet. *)
ascii_letter =
    ascii_uppercase
  | "a" | "b" | "c" | "d" | "e" | "f" | "g"
  | "h" | "i" | "j" | "k" | "l" | "m" | "n"
  | "o" | "p" | "q" | "r" | "s" | "t" | "u"
  | "v" | "w" | "x" | "y" | "z" ;
ascii_uppercase =
    "A" | "B" | "C" | "D" | "E" | "F" | "G"
  | "H" | "I" | "J" | "K" | "L" | "M" | "N"
  | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
  | "V" | "W" | "X" | "Y" | "Z" ;
(* Decimal digits in ascending order. *)
ascii_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
(* Informal special sequence: any character whose code point is above 0x9F.
   The description text had been garbled by a stray search-and-replace. *)
ord_0x9F = ? all characters with ord > 0x9F ? ; (* todo *)
(* Digit sets for each radix; the larger sets build on the smaller ones. *)
binary_digit = "0" | "1" ;
octal_digit = binary_digit | "2" | "3" | "4" | "5" | "6" | "7" ;
hex_digit =
    ascii_digit
  | "a" | "b" | "c" | "d" | "e" | "f"
  | "A" | "B" | "C" | "D" | "E" | "F" ;
(* todo: don't allow consecutive underscores or underscore end *)
(* Numeric literals: binary, octal, hexadecimal or decimal.
   - The fractional part is optional and occurs at most once, with at least
     one digit after the dot; a repeated group would admit 1.2.3 and a
     trailing bare dot.
   - A type suffix occurs at most once, and is accepted on every radix. *)
number =
  (
    ( "0b", binary_digit, { binary_digit | '_' }, [ number_suffix ] ) |
    ( "0o", octal_digit, { octal_digit | '_' }, [ number_suffix ] ) |
    ( "0x", hex_digit, { hex_digit | '_' }, [ number_suffix ] ) |
    (
      ascii_digit, { ascii_digit | '_' },
      [ '.', ascii_digit, { ascii_digit | '_' } ],
      [ number_exponent ],
      [ number_suffix ]
    )
  ) ;
(* Exponent with an optional sign, as in 1.5e-7. *)
number_exponent = "e", [ "+" | "-" ], ascii_digit, { ascii_digit } ;
(* Integer suffixes i8 through u128 and float suffixes f32 / f64. The float
   widths are grouped so that the letter f applies to both; ungrouped, the
   alternation bar would make a bare 64 a suffix on its own. *)
number_suffix = ( ( 'i' | 'u' ), ( '8' | '16' | '32' | '64' | '128' ) ) | ( 'f', ( '32' | '64' ) ) ;
(* Exception-handling block: a body, then optionally one or more rescue
   clauses (with an optional else), an optional ensure clause, and 'end'. *)
begin = "begin", statement_end, expressions, [ rescue, { rescue }, [ else ] ], [ ensure ], 'end' ;
(* A rescue clause. The separating space is required only when a variable
   or type follows, so a bare rescue directly before the line break is
   accepted. The clause ends with its expressions, like else and ensure;
   every statement in expressions already carries its own terminator, so
   demanding one more here would make the closing keyword unreachable. *)
rescue = "rescue", [ all_space, identifier, [ all_space, ':', all_space, constant ] ], statement_end, expressions ;
else = "else", statement_end, expressions ;
ensure = "ensure", statement_end, expressions ;
(* Conditional: condition, body, any number of elsif branches with their
   own bodies, an optional else branch, then 'end'.
   - Each elsif is followed by expressions, so a branch can have a body.
   - No extra statement_end follows else: every statement inside it already
     ends with one, and requiring another would block the common
     else-body-end shape. *)
if =
  "if", all_space_or_newline, assign_no_control, statement_end,
  expressions,
  { elsif, statement_end, expressions },
  [ else ],
  'end' ;
(* The header of an elsif branch: keyword and condition. *)
elsif =
  "elsif", all_space_or_newline,
  assign_no_control ;
(* Negated conditional: condition, body, optional else branch, then 'end'.
   No extra statement_end follows else: every statement inside it already
   ends with one, and requiring another would block the common
   else-body-end shape. *)
unless =
  "unless", all_space_or_newline, assign_no_control, statement_end,
  expressions,
  [ else ],
  'end' ;
(* Loops: keyword, condition, body, then the closing 'end'. The closing
   keyword was missing; the other block constructs in this grammar (if,
   unless, begin) all finish with it. *)
while = "while", all_space_or_newline, while_body, 'end' ;
until = "until", all_space_or_newline, while_body, 'end' ;
(* Shared by both loops: the condition on the header line, then the body. *)
while_body = assign_no_control, statement_end, expressions ;
(* Literal keywords. *)
nil = 'nil' ;
true = 'true' ;
false = 'false' ;
(* The yield keyword followed by optional arguments, mirroring how return,
   next and break are written via control_expression. The keyword literal
   itself was missing, so the rule matched bare call arguments. *)
yield = "yield", [ call_args ] ;
(* The abstract modifier applied to a def, class or struct. *)
abstract = "abstract", all_space_or_newline, ( def | class | struct ) ;
(* Method header: name, optional receiver-style ".name" part, optional
   parenthesised parameter list, optional colon.
   NOTE(review): the rule stops after the colon; no return type, body or
   closing 'end' is described yet, so this looks unfinished. *)
def = "def", opt_space_or_newline, def_or_macro_name, opt_space, [ ".", def_or_macro_name ], opt_space,
[ "(", opt_space_or_newline, { param, ',' }, [ param ], ")" ],
[ ":" ] ;
(* A definable name: an operator symbol, a path, or an identifier that is
   not a pseudo-method, optionally ending in '=' for setters. *)
def_or_macro_name = def_symbols | path | ( ident - pseudo_methods, [ '=' ] ) ;
(* Operator names that can be defined as methods. The final alternative is
   the wrapping power operator '&**'; it had been written as a second
   '&*'. *)
def_symbols = '`' | '<<' | '<' | '<=' | '==' | '===' | '!=' | '=~' | '!~' |
  '>>' | '>' | '>=' | '+' | '-' | '*' | '/' | '//' | '!' | '~' | '%' | '&' | '|' | '^' | '**' |
  '[]' | '[]?' | '[]=' | '<=>' | '&+' | '&-' | '&*' | '&**' ;
(* Names excluded from def_or_macro_name. *)
pseudo_methods = 'is_a?' | 'as' | 'as?' | 'responds_to?' | 'nil?' ;
def_op_name_disallowed = '!' ;
(* Module definition. Spaces after the path are optional, matching
   class_body; requiring at least one would reject a header whose name is
   followed directly by the line break. *)
module = "module", all_space_or_newline, path, opt_space, [ type_vars ], statement_end, expressions, 'end' ;
(* Class and struct definitions share everything after the keyword. *)
class = "class", class_body ;
struct = "struct", class_body ;
(* Name, optional type variables, optional superclass after "<", then the
   body and the closing 'end'. *)
class_body = all_space_or_newline, path, opt_space, [ type_vars ], [ "<", all_space_or_newline, ( "self" | generic ) ], statement_end, expressions, 'end' ;
(* An annotation: a path in "@[" and "]" with an optional parenthesised
   argument list. *)
annotation = "@[", opt_space, path, opt_space, [ "(", opt_space_or_newline, ( named_args | call_args ), ")" ], "]" ;
(* Enum definition: name, optional base type introduced by a colon, body,
   and the closing 'end'. The colon before the base type was missing. *)
enum = "enum", all_space_or_newline, path, opt_space, [ ':', opt_space_or_newline, bare_proc_type ], statement_end, enum_body, 'end' ;
(* Enum body: any sequence of members, method or macro definitions (with an
   optional visibility modifier), class-variable assignments, macro
   constructs, annotations and blank statement ends.
   - The member form is an alternative like the others; it had been joined
     to the next form with a comma, which demands both in sequence.
   - def and macro are alternatives, not a sequence.
   - A visibility modifier must be separated from the definition by
     spaces. *)
enum_body =
  {
    ( const, opt_space, [ '=', opt_space_or_newline, logical_or ], statement_end ) |
    ( [ ( 'private' | 'protected' ), all_space ], ( def | macro ) ) |
    ( class_var, opt_space, '=', opt_space_or_newline, assign ) |
    percent_macro_expression |
    percent_macro_control |
    annotation |
    statement_end
  } ;
(* Parenthesised type arguments: either named arguments or a comma-separated
   list of any length; an empty pair of parentheses stays allowed.
   The list is one type_arg followed by repeated ", type_arg" pairs. The
   earlier shape (an optional "type_arg ," prefix inside a repeated group)
   could not derive three or more comma-separated arguments and let groups
   follow each other with no comma between them. *)
type_args =
  "(",
  opt_space_or_newline,
  [
    named_type_args |
    ( type_arg, { opt_space, ",", opt_space_or_newline, type_arg } )
  ],
  opt_space_or_newline,
  ")" ;
(* One or more "name: type" entries separated by commas; the final entry
   has no trailing comma. A name is an identifier, a constant or a
   string. *)
named_type_args =
{ ( ident | const | string ), ':', all_space, bare_proc_type, opt_space, ',' },
( ident | const | string ), ':', all_space, bare_proc_type;
(* macro = "macro", space_or_newline, def_name, *)
(* A constant path such as Foo::Bar, optionally anchored at the top level by
   a leading "::". The leading separator is optional; making it mandatory
   would reject every ordinary, non-global name. *)
path = [ "::", opt_space_or_newline ], const, { "::", const } ;
(* A constant name: an uppercase letter followed by identifier characters. *)
const = ascii_uppercase, { ident_part } ;
(* The require keyword, at least one space, and a string. *)
require = "require", all_space, string ;
(* Case expression: optional subject, any number of when / in branches, an
   optional else branch, then 'end'. No extra statement_end follows else:
   every statement inside it already ends with one, and requiring another
   would block the common else-body-end shape. *)
case = "case", opt_space_or_newline, { ';', opt_space }, [ all_space, assign_no_control, statement_end ],
  { ( when | in ), statement_end }, [ else ], 'end' ;
(* Select expression: keyword, end of the header line, when branches, an
   optional else branch, then 'end'.
   - statement_end already permits trailing spaces, so no mandatory space is
     required after the keyword; requiring one rejected a keyword followed
     directly by the line break.
   - No extra statement_end follows else, for the same reason as in if. *)
select = "select", statement_end, { when, statement_end }, [ else ], 'end' ;
(* include / extend: keyword, at least one space, then the target. *)
include = "include", all_space, include_body ;
extend = "extend", all_space, include_body ;
(* The target is required and is either 'self' or a generic type. Written
   as an optional sequence it would allow an empty target and otherwise
   demand 'self' immediately followed by a type. *)
include_body = opt_space_or_newline, ( 'self' | generic ) ;
(* Type alias: a path, an equals sign, and the aliased type. *)
alias = "alias", all_space_or_newline, path, opt_space, "=", opt_space_or_newline, bare_proc_type ;
(* A path with optional type arguments. The global "::" prefix is described
   by path itself, so it is not repeated here; repeating it would allow a
   doubled prefix. *)
generic = path, [ type_args ] ;
(* Instance and class variables: one or two at-signs before an identifier. *)
instance_var = "@", ident;
class_var = "@@", ident;
(* Control-flow keywords, each followed by an optional argument list. *)
return = "return", control_expression ;
next = "next", control_expression ;
break = "break", control_expression ;
control_expression = [ call_args ];
(* C bindings block: a named lib with its body and the closing 'end'. *)
lib = "lib", all_space_or_newline, path, statement_end, lib_body, 'end' ;
(* Any sequence of the declarations permitted inside a lib: annotations,
   aliases, fun / type / struct / union / enum definitions, constant
   assignments, typed global declarations and macro constructs. *)
lib_body =
{
annotation |
alias |
fun_def |
type_def |
c_struct |
c_union |
enum |
( const, opt_space_or_newline, '=', expression, statement_end ) |
( global, [ '=', opt_space, ( ident | const ) ], all_space, ':', all_space, bare_proc_type, statement_end ) |
percent_macro_expression |
percent_macro_control
};
(* A character literal; only ASCII letters, digits and the space are covered
   so far. *)
char = "'", ( ascii_letter | ascii_digit | " " ), "'" ; (* todo: improve *)
(* The two special dollar variables. *)
dollar_tilde = "$~" ;
dollar_question = "$?" ;
(* Visibility modifiers applied to an assign-level expression. *)
private = "private", all_space, assign ;
protected = "protected", all_space, assign ;
(* todo: asm operands, can't find any examples *)
(* Inline assembly: the keyword, optional spaces, and a parenthesised
   string. The whitespace helpers used here already match the empty string,
   so they need no extra optional brackets. *)
asm =
    "asm", opt_space,
    "(",
    opt_space_or_newline, string, opt_space_or_newline,
    ")" ;
(* pointerof takes a parenthesised assign-level expression. *)
pointerof = "pointerof", opt_space, "(", opt_space_or_newline, assign, opt_space_or_newline, ")" ;
(* sizeof takes a parenthesised type. *)
sizeof = "sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ;
(* Same shape as sizeof but with its own keyword. Aliasing the sizeof rule
   made this rule match the text "sizeof" rather than "instance_sizeof". *)
instance_sizeof = "instance_sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ;
(* offsetof takes a type and, after a comma, a number or an instance
   variable. *)
offsetof = "offsetof", opt_space, "(", opt_space_or_newline,
bare_proc_type, opt_space, ',', opt_space_or_newline,
( number | instance_var ), opt_space_or_newline, ")" ;
(* A double-quoted string; the contents are described informally. *)
string = '"', ? characters, escaped if necessary ?, '"' ;
(* A heredoc opener and its identifier; the body is left as an informal
   special sequence. *)
heredoc = ( "<<-" | "<<~" ), const, ? too complex for EBNF ? ;
(* Rules not written yet. Each one is defined as the quoted terminal
   "? TODO ?", so as it stands it matches that literal text.
   NOTE(review): presumably these are placeholders that keep every
   referenced name defined; confirm whether an unquoted special sequence
   was intended. *)
yield_with_scope = "? TODO ?" ;
macro = "? TODO ?" ;
when = "? TODO ?" ;
in = "? TODO ?" ;
percent_macro_expression = "? TODO ?" ;
percent_macro_control = "? TODO ?" ;
fun_def = "? TODO ?" ;
type_def = "? TODO ?" ;
c_struct = "? TODO ?" ;
c_union = "? TODO ?" ;
call_args = "? TODO ?" ;
type_vars = "? TODO ?" ;
named_args = "? TODO ?" ;
generic_or_custom_literal = "? TODO ?" ;
typeof = "? TODO ?" ;
bare_proc_type = "? TODO ?" ;
named_tuple = "? TODO ?" ;
union_types = "? TODO ?" ;
proc_type_output = "? TODO ?" ;
generic_or_global_call = "? TODO ?" ;
delimiter = "? TODO ?" ;
string_array = "? TODO ?" ;
symbol_array = "? TODO ?" ;
global_match_data_index = "? TODO ?" ;
param = "? TODO ?" ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment