Skip to content

Instantly share code, notes, and snippets.

@alexnask
Created April 16, 2021 16:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexnask/2c0eb2f4543cbcfc3bfe3bd146b4f396 to your computer and use it in GitHub Desktop.
Save alexnask/2c0eb2f4543cbcfc3bfe3bd146b4f396 to your computer and use it in GitHub Desktop.
// Rules that will be inlined.
// Builds `r (_? r)*`: one `r`, then any number of further `r`s, each of which
// may be preceded by a single underscore separator.
const underscore_sep = r => {
  const separated_tail = seq(optional("_"), r);
  return seq(r, repeat(separated_tail));
};
// Single-digit character classes, one per radix.
const bin = /[01]/;
const oct = /[0-7]/;
const dec = /[0-9]/;
const hex = /[0-9a-fA-F]/;
// Digit runs for each radix; underscore_sep allows `_` between digits.
const [bin_int, oct_int, dec_int, hex_int] =
  [bin, oct, dec, hex].map(digit => underscore_sep(digit));
// Escape sequences accepted inside string and character literals.
// Each row lists the pieces of one alternative, in order.
const char_escape = choice(...[
  ["\\x", hex, hex],              // \xNN   (exactly two hex digits)
  ["\\u{", repeat1(hex), "}"],    // \u{N…} (one or more hex digits)
  ["\\", /[nr\\t'"]/],            // \n \r \\ \t \' \"
].map(parts => seq(...parts)));
// One string character: an escape, or anything except backslash, double quote and newline.
const string_char = choice(char_escape, /[^\\"\n]/);
// Lead-byte and continuation-byte ranges used to spell out multi-byte UTF-8
// sequences. Names follow the pattern `ox<lo>_ox<hi>` for a range and
// `ox<value>` for a single value.
//
// Single values use `\xNN` escapes: the legacy octal form ('\364' etc.) is a
// SyntaxError in strict mode and ES modules, and hex matches the range
// constants beside it.
const ox80_oxBF = /[\x80-\xBF]/;
const oxF4 = '\xF4';
const ox80_ox8F = /[\x80-\x8F]/;
const oxF1_oxF3 = /[\xF1-\xF3]/;
const oxF0 = '\xF0';
// NOTE: the name mixes `ox`/`0x`; it is kept because other code refers to it.
const ox90_0xBF = /[\x90-\xBF]/;
const oxEE_oxEF = /[\xEE-\xEF]/;
const oxED = '\xED';
const ox80_ox9F = /[\x80-\x9F]/;
const oxE1_oxEC = /[\xE1-\xEC]/;
const oxE0 = '\xE0';
const oxA0_oxBF = /[\xA0-\xBF]/;
const oxC2_oxDF = /[\xC2-\xDF]/;
// A multi-byte UTF-8 sequence, written as one alternative per lead-byte range.
// Each row is the ordered list of pieces for one alternative.
// NOTE(review): these ranges describe UTF-8 *bytes*; tree-sitter's lexer
// appears to operate on decoded code points, so a real non-ASCII character may
// not match any alternative here — confirm against the tree-sitter docs.
const mb_utf8_literal = choice(...[
  // four-unit forms
  [oxF4, ox80_ox8F, ox80_oxBF, ox80_oxBF],
  [oxF1_oxF3, ox80_oxBF, ox80_oxBF, ox80_oxBF],
  [oxF0, ox90_0xBF, ox80_oxBF, ox80_oxBF],
  // three-unit forms
  [oxEE_oxEF, ox80_oxBF, ox80_oxBF],
  [oxED, ox80_ox9F, ox80_oxBF],
  [oxE1_oxEC, ox80_oxBF, ox80_oxBF],
  [oxE0, oxA0_oxBF, ox80_oxBF],
  // two-unit form
  [oxC2_oxDF, ox80_oxBF],
].map(units => seq(...units)));
// Any ASCII character except newline (\x0A), single quote (\x27) and
// backslash (\x5C). The previous class contained a stray `-` and stray spaces
// between the ranges; both were already covered by the ranges themselves, so
// the matched set is unchanged.
const ascii_char_not_nl_slash_squote = /[\x00-\x09\x0B-\x26\x28-\x5B\x5D-\x7F]/;
// Content of a character literal: a multi-byte sequence, an escape, or a plain ASCII character.
const char_char = choice(
  mb_utf8_literal,
  char_escape,
  ascii_char_not_nl_slash_squote,
);
// Non-empty list: `rule (, rule)* ,?` — at least one item, optional trailing comma.
const comma_list = rule => {
  const further_items = repeat(seq(",", rule));
  const trailing_comma = optional(",");
  return seq(rule, further_items, trailing_comma);
};
// Possibly-empty list: `(rule ,)* rule?` — every item but the last must be followed by a comma.
const optional_comma_list = rule => {
  const comma_terminated_items = repeat(seq(rule, ","));
  const last_item = optional(rule);
  return seq(comma_terminated_items, last_item);
};
// `kw <expression>` at primary-expression precedence.
const keyword_expression = (kw, $) => {
  const keyword_then_operand = seq(kw, $._expression);
  return prec(expr_precs.primary, keyword_then_operand);
};
// `if ( cond ) |payload|?` — the shared head of if statements and if expressions.
const if_prefix = $ => {
  const condition = field("cond", $._expression);
  const capture = optional(field("payload", $.ptr_payload));
  return seq("if", "(", condition, ")", capture);
};
// `for ( iterable ) |payload|` — the payload is mandatory for `for`.
const for_prefix = $ => {
  const iterable = field("iterable", $._expression);
  const capture = field("payload", $.ptr_index_payload);
  return seq("for", "(", iterable, ")", capture);
};
// `while ( cond ) |payload|? (: ( continue_expr ))?`
const while_prefix = $ => {
  const condition = field("cond", $._expression);
  const capture = optional(field("payload", $.ptr_payload));
  // Optional continue clause, written `: ( ... )` after the payload.
  const continue_clause = optional(seq(
    ":", "(", field("continue_expr", $._assignment_or_expression), ")",
  ));
  return seq("while", "(", condition, ")", capture, continue_clause);
};
// Wraps a loop prefix with the optional `label:` and `inline` markers that may precede it.
const loop_statement_prefix = ($, prefix) => {
  const label = optional(field("label", $.block_label));
  const inline_marker = optional("inline");
  return seq(label, inline_marker, prefix);
};
// Statement form of if/for/while.
//
// $                - the grammar's rule accessor
// prefix           - the already-built `if (...)`, `for (...)` or `while (...)` head
// has_else_payload - whether the `else` branch may carry a `|payload|`
//
// Two shapes are accepted:
//   prefix block_expression (else ...)?
//   prefix assignment_or_expression (";" | else ...)
const control_flow_statement = ($, prefix, has_else_payload) => {
  const payload_rule = has_else_payload
    ? optional(field("else_payload", $.payload))
    : blank();
  // Both shapes share the same `else` tail, so build it in one place.
  const else_tail = () => seq(
    "else",
    payload_rule,
    field("else_body", $._statement),
  );

  const block_form = seq(
    field("body", $.block_expression),
    optional(else_tail()),
  );
  const expression_form = seq(
    field("body", $._assignment_or_expression),
    choice(";", else_tail()),
  );

  return prec(statement_prec, seq(prefix, choice(block_form, expression_form)));
};
// Expression form of if/for/while:
//   label? inline? prefix expr_rule (else |payload|? expr_rule)?
//
// $                - the grammar's rule accessor
// prefix           - the already-built loop/if head
// expr_rule        - the rule used for both the body and the else body
// has_else_payload - whether the `else` branch may carry a `|payload|`
const control_flow_expression = ($, prefix, expr_rule, has_else_payload) => {
  const payload_rule = has_else_payload
    ? optional(field("else_payload", $.payload))
    : blank();
  const label = optional(field("label", $.block_label));
  const inline_marker = optional("inline");
  const else_branch = optional(seq(
    "else",
    payload_rule,
    field("else_body", expr_rule),
  ));
  return seq(label, inline_marker, prefix, expr_rule, else_branch);
};
// Precedence levels, listed from loosest (1) to tightest binding.
// The level of each name is its position in this list, starting at 1.
const expr_precs = Object.fromEntries([
  "or",
  "and",
  "compare",
  "bitwise",
  "bitshift",
  "addition",
  "multiply",
  "prefix",
  "primary",
  "async_call",
  "primary_type",
  "suffix",
  "error_union",
  "prefix_type",
].map((level_name, position) => [level_name, position + 1]));
// Statements sit one level above the highest expression precedence.
const statement_prec = expr_precs.prefix_type + 1;
module.exports = grammar({
// Name of the language this grammar describes.
name: "zig",
// No external-scanner tokens are declared.
externals: (_) => [],
// Tokens that may appear anywhere between other tokens: whitespace and line comments.
extras: $ => [/\s/, $.line_comments],
// TODO Will I need this?
//word: $ => $.identifier,
rules: {
// Start rule: an optional container-level doc comment (`//!`) followed by
// any number of container members.
root: ($) => seq(
optional($.container_doc_comment),
repeat($._container_members),
),
// Top level (grammar.y:3)
// Container level expressions, statements, declarations
// Hidden rule (leading underscore): one member of a container body.
_container_members: $ => choice(
$.test_decl,
$.top_level_comptime,
$.top_level_var_decl,
$.top_level_fn_proto,
$.usingnamespace,
$.container_field,
),
// `test "name"? { ... }` with optional leading doc comments.
// Here the `field(...)` wraps the `optional(...)`; the declarations below
// nest them the other way round.
test_decl: $ => seq(
field("doc_comments", optional($.doc_comment)),
"test",
field("name", optional($.string_literal_single)),
field("body", $.block),
),
// `comptime <block expression>` at container level.
top_level_comptime: $ => seq(
field("doc_comments", optional($.doc_comment)),
"comptime",
field("body", $.block_expression)
),
// Container-level variable declaration with its optional modifiers:
// doc comments, `pub`, export/extern, `threadlocal`.
top_level_var_decl: $ => seq(
optional(field("doc_comments", $.doc_comment)),
optional(field("pub", alias("pub", $.pub_mod))),
optional(field("export_extern", $.export_extern_mod)),
optional(field("threadlocal", $.threadlocal_mod)),
field("decl", $.var_decl),
),
// Container-level function: a prototype followed by either `;`
// (declaration only) or a body block.
top_level_fn_proto: $ => seq(
optional(field("doc_comments", $.doc_comment)),
optional(field("pub", alias("pub", $.pub_mod))),
optional(field("export_extern", $.export_extern_mod)),
field("proto", $.fn_proto),
choice(
";",
field("block", $.block),
),
),
// `pub? usingnamespace <expression> ;`
usingnamespace: $ => seq(
optional(field("doc_comments", $.doc_comment)),
optional(field("pub", alias("pub", $.pub_mod))),
"usingnamespace",
$._expression,
";",
),
container_field: $ => seq(
optional(field("doc_comments", $.doc_comment)),
optional("comptime"),
field("name", $.identifier),
optional(seq(":", field("type", $.container_field_type))),
optional(seq("=", field("initializer", $._expression))),
",",
),
// Type of a container field: `anytype` or a type expression, optionally
// followed by an `align(...)` clause.
container_field_type: $ => seq(
choice("anytype", field("type_expr", $._type_expression)),
optional(field("byte_align", $.byte_align)),
),
// `var`/`const` declaration: name, then optional type annotation, alignment,
// link section and initializer, terminated by `;`.
var_decl: $ => seq(
choice("var", "const"),
field("name", $.identifier),
optional(seq(":", field("type_expr", $._type_expression))),
optional(field("byte_align", $.byte_align)),
optional(field("link_section", $.link_section)),
optional(seq("=", field("initializer", $._expression))),
";",
),
// Function prototype: `fn name? ( params ) align? linksection? callconv? !? return_type`.
fn_proto: $ => prec(expr_precs.primary_type, seq(
"fn",
optional(field("name", $.identifier)),
"(",
optional_comma_list($.param_decl),
")",
optional(field("byte_align", $.byte_align)),
optional(field("link_section", $.link_section)),
optional(field("call_conv", $.call_conv)),
optional("!"),
field("return_type", $._type_expression),
)),
// One parameter: optional doc comments, optional `noalias`/`comptime`,
// optional `name:` and then the parameter type.
param_decl: $ => seq(
optional(field("doc_comments", $.doc_comment)),
optional(choice("noalias", "comptime")),
optional(seq(field("name", $.identifier), ":")),
field("type", $.param_type),
),
// Parameter type: `anytype`, the varargs marker `...`, or a type expression.
param_type: $ => choice(
"anytype",
"...",
field("type_expr", $._type_expression),
),
// Block level (grammar.y:29)
// Hidden rule: every construct that may appear as a statement inside a block.
_statement: $ => choice(
$.var_decl_statement,
// TODO Does this work?
// NOTE(review): this inline rule has the same shape as
// `assign_or_expression_statement` defined further down, which no rule in
// this grammar references.
prec(statement_prec, seq($._assignment_or_expression, ";")),
$.prefixed_block_expression_statement,
$.bare_suspend_statement,
$.errdefer_statement,
$.if_statement,
$.block_statement,
$.for_statement,
$.while_statement,
// Match this instead of a switch expression + semicolon
prec(statement_prec + 1, $.switch_expression),
),
// A (possibly labelled) block used as a statement.
block_statement: $ => $.block_expression,
// `while` statement; its `else` branch may carry a payload.
while_statement: $ => control_flow_statement(
$,
loop_statement_prefix($, while_prefix($)),
true,
),
// `for` statement; its `else` branch carries no payload.
for_statement: $ => control_flow_statement(
$,
loop_statement_prefix($, for_prefix($)),
false,
),
// `if` statement; no label/inline prefix, `else` may carry a payload.
if_statement: $ => control_flow_statement(
$,
if_prefix($),
true,
),
// `suspend;` with no body.
bare_suspend_statement: _ => seq("suspend", ";"),
// `name:` introducing a labelled block or loop.
block_label: $ => prec(expr_precs.primary_type + 1, seq($.identifier, ":")),
// A block with an optional leading label.
block_expression: $ => prec(statement_prec, seq(
optional(field("label", $.block_label)),
$.block,
)),
// `errdefer |payload|?` followed by a block or by `expr ;`.
errdefer_statement: $ => prec(statement_prec, seq(
"errdefer",
optional(field("payload", $.payload)),
choice(
$.block_expression,
seq($._assignment_or_expression, ";"),
),
)),
// `comptime` / `nosuspend` / `suspend` / `defer` followed by a block or by `expr ;`.
prefixed_block_expression_statement: $ => prec(statement_prec, seq(
// TODO Doesnt show up in `tree-sitter parse` output, check programmatically.
// remove some other aliases if it does.
field("keyword", choice(
"comptime", "nosuspend",
"suspend", "defer",
)),
choice(
$.block_expression,
seq($._assignment_or_expression, ";"),
),
)),
// Local variable declaration, optionally prefixed with `comptime`.
var_decl_statement: $ => prec(statement_prec, seq(
optional("comptime"),
$.var_decl,
)),
// Assignment or expression terminated by `;`.
// NOTE(review): not referenced by any other rule in this grammar.
assign_or_expression_statement: $ => prec(statement_prec, seq(
$._assignment_or_expression,
";",
)),
// Expression level (grammar.y:65)
_expression: $ => choice(
$.binary_expression,
$.catch_expression,
$.unary_expression,
$.asm_expression,
$.if_expression,
$.for_expression,
$.while_expression,
$.break_expression,
$.continue_expression,
$.comptime_expression,
$.nosuspend_expression,
$.resume_expression,
$.block,
$._type_expression,
// This is CurlySuffixExpr in grammar.y
$.type_init_list_expression,
),
// TODO Is this the correct associativity?
if_expression: $ => prec.left(expr_precs.primary,
control_flow_expression($, if_prefix($), $._expression, true)
),
// TODO Is this the correct associativity?
for_expression: $ => prec.left(expr_precs.primary,
control_flow_expression($, for_prefix($), $._expression, false)
),
// TODO Is this the correct associativity?
while_expression: $ => prec.left(expr_precs.primary,
control_flow_expression($, while_prefix($), $._expression, true)
),
block: $ => prec(expr_precs.primary, seq(
"{", repeat($._statement), "}"
)),
type_init_list_expression: $ => prec(expr_precs.primary, seq(
field("type_expr", $._type_expression),
field("list", $.init_list),
)),
// TODO Is this the correct associativity?
comptime_expression: $ => keyword_expression("comptime", $),
nosuspend_expression: $ =>keyword_expression("nosuspend", $),
resume_expression: $ => keyword_expression("resume", $),
return_expression: $ => prec(expr_precs.primary, seq(
"return",
optional(field("expr", $._expression)),
)),
// TODO Check associativity
break_label: $ => seq(":", $.identifier),
break_expression: $ => prec.left(expr_precs.primary, seq(
"break",
optional(field("label", $.break_label)),
optional(field("expr", $._expression)),
)),
continue_expression: $ => prec.left(expr_precs.primary, seq(
"continue",
optional(field("expr", $._expression)),
)),
// `left <assign-op> right`; the operator token is exposed as an
// `assign_operator` node in the `operator` field.
assignment: $ => seq(
field("left", $._expression),
field("operator", alias(choice(
"*=", "/=",
"%=", "+=",
"-=", "<<=",
">>=", "&=",
"^=", "|=",
"*%=", "+%=",
"-%=", "=",
), $.assign_operator)),
field("right", $._expression),
),
// Hidden rule: either an assignment or a plain expression.
_assignment_or_expression: $ => choice(
$.assignment,
$._expression,
),
// `.name = expr` inside an initializer list.
field_init: $ => seq(
".", field("name", $.identifier), "=", field("expr", $._expression),
),
// `{ ... }` initializer list holding either field initializers or plain
// expressions (not a mix), comma separated, possibly empty.
init_list: $ => seq(
"{",
choice(
optional_comma_list($.field_init),
optional_comma_list($._expression),
),
"}",
),
// grammar.y:115
// Hidden rule: every construct accepted where a type expression is expected.
// Besides type constructors this includes literals, declarations, control-flow
// forms and suffix operations.
_type_expression: $ => choice(
$.unary_type_expression,
$.slice_type_expression,
$.single_ptr_type_expression,
$.multi_ptr_type_expression,
$.array_type_expression,
$.error_union_expression,
$.grouped_expression,
$.builtin_call_expression,
$.identifier,
$.anonymous_init_list_expression,
// Literals
$._string_literal,
$.char_literal,
$.integer_literal,
$.float_literal,
$.enum_literal,
$.error_value_literal,
// Keyword literals are aliased so each appears as its own named node.
alias("false", $.false),
alias("true", $.true),
alias("null", $.null),
alias("undefined", $.undefined),
alias("unreachable", $.unreachable),
// Container and function decls
$.fn_proto,
$.error_set_decl,
$.struct_decl,
$.opaque_decl,
$.enum_decl,
$.union_decl,
// Control flow expressions
$.switch_expression,
$.if_type_expression,
$.for_type_expression,
$.while_type_expression,
// Block
$.block_type_expression,
// All these are in SuffixOp
$.bracket_access_expression,
$.field_access,
$.suffix_type_expression,
// Misc
$.comptime_type_expression,
$.async_call_expression,
$.call_expression,
),
// `error { A, B, ... }`
error_set_decl: $ => prec(expr_precs.primary_type, seq(
"error", "{",
optional_comma_list($.identifier),
"}",
)),
// `(extern|packed)? struct { members }`
struct_decl: $ => prec(expr_precs.primary_type, seq(
optional(choice("extern", "packed")),
"struct",
"{", optional($.container_doc_comment),
repeat($._container_members), "}",
)),
// `(extern|packed)? opaque { members }`
opaque_decl: $ => prec(expr_precs.primary_type, seq(
optional(choice("extern", "packed")),
"opaque",
"{", optional($.container_doc_comment),
repeat($._container_members), "}",
)),
// `(extern|packed)? enum (tag_type)? { members }`
enum_decl: $ => prec(expr_precs.primary_type, seq(
optional(choice("extern", "packed")),
"enum",
optional(seq("(", field("enum_int_type_expr", $._expression), ")")),
"{", optional($.container_doc_comment),
repeat($._container_members), "}",
)),
// `(extern|packed)? union (...)? { members }` where the parenthesised part
// is either `enum`, `enum(tag_type)` or an arbitrary expression.
union_decl: $ => prec(expr_precs.primary_type, seq(
optional(choice("extern", "packed")),
"union",
optional(seq(
"(",
choice(
seq("enum", optional(seq("(", field("enum_int_type_expr", $._expression), ")"))),
field("enum_type_expr", $._expression),
),
")",
)),
"{", optional($.container_doc_comment),
repeat($._container_members), "}",
)),
// A block in type-expression position; the label is mandatory here.
block_type_expression: $ => prec(expr_precs.primary_type, seq(
field("label", $.block_label),
field("block", $.block),
)),
// TODO Is this the correct associativity?
if_type_expression: $ => prec.left(expr_precs.primary_type,
control_flow_expression($, if_prefix($), $._type_expression, true)
),
// TODO Is this the correct associativity?
for_type_expression: $ => prec.left(expr_precs.primary_type,
control_flow_expression($, for_prefix($), $._type_expression, false)
),
// TODO Is this the correct associativity?
while_type_expression: $ => prec.left(expr_precs.primary_type,
control_flow_expression($, while_prefix($), $._type_expression, true)
),
// `comptime <type expression>`
comptime_type_expression: $ => prec(expr_precs.primary_type, seq(
"comptime",
field("type_expr", $._type_expression),
)),
// `.{ ... }`: an initializer list without a leading type.
anonymous_init_list_expression: $ => prec(expr_precs.primary_type, seq(
".",
field("list", $.init_list),
)),
// TODO Shouldnt this need an associativity?
// The two postfix operator tokens `.?` and `.*`.
suffix_type_operator: $ => choice(".?", ".*"),
// `operand.?` / `operand.*`
suffix_type_expression: $ => prec(expr_precs.suffix, seq(
field("type_expr", $._type_expression),
field("operator", $.suffix_type_operator),
)),
// `left.right` member access.
field_access: $ => prec(expr_precs.suffix, seq(
field("left", $._type_expression),
".",
field("right", $.identifier),
)),
// Prefix type operators: `?` and `anyframe->`.
unary_type_operator: _ => choice("?", seq("anyframe", "->")),
// A prefix type operator applied to a type expression.
unary_type_expression: $ => prec(expr_precs.prefix_type, seq(
field("operator", $.unary_type_operator),
field("type_expr", $._type_expression),
)),
slice_type_expression: $ => prec(expr_precs.prefix_type, seq(
"[",
optional(seq(":", field("sentinel", $._expression))),
"]",
optional_comma_list(
choice(
$.byte_align,
"const",
"volatile",
"allowzero",
),
),
field("type_expr", $._type_expression),
)),
// Hidden rule: one pointer modifier — an `align(...)` clause (which may
// include a bit range), `const`, `volatile` or `allowzero`.
_ptr_modifier: $ => choice(
$.pointer_align,
"const",
"volatile",
"allowzero",
),
single_ptr_type_expression: $ => prec(expr_precs.prefix_type, seq(
"*",
optional_comma_list($._ptr_modifier),
field("type_expr", $._type_expression),
)),
multi_ptr_type_expression: $ => prec(expr_precs.prefix_type, seq(
"[", "*",
optional(choice(
"c",
seq(":", field("sentinel", $._expression)),
)),
"]",
optional_comma_list($._ptr_modifier),
field("type_expr", $._type_expression),
)),
// Array type: `[size (:sentinel)?] child_type`.
array_type_expression: $ => prec(expr_precs.prefix_type, seq(
"[",
field("size_expr", $._expression),
optional(seq(":", field("sentinel", $._expression))),
"]",
field("type_expr", $._type_expression),
)),
// TODO Check associativity
// Error union: `left ! right`.
error_union_expression: $ => prec.left(expr_precs.error_union, seq(
field("left", $._type_expression),
"!",
field("right", $._type_expression),
)),
// `( expr )`
grouped_expression: $ => prec(expr_precs.primary_type, seq(
"(",
field("expr", $._expression),
")",
)),
// `@` followed by a name. The two parts are separate pieces of a `seq`,
// not a single `token(...)`.
builtin_identifier: _ => seq("@", /[A-Za-z_][A-Za-z0-9_]*/),
// `@name(args...)`
builtin_call_expression: $ => prec(expr_precs.primary_type, seq(
field("name", $.builtin_identifier),
"(",
optional_comma_list($._expression),
")",
)),
// Index or slice: `base[start]`, `base[start..]`, `base[start..end]`,
// each slice form optionally followed by `:sentinel` before the `]`.
bracket_access_expression: $ => prec(expr_precs.suffix, seq(
field("base_expr", $._type_expression),
"[",
field("start_index", $._expression),
optional(seq(
"..",
optional(seq(
optional(field("end_index", $._expression)),
optional(seq(
":",
field("sentinel", $._expression),
)),
)),
)),
"]",
)),
// `async fn_expr(args...)`
async_call_expression: $ => prec(expr_precs.async_call, seq(
"async",
field("fn_expr", $._type_expression),
"(",
optional_comma_list($._expression),
")",
)),
// `fn_expr(args...)`
call_expression: $ => prec(expr_precs.suffix, seq(
field("fn_expr", $._type_expression),
"(",
optional_comma_list($._expression),
")",
)),
// One asm output operand: `[name] "constraint" (-> Type | identifier)`.
asm_output_item: $ => seq(
"[", field("name", $.identifier), "]",
$._string_literal,
"(", choice(seq("->", $._type_expression), $.identifier), ")",
),
// Non-empty, comma-separated list of output operands.
asm_output_list: $ => comma_list($.asm_output_item),
// `: outputs?` optionally followed by the input section.
asm_output: $ => seq(
":",
optional(field("output_list", $.asm_output_list)),
optional(field("input", $.asm_input)),
),
// One asm input operand: `[name] "constraint" (expr)`.
asm_input_item: $ => seq(
"[", field("name", $.identifier), "]",
$._string_literal,
"(", field("expr", $._expression), ")",
),
// Non-empty, comma-separated list of input operands.
asm_input_list: $ => comma_list($.asm_input_item),
// `: clobbers?` where the clobbers are a list of string literals.
asm_clobbers: $ => seq(
":",
optional(field("list", $.string_list)),
),
asm_input: $ => seq(
":",
field("input_list", $.asm_input_list),
optional(field("clobbers", $.asm_clobbers)),
),
// `asm volatile? ( template output? )`
asm_expression: $ => prec(expr_precs.primary, seq(
"asm",
optional(field("volatile", alias("volatile", $.volatile))),
"(",
field("template", $._expression),
optional(field("output", $.asm_output)),
")",
)),
// TODO Check associativity
// `left catch |payload|? right`, at the same precedence level as the
// bitwise operators.
catch_expression: $ => prec.left(expr_precs.bitwise,seq(
field("left", $._expression),
"catch",
optional(field("payload", $.payload)),
field("right", $._expression),
)),
// Binary operators. Each table row pairs a precedence level with the
// operator tokens at that level; every row becomes one left-associative
// `left operator right` alternative, with the operator aliased to a
// `binary_operator` node.
binary_expression: $ => {
// TODO Check associativity.
const table = [
[expr_precs.or, "or"],
[expr_precs.and, "and"],
[expr_precs.compare, choice("==", "!=", "<", "<=", ">", ">=")],
[expr_precs.bitwise, choice("&", "^", "|", "orelse")],
[expr_precs.bitshift, choice("<<", ">>")],
[expr_precs.addition, choice("+", "+%", "-", "-%", "++")],
[expr_precs.multiply, choice("*", "/", "%", "**", "*%", "||")],
];
return choice(
...table.map(([precedence, op]) =>
prec.left(
precedence,
seq(
field("left", $._expression),
field("operator", alias(op, $.binary_operator)),
field("right", $._expression),
)
))
);
},
// Prefix operators accepted by `unary_expression`.
unary_operator: _ => choice("~", "!", "-", "-%", "&", "try", "await"),
// `operator expression`
unary_expression: $ => prec(expr_precs.prefix, seq(
field("operator", $.unary_operator),
field("expression", $._expression),
)),
// Identifier: either a plain name (letter or `_`, then letters, digits or
// `_`) or the quoted form `@"..."` whose content follows the string rules.
identifier: $ => token(choice(
seq(/[a-zA-Z_]/, repeat(/[a-zA-Z0-9_]/)),
seq('@"', repeat(string_char), token.immediate('"')),
)),
// Comments
// One or more `//!` lines.
container_doc_comment: _ => repeat1(token(seq("//!", /.*/))),
// One or more `///` lines.
doc_comment: _ => repeat1(token(seq("///", /.*/))),
// `//` up to the end of the line; listed in `extras`.
line_comments: _ => token(repeat1(seq("//", /.*/))),
// Literals
// Double-quoted string on a single line.
string_literal_single: _ => token(seq('"', repeat(string_char), token.immediate('"'))),
// Multi-line string: each piece starts with two backslashes and runs to
// the end of the line.
line_string: _ => token(repeat1(seq(
"\\\\", /.*/,
))),
// Hidden rule: either string form.
_string_literal: $ => choice($.string_literal_single, $.line_string),
// Non-empty, comma-separated list of string literals.
string_list: $ => comma_list($._string_literal),
// `.name`
enum_literal: $ => seq(".", $.identifier),
// `error.Name`
error_value_literal: $ => seq("error", ".", $.identifier),
// `'c'` where `c` is one `char_char`; the closing quote must follow immediately.
char_literal: _ => seq(
"'", char_char, token.immediate("'"),
),
float_literal: _ => token(choice(
seq("0x", hex_int, ".", hex_int, optional(seq(/[pP][-+]?/, dec_int))),
seq(dec_int, ".", dec_int, optional(seq(/[eE][-+]?/, dec_int))),
seq("0x", hex_int, optional("."), /[pP][-+]?/, hex_int),
seq(dec_int, optional("."), /[eE][-+]?/, dec_int),
)),
// Integer literal, as a single token: `0b`, `0o` or `0x` prefixed digits,
// or plain decimal digits.
integer_literal: _ => token(choice(
seq("0b", bin_int),
seq("0o", oct_int),
seq("0x", hex_int),
dec_int,
)),
// Switch stuff
// `switch (expr) { prong, prong, ... }`
switch_expression: $ => seq(
"switch", "(", field("switched_expr", $._expression), ")", "{",
optional_comma_list($.switch_prong), "}",
),
// `case => |payload|? body`
switch_prong: $ => seq(
field("case", $.switch_case),
"=>", optional(field("payload", $.ptr_payload)),
field("body", $._assignment_or_expression),
),
// Either `else` or a non-empty comma-separated list of items.
switch_case: $ => choice(
"else",
comma_list($.switch_item),
),
// A single value, or a range written `start ... end`.
switch_item: $ => seq(
field("start_expr", $._expression),
optional(seq("...", field("end_expr", $._expression))),
),
// Modifiers
// `callconv(expr)`
call_conv: $ => seq("callconv", "(", field("expr", $._expression), ")"),
// `export`, or `extern` with an optional string literal after it.
export_extern_mod: $ => choice("export",seq("extern", optional($.string_literal_single))),
threadlocal_mod: _ => "threadlocal",
// `align(expr)`
byte_align: $ => seq("align", "(", field("expr", $._expression), ")"),
// `align(bytes)` or `align(bytes : bit_range_start : bit_range_end)`
pointer_align: $ => seq(
"align", "(", field("bytes_expr", $._expression),
optional(seq(
":", field("bit_range_start", $._expression), ":",
field("bit_range_end", $._expression),
)),
")",
),
// `linksection(expr)`
link_section: $ => seq("linksection", "(", field("expr", $._expression), ")"),
// Payloads
// `|name|`
payload: $ => seq("|", $.identifier, "|"),
// `|name|` or `|*name|`
ptr_payload: $ => seq("|", optional(field("ptr", "*")), $.identifier, "|"),
// `|name|`, `|*name|`, `|name, index|` or `|*name, index|`
ptr_index_payload: $ => seq(
"|", optional(field("ptr", "*")),
field("payload_name", $.identifier), optional(seq(",", field("index_name", $.identifier))), "|",
),
}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment