Skip to content

Instantly share code, notes, and snippets.

@include-yy
Last active February 14, 2024 18:14
Show Gist options
  • Save include-yy/c83ca77044430facbc90a937a1b78589 to your computer and use it in GitHub Desktop.
Save include-yy/c83ca77044430facbc90a937a1b78589 to your computer and use it in GitHub Desktop.
A naive implementation of a WGSL parser using tree-sitter

tree-sitter-wgsl --- a simple implementation of a WGSL parser in tree-sitter

本 gist 是对 WGSL-20240205 标准的一个简单 tree-sitter 实现

如何使用

参考官方教程,通过 npm 安装 tree-sitter-cli,将 grammar.js 放在项目根目录,scanner.c 放在 src 目录,通过 tree-sitter generate 命令生成可用的 tree-sitter parser。

本 gist 代码参考了以下项目:

本 gist 的 parser 实现非常粗糙,待成熟后我会给出 github repo 链接。


2024-02-15: tree-sitter-wgsl

/**
 * Precedence levels used by the expression rules of the grammar.
 * A larger number binds tighter. The numbering has gaps (8, 12, 13);
 * only the relative order matters.
 *
 * The table is frozen because it is a shared module-level constant, and the
 * declaration is terminated explicitly instead of relying on ASI.
 */
const PREC = Object.freeze({
  LOGICAL_OR: 1,
  LOGICAL_AND: 2,
  INCLUSIVE_OR: 3,
  EXCLUSIVE_OR: 4,
  BITWISE_AND: 5,
  EQUAL: 6,
  RELATIONAL: 7,
  SHIFT: 9,
  ADD: 10,
  MULTIPLY: 11,
  UNARY: 14,
  CALL: 15,
});
module.exports = grammar({
name: 'wgsl',
word: $ => $.identifier,
externals: $ => [
$._block_comment,
],
extras: $ => [
$._comment,
$._block_comment,
$._blankspace,
],
inline: $ => [
$.global_decl,
$._reserved,
],
conflicts: $ => [
[$.template_elaborated_ident, $._expression],
[$.type_specifier, $._expression],
],
rules: {
// $2
translation_unit: $ => seq(
repeat($.global_directive),
repeat($.global_decl),
),
global_decl: $ => choice(
";",
seq($.global_variable_decl, ";"),
seq($.global_value_decl, ";"),
seq($.type_alias_decl, ";"),
$.struct_decl,
$.function_decl,
seq($.const_assert_statement, ";"),
),
// $2.3
diagnostic_rule_name: $ => choice(
$.identifier,
seq($.identifier, ".", $.identifier),
),
// $3.5
literal: $ => choice(
$.int_literal,
$.float_literal,
$.bool_literal,
),
// $3.5.1
bool_literal: $ => choice(
"true",
"false",
),
// $3.5.2
int_literal: $ => choice(
$._decimal_int_literal,
$._hex_int_literal,
),
_decimal_int_literal: $ => choice(
token(/0[iu]?/),
token(/[1-9][0-9]*[iu]?/),
),
_hex_int_literal: $ => token(/0[xX][0-9a-fA-F]+[iu]?/),
float_literal: $ => choice(
$._decimal_float_literal,
$._hex_float_literal,
),
_decimal_float_literal: $ => choice(
token(/0[fh]/),
token(/[1-9][0-9]*[fh]/),
token(/[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fh]?/),
token(/[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fh]?/),
token(/[0-9]+[eE][+-]?[0-9]+[fh]?/),
),
_hex_float_literal: $ => choice(
token(/0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9]+[fh]?)?/),
token(/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*([pP][+-]?[0-9]+[fh]?)?/),
token(/0[xX][0-9a-fA-F]+[pP][+-]?[0-9]+[fh]?/),
),
// $3.7
//ident: $ => $.ident_pattern_token,
//member_ident: $ => $.ident_pattern_token,
//ident_pattern_token: $ =>
//token(/([_\p{XID_Start}][\p{XID_Continue}]+)|([\p{XID_Start}])/uy),
identifier: $ =>
token(/([_\p{XID_Start}][\p{XID_Continue}]+)|([\p{XID_Start}])/uy),
// $3.9
//diagnostic_name_token: $ => $.ident_pattern_token,
// $3.10
template_list: $ => seq(
'<',
commaSep1(choice(
prec.dynamic(3, $.type_specifier),
prec.dynamic(1, $._expression),
)),
alias(token(prec(1, '>')), '>'),
),
// $4
global_directive: $ => choice(
$.diagnostic_directive,
$.enable_directive,
$.requires_directive,
),
// $4.1.1
enable_directive: $ => seq(
"enable",
commaSep1($.identifier),
";",
),
// $4.1.2
requires_directive: $ => seq(
"require",
commaSep1($.identifier),
";",
),
// $4.2
diagnostic_directive: $ => seq("diagnostic", $.diagnostic_control, ";"),
// $6.2.10
struct_decl: $ => seq(
"struct",
field("name", $.identifier),
'{',
commaSep1($.struct_member),
'}',
),
struct_member: $ => seq(
repeat($.attribute),
field("name", $.identifier),
":",
field("type", $.type_specifier),
),
// $6.7
type_alias_decl: $ => seq("alias", $.identifier, "=", $.type_specifier),
// $6.8
type_specifier: $ => $.template_elaborated_ident,
template_elaborated_ident: $ => seq(
$.identifier,
optional($.template_list),
),
// $7.4
variable_or_value_statement: $ => choice(
$.variable_decl,
seq($.variable_decl, "=", $._expression),
seq("let", $.optionally_typed_ident, "=", $._expression),
seq("const", $.optionally_typed_ident, "=", $._expression),
),
variable_decl: $ => seq(
"var",
optional($.template_list),
$.optionally_typed_ident,
),
optionally_typed_ident: $ =>
seq($.identifier, optional(seq(":", $.type_specifier))),
global_variable_decl: $ => seq(
repeat($.attribute),
$.variable_decl,
optional(seq("=", $._expression)),
),
global_value_decl: $ => choice(
seq("const", $.optionally_typed_ident, "=", $._expression),
seq(repeat($.attribute),
"override",
$.optionally_typed_ident,
optional(seq("=", $._expression))),
),
// $8.18
_expression: $ => choice(
$.binary_expression,
$.literal,
$.identifier,
$.template_elaborated_ident,
$.call_expression,
$.paren_expression,
$.unary_expression,
// $.subscript_expression,
// $.field_expression,
$.singular_expression,
),
call_expression: $ => $.call_phrase,
call_phrase: $ => prec(PREC.CALL, seq(
$.template_elaborated_ident,
$.argument_expression_list,
)),
argument_expression_list: $ =>
seq("(", commaSep($._expression), ")"),
paren_expression: $ => seq("(", $._expression, ")"),
component_or_swizzle_specifier: $ => prec.left(PREC.UNARY, seq(
repeat1(choice(
field("index", seq("[", $._expression, "]")),
field("field", seq(".", $.identifier)),
)),
)),
unary_expression: $ => prec.left(PREC.UNARY,
seq(
field("operator", choice("-", "!", "~", "*", "&")),
field("argument", $._expression),
)
),
singular_expression: $ => seq(
choice(
$.template_elaborated_ident,
$.call_expression,
$.literal,
$.paren_expression,
),
$.component_or_swizzle_specifier,
),
binary_expression: $ => {
const table = [
["||", PREC.LOGICAL_OR],
["&&", PREC.LOGICAL_AND],
["|", PREC.INCLUSIVE_OR],
["^", PREC.EXCLUSIVE_OR],
["&", PREC.BITWISE_AND],
["==", PREC.EQUAL],
["!=", PREC.EQUAL],
["<", PREC.RELATIONAL],
[">", PREC.RELATIONAL],
["<=", PREC.RELATIONAL],
[">=", PREC.RELATIONAL],
["<<", PREC.SHIFT],
[">>", PREC.SHIFT],
["+", PREC.ADD],
["-", PREC.ADD],
["*", PREC.MULTIPLY],
["/", PREC.MULTIPLY],
["%", PREC.MULTIPLY],
];
return choice(...table.map(([operator, precedence]) => {
return prec.left(precedence, seq(
field("left", $._expression),
field("operator", operator),
field("right", $._expression),
));
}));
},
lhs_expression: $ => seq(
repeat(choice("*", "&")),
choice(
$.identifier,
seq("(", $.lhs_expression, ")"),
),
optional($.component_or_swizzle_specifier),
),
//$9.1
compound_statement: $ => seq(
repeat($.attribute),
"{",
repeat($._statement),
"}",
),
//$9.2
assignment_statement: $ => choice(
seq(
field("left", $.lhs_expression),
choice("=", $.compound_assignment_operator),
field("right", $._expression)
),
seq(field("left", "_"), "=", field("right", $._expression)),
),
//$9.2.3
compound_assignment_operator: $ => choice(
"+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="
),
//$9.3
increment_statement: $ => seq($.lhs_expression, "++"),
decrement_statement: $ => seq($.lhs_expression, "--"),
//$9.4.1
if_statement: $ => seq(
repeat($.attribute),
"if",
field("condition", $._expression),
field("consequence", $.compound_statement),
optional(seq("else", field("alternative", $.else_statement))),
),
else_statement: $ => choice(
$.compound_statement,
$.if_statement,
),
//$9.4.2
switch_statement: $ => seq(
repeat($.attribute),
"switch",
$._expression,
$._switch_body,
),
_switch_body: $ => seq(
repeat($.attribute),
"{",
repeat1($.switch_clause),
"}",
),
switch_clause: $ => choice(
$.case_clause,
$.default_alone_clause,
),
case_clause: $ =>
seq("case", $.case_selectors, optional(":"), $.compound_statement),
default_alone_clause: $ =>
seq("default", optional(":"), $.compound_statement),
case_selectors: $ => commaSep1(choice("default", $._expression)),
//$9.4.3
loop_statement: $ => seq(
repeat($.attribute),
"loop",
repeat($.attribute),
"{",
repeat($._statement),
optional($.continuing_statement),
"}",
),
//$9.4.4
for_statement: $ => seq(
repeat($.attribute),
"for", "(", $.for_header, ")",
$.compound_statement,
),
for_header: $ => seq(
optional($.for_init),
";",
optional($._expression),
";",
optional($.for_update),
),
for_init: $ => choice(
$.variable_or_value_statement,
$.variable_updating_statement,
$.func_call_statement,
),
for_update: $ => choice(
$.variable_updating_statement,
$.func_call_statement,
),
//$9.4.5
while_statement: $ => seq(
repeat($.attribute),
"while", field("condition", $._expression), $.compound_statement,
),
//$9.4.6
break_statement: $ => "break",
//$9.4.7
break_if_statement: $ => seq("break", "if", $._expression, ";"),
//$9.4.8
continue_statement: $ => "continue",
//$9.4.9
continuing_statement: $ => seq("continuing", $.continuing_compound_statement),
continuing_compound_statement: $ => seq(
repeat($.attribute),
"{",
repeat($._statement),
optional($.break_if_statement),
"}",
),
//$9.4.10
return_statement: $ => seq("return", optional($._expression)),
//$9.5
func_call_statement: $ => $.call_phrase,
//$9.6
const_assert_statement: $ => seq("const_assert", $._expression),
//$9.7
_statement: $ => choice(
";",
seq($.return_statement, ";"),
$.if_statement,
$.switch_statement,
$.loop_statement,
$.for_statement,
$.while_statement,
seq($.func_call_statement, ";"),
seq($.variable_or_value_statement, ";"),
seq($.break_statement, ";"),
seq($.continue_statement, ";"),
seq("discard", ";"),
seq($.variable_updating_statement, ";"),
$.compound_statement,
seq($.const_assert_statement, ";"),
),
variable_updating_statement: $ => choice(
$.assignment_statement,
$.increment_statement,
$.decrement_statement,
),
//$10.1
function_decl: $ => seq(
repeat($.attribute),
"fn",
field("name", $.identifier),
"(",
field("parameters", optional($.param_list)),
")",
field("type", optional(seq("->", repeat($.attribute),
$.template_elaborated_ident))),
field("body", $.compound_statement),
),
param_list: $ => commaSep1($.param),
param: $ => seq(
repeat($.attribute),
$.identifier, ":",
$.type_specifier,
),
//$11
attribute: $ => choice(
seq("@", "align", "(", $._expression, $._attrib_end),
seq("@", "binding", "(", $._expression, $._attrib_end),
seq("@", "builtin", "(", $._expression, $._attrib_end),
seq("@", "const"),
seq("@", "diagnostic", $.diagnostic_control),
seq("@", "group", "(", $._expression, $._attrib_end),
seq("@", "id", "(", $._attrib_end),
seq("@", "interpolate", "(", $._expression, $._attrib_end),
seq("@", "interpolate", "(", $._expression,
",", $._expression, $._attrib_end),
seq("@", "invariant"),
seq("@", "location", "(", $._expression, $._attrib_end),
seq("@", "must_use"),
seq("@", "size", "(", $._expression, $._attrib_end),
seq("@", "workgroup_size", "(", $._expression, $._attrib_end),
seq("@", "workgroup_size", "(",
$._expression, ",", $._expression, $._attrib_end),
seq("@", "workgroup_size", "(",
$._expression, ",", $._expression, ",", $._expression, $._attrib_end),
seq("@", "vertex"),
seq("@", "fragment"),
seq("@", "compute"),
),
_attrib_end: $ => seq(optional(","), ")"),
diagnostic_control: $ => seq(
"(", $.severity_control_name, ",",
$.diagnostic_rule_name, $._attrib_end,
),
//$15.2
// create with regexp replace:
// M-x query-replace-regexp | `'\([a-zA-Z_]+\)'`$ -> token('\1'),
// totol number is 145
_reserved: $ => choice(
token('NULL'), token('Self'), token('abstract'), token('active'),
token('alignas'), token('alignof'), token('as'), token('asm'),
token('asm_fragment'), token('async'), token('attribute'),
token('auto'), token('await'), token('become'), token('binding_array'),
token('cast'), token('catch'), token('class'), token('co_await'),
token('co_return'), token('co_yield'), token('coherent'),
token('column_major'), token('common'), token('compile'),
token('compile_fragment'), token('concept'), token('const_cast'),
token('consteval'), token('constexpr'), token('constinit'),
token('crate'), token('debugger'), token('decltype'), token('delete'),
token('demote'), token('demote_to_helper'), token('do'),
token('dynamic_cast'), token('enum'), token('explicit'),
token('export'), token('extends'), token('extern'), token('external'),
token('fallthrough'), token('filter'), token('final'), token('finally'),
token('friend'), token('from'), token('fxgroup'), token('get'),
token('goto'), token('groupshared'), token('highp'), token('impl'),
token('implements'), token('import'), token('inline'), token('instanceof'),
token('interface'), token('layout'), token('lowp'), token('macro'),
token('macro_rules'), token('match'), token('mediump'), token('meta'),
token('mod'), token('module'), token('move'), token('mut'), token('mutable'),
token('namespace'), token('new'), token('nil'), token('noexcept'),
token('noinline'), token('nointerpolation'), token('noperspective'),
token('null'), token('nullptr'), token('of'), token('operator'),
token('package'), token('packoffset'), token('partition'), token('pass'),
token('patch'), token('pixelfragment'), token('precise'), token('precision'),
token('premerge'), token('priv'), token('protected'), token('pub'),
token('public'), token('readonly'), token('ref'), token('regardless'),
token('register'), token('reinterpret_cast'), token('require'),
token('resource'), token('restrict'), token('self'), token('set'),
token('shared'), token('sizeof'), token('smooth'), token('snorm'),
token('static'), token('static_assert'), token('static_cast'), token('std'),
token('subroutine'), token('super'), token('target'), token('template'),
token('this'), token('thread_local'), token('throw'), token('trait'),
token('try'), token('type'), token('typedef'), token('typeid'),
token('typename'), token('typeof'), token('union'), token('unless'),
token('unorm'), token('unsafe'), token('unsized'), token('use'),
token('using'), token('varying'), token('virtual'), token('volatile'),
token('wgsl'), token('where'), token('with'), token('writeonly'),
token('yield'),
),
//$15.4
severity_control_name: $ => choice(
"error", "warning", "info", "off",
),
swizzle_name: $ => choice(
token(/[rgba]/),
token(/[rgba][rgba]/),
token(/[rgba][rgba][rgba]/),
token(/[rgba][rgba][rgba][rgba]/),
token(/[xyzw]/),
token(/[xyzw][xyzw]/),
token(/[xyzw][xyzw][xyzw]/),
token(/[xyzw][xyzw][xyzw][xyzw]/),
),
// something else here
_comment: $ => seq(token('//'), token(/.*/)),
// A single blankspace code point: space, tab, LF, VT, FF, CR, NEL (U+0085),
// LRM (U+200E), RLM (U+200F), LS (U+2028), PS (U+2029).
// The sticky `y` flag of the original pattern was dropped: it only affects
// how JavaScript's own RegExp matching uses `lastIndex`.
// NOTE(review): assumes tree-sitter does not honour the sticky flag - confirm
// against the tree-sitter CLI version in use.
_blankspace: ($) => token(/[\u0020\u0009\u000a\u000b\u000c\u000d\u0085\u200e\u200f\u2028\u2029]/u)
}
})
/**
 * Zero or more occurrences of `rule`, separated by commas.
 *
 * @param {*} rule - grammar rule for one list element
 * @returns {*} the `commaSep1` list of `rule`, wrapped in `optional`
 */
function commaSep(rule) {
  const nonEmptyList = commaSep1(rule);
  return optional(nonEmptyList);
}
/**
 * One or more occurrences of `rule`, separated by commas, with an optional
 * trailing comma.
 *
 * @param {*} rule - grammar rule for one list element
 * @returns {*} `seq(rule, repeat(seq(',', rule)), optional(','))`
 */
function commaSep1(rule) {
  const followingItems = repeat(seq(',', rule));
  const trailingComma = optional(',');
  return seq(rule, followingItems, trailingComma);
}
#include <tree_sitter/parser.h>
#include <wctype.h>
// External tokens produced by this scanner. The numbering starts at 0, so
// BLOCK_COMMENT is the first (and only) external token.
enum TokenType {
    BLOCK_COMMENT = 0
};
// Creates the scanner payload. This scanner keeps no state, so there is
// nothing to allocate and NULL is returned.
// The parameter list is spelled `(void)`: in C an empty `()` declares a
// function with unspecified parameters rather than one taking none.
void *tree_sitter_wgsl_external_scanner_create(void) {
    return NULL;
}
// Releases the scanner payload. Nothing was allocated, so this does nothing.
void tree_sitter_wgsl_external_scanner_destroy(void *payload) {
    (void)payload;
}
// Resets the scanner state. There is no state, so this does nothing.
void tree_sitter_wgsl_external_scanner_reset(void *payload) {
    (void)payload;
}
// Writes the scanner state into `buffer` and returns the number of bytes
// written. There is no state: nothing is written and 0 is returned.
unsigned int tree_sitter_wgsl_external_scanner_serialize(void *payload, char *buffer) {
    (void)payload;
    (void)buffer;
    return 0u;
}
// Restores the scanner state from `buffer`. There is no state to restore,
// so all arguments are ignored.
void tree_sitter_wgsl_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}
// Consumes the current lookahead character as part of the token
// (the second argument `false` means the character is not skipped).
static void advance(TSLexer *lexer) {
    const bool skip = false;
    lexer->advance(lexer, skip);
}
// Reports whether the lexer has reached the end of the input.
static bool at_eof(TSLexer *lexer) {
    const bool reached_end = lexer->eof(lexer);
    return reached_end;
}
// based on https://github.com/tree-sitter/tree-sitter-rust/blob/f7fb205c424b0962de59b26b931fe484e1262b35/src/scanner.c
//
// Recognises one block comment, including nested block comments.
// Returns true with result_symbol = BLOCK_COMMENT once the outermost comment
// has been closed. Returns false when the input (after leading whitespace)
// does not start a block comment, or when the end of input is reached before
// the comment is closed. `payload` and `valid_symbols` are not used.
bool tree_sitter_wgsl_external_scanner_scan(
    void *payload,
    TSLexer *lexer,
    const bool *valid_symbols
) {
    // Leading whitespace is skipped, i.e. not made part of the token.
    for (; iswspace(lexer->lookahead); ) {
        lexer->advance(lexer, true);
    }

    // The comment opener is a slash immediately followed by a star.
    if (lexer->lookahead != '/') {
        return false;
    }
    advance(lexer);
    if (lexer->lookahead != '*') {
        return false;
    }
    advance(lexer);

    // Number of comments currently open; the opener above accounts for one.
    unsigned int open_comments = 1;
    for (;;) {
        switch (lexer->lookahead) {
        case '/':
            // Possible nested opener: slash then star.
            advance(lexer);
            if (lexer->lookahead == '*') {
                advance(lexer);
                open_comments += 1;
            }
            break;
        case '*':
            // Possible closer: star then slash.
            advance(lexer);
            if (lexer->lookahead != '/') {
                break;
            }
            advance(lexer);
            open_comments -= 1;
            if (open_comments == 0) {
                lexer->result_symbol = BLOCK_COMMENT;
                return true;
            }
            break;
        default:
            // Running out of input inside the comment means no token.
            if (at_eof(lexer)) {
                return false;
            }
            advance(lexer);
            break;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment