Skip to content

Instantly share code, notes, and snippets.

@fwip
Created August 3, 2021 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fwip/562ff2ed84de28cc252a0bc457c8d152 to your computer and use it in GitHub Desktop.
Save fwip/562ff2ed84de28cc252a0bc457c8d152 to your computer and use it in GitHub Desktop.
// Helper functions
/**
 * Builds a rule matching one or more occurrences of `rule`, with `separator`
 * between each adjacent pair.
 *
 * At least one `rule` is required, and a trailing `separator` after the last
 * `rule` is NOT accepted by this helper.
 *
 * @param rule - Grammar rule for a single list element.
 * @param separator - Grammar rule that must appear between two elements.
 * @returns The combined sequence rule: rule (separator rule)*
 */
function list(rule, separator) {
  const remainingElements = repeat(seq(separator, rule));
  return seq(rule, remainingElements);
}
// The line-ending spellings recognised by the grammar: CRLF, lone CR, lone LF.
const LINE_ENDINGS = ["\r\n", "\r", "\n"];
// NL matches exactly one line ending.
const NL = choice(...LINE_ENDINGS);
// NLOPT matches zero or more line endings and carries precedence -1.
const NLOPT = prec(-1, repeat(NL));
// Operator table, listed in non-decreasing order of precedence.
//
// Operators that must group together left-to-right share one precedence
// number: '+' and '-' share a level, and '*', '/' and '%' share a level.
// Giving them distinct numbers would make `a - b + c` group as `a - (b + c)`.
//
// Columns:
//   name   - identifier for the operator
//   prec   - numeric precedence (higher binds tighter)
//   symbol - literal token, or null when the operator has no single token
//   shape  - 'assign' | 'binary' | 'lunary' (prefix) | 'runary' (postfix) | null
//   assoc  - 'left' | 'right' | null
//   type   - result type ('num' | 'string') or null
const operator_table = [
  // Name, Prec, symbol, shape, associativity, result-type
  ['ASSIGN', 10, '=', 'assign', 'right', null],
  ['ASSIGNSUB', 11, '-=', 'assign', 'right', 'num'],
  ['ASSIGNADD', 12, '+=', 'assign', 'right', 'num'],
  ['ASSIGNDIV', 13, '/=', 'assign', 'right', 'num'],
  ['ASSIGNMULT', 14, '*=', 'assign', 'right', 'num'],
  ['ASSIGNMOD', 15, '%=', 'assign', 'right', 'num'],
  ['ASSIGNEXP', 16, '^=', 'assign', 'right', 'num'],
  ['TERNARY', 20, null, null, 'right', null],
  ['LOGOR', 30, '||', 'binary', 'left', 'num'],
  ['LOGAND', 31, '&&', 'binary', 'left', 'num'],
  ['INMULTIARRAY', 40, null, null, 'left', 'num'],
  ['INARRAY', 41, null, null, 'left', 'num'],
  ['NOMATCH', 50, '!~', 'binary', null, 'num'],
  ['MATCH', 51, '~', 'binary', null, 'num'],
  ['GE', 60, '>=', 'binary', null, 'num'],
  ['GT', 61, '>', 'binary', null, 'num'],
  ['EQ', 62, '==', 'binary', null, 'num'],
  ['NE', 63, '!=', 'binary', null, 'num'],
  ['LE', 64, '<=', 'binary', null, 'num'],
  ['LT', 65, '<', 'binary', null, 'num'],
  ['CONCAT', 70, null, null, 'left', 'string'],
  // Additive operators share one level.
  ['SUB', 80, '-', 'binary', 'left', 'num'],
  ['ADD', 80, '+', 'binary', 'left', 'num'],
  // Multiplicative operators share one level.
  ['MOD', 82, '%', 'binary', 'left', 'num'],
  ['DIV', 82, '/', 'binary', 'left', 'num'],
  ['MULT', 82, '*', 'binary', 'left', 'num'],
  ['UNARYMINUS', 90, '-', 'lunary', null, 'num'],
  ['UNARYPLUS', 91, '+', 'lunary', null, 'num'],
  // Logical NOT is a prefix operator (`!x`), not an infix one.
  ['LOGNOT', 100, '!', 'lunary', null, 'num'],
  ['EXPONENT', 110, '^', 'binary', 'right', 'num'],
  ['POSTDEC', 120, '--', 'runary', null, 'num'],
  ['POSTINC', 121, '++', 'runary', null, 'num'],
  ['PREDEC', 122, '--', 'lunary', null, 'num'],
  ['PREINC', 123, '++', 'lunary', null, 'num'],
  ['FIELD', 130, '$', null, null, 'string'],
  ['GROUP', 1000, null, null, null, null],
].map(([name, level, symbol, shape, assoc, type]) => (
  { name, prec: level, symbol, shape, assoc, type }
));

// Views of the table, grouped by operator shape, for use by the grammar rules.
const assign_ops = operator_table.filter(op => op.shape === 'assign');
const binary_ops = operator_table.filter(op => op.shape === 'binary');
const left_unary_ops = operator_table.filter(op => op.shape === 'lunary');
const right_unary_ops = operator_table.filter(op => op.shape === 'runary');
// Let the grammar begin!
module.exports = grammar({
name: 'awk',
extras: $ => [
/[\t ]/, // tabs and spaces
"\\\n", // line continuation
$.comment,
],
rules: {
program: $ => seq(
NLOPT,
repeat(prec.left(seq($.item, NLOPT))),
),
// An 'item' is, conceptually, a pair of a pattern and an action.
// The pattern may be omitted, in which case the item matches any input line
// The action may be omitted if the pattern is not, in which case the pattern is `{print}`
item: $ => prec.right(choice(
seq($.pattern, $.action),
$.pattern,
$.action,
)),
// A pattern describes when to execute a item
// It can be a keyword, a regex, an expression, or two of those things.
pattern: $ => choice(
'BEGIN',
'END',
$._expr,
seq($._expr, ",", $._expr),
),
// An 'action' is the part of an item that describes what to execute.
// It is always enclosed by curly braces and may contain any number of statements, separated by semicolons. The terminating semicolon is optional.
action: $ => seq(
'{',
NLOPT,
list($._statement, $._terminator),
NLOPT,
'}'),
_terminator: $ => prec(1, choice(
';',
'\n',
prec.left(seq($._terminator, $._terminator))
)),
_statement: $ => choice(
$.print_statement,
$.assignment_statement,
$._expr,
),
assignment_statement: $ => choice(
...assign_ops.map(op => prec(op.prec, seq(
$._lvalue,
op.symbol,
$._expr
)))
),
_lvalue: $ => choice(
$.variable,
$.field,
),
variable: $ => /[A-Za-z][A-Za-z0-9_]*/,
print_statement: $ => seq("print", $.print_expr_list),
printf_statement: $ => seq(
"printf",
//field("format", $._expr),
$._expr,
$.print_expr_list,
),
print_expr_list: $ => list($._expr, optional(',')),
_expr: $ => choice(
$.paren_expr,
$.unary_expr,
$.binary_expr,
$.string_literal,
$.num,
$.variable,
$.regex,
),
num: $ => choice(
$._integer,
$._decimal,
$._scientific_num,
),
paren_expr: $ => prec(1000, seq( '(', $._expr, ')' )),
_integer: $ => /[0-9]+/,
_decimal: $ => /[0-9]*\.[0-9]+/,
_scientific_num: $ => /[0-9]*\.?[0-9]+[+-]?[eE]-?[0-9]*\.?[0-9]*/,
binary_expr: $ => choice(
...binary_ops.map(op => prec.left(op.prec, seq(
$._expr,
op.symbol,
$._expr )))
),
// A regex starts with a slash, then terminates at the first non-escaped slash
regex: $ => seq('/', '/.*[^\\]/', '/'),
unary_expr: $ => choice(
$.field,
...left_unary_ops.map(op => prec(op.prec, seq(
op.symbol, $._expr
)))
),
field: $ => seq('$', choice(
$._expr,
)),
// Taken from tree-sitter-java
string_literal: $ => seq(
'"',
repeat(
choice(
/[^\\"\n]/,
/\\(.|\n)/,
)),
'"'),
// A comment starts with a # anywhere in a line, and continues to the end of the line.
comment: $ => token(seq('#', /.*/, "\n")),
}
});
# Minimal sample program: a BEGIN item whose action prints a string literal.
BEGIN { print "Hello, World" }
; Highlight queries for the awk grammar.

; Keywords. Only literals that the grammar currently reaches are enabled;
; the commented entries are placeholders for rules that do not exist yet.
[
  "BEGIN"
  "END"
  ; Future keywords
  ; "break"
  ; "continue"
  ; "delete"
  ; "do"
  ; "else"
  ; "exit"
  ; "for"
  ; "function"
  ; "if"
  ; "in"
  ; "next"
  "print"
  ; "printf"
  ; "return"
  ; "while"
] @keyword

; Literals
(string_literal) @string
(regex) @string.special
(num) @number

; Identifiers
(variable) @variable

; Comments
(comment) @comment
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment