Created
August 3, 2021 23:28
-
-
Save fwip/562ff2ed84de28cc252a0bc457c8d152 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Helper functions
// Builds a rule matching ONE or more occurrences of `rule`, with `separator`
// between each consecutive pair:  rule (separator rule)*
//
//   rule      - the grammar rule for a single list element
//   separator - the grammar rule placed between elements; wrap it in
//               optional() at the call site if the separator may be omitted
//
// Note: an empty list and a trailing separator are NOT accepted by this helper.
const list = (rule, separator) => {
  const moreElements = repeat(seq(separator, rule));
  return seq(rule, moreElements);
};
// A single line break in any of the three common encodings (CRLF, CR, LF).
const NL = choice("\r\n", "\r", "\n");
// Zero or more line breaks. The negative precedence makes this lose against
// any competing rule that also wants to consume the newline.
const NLOPT = prec(-1, repeat(NL));
// Operator table, in ascending order of precedence.
//
// Operators that belong to the same AWK precedence level share ONE precedence
// value. Giving each operator its own value (as an earlier revision did) makes
// e.g. `a - b + c` group as `a - (b + c)`, because the higher-valued operator
// always wins the shift/reduce decision.
//
// `shape` selects which grammar rule consumes the row:
//   'assign' - assignment operators   (lvalue OP expr)
//   'binary' - infix operators        (expr OP expr)
//   'lunary' - prefix unary operators (OP expr)
//   'runary' - postfix unary operators (expr OP)
//   null     - not generated from this table
const operator_table = [
  // Name, Prec, symbol, shape, associativity, result-type
  ['ASSIGN',       10,   '=',  'assign', 'right', null],
  ['ASSIGNSUB',    10,   '-=', 'assign', 'right', 'num'],
  ['ASSIGNADD',    10,   '+=', 'assign', 'right', 'num'],
  ['ASSIGNDIV',    10,   '/=', 'assign', 'right', 'num'],
  ['ASSIGNMULT',   10,   '*=', 'assign', 'right', 'num'],
  ['ASSIGNMOD',    10,   '%=', 'assign', 'right', 'num'],
  ['ASSIGNEXP',    10,   '^=', 'assign', 'right', 'num'],
  ['TERNARY',      20,   null, null,     'right', null],
  ['LOGOR',        30,   '||', 'binary', 'left',  'num'],
  ['LOGAND',       31,   '&&', 'binary', 'left',  'num'],
  ['INMULTIARRAY', 40,   null, null,     'left',  'num'],
  ['INARRAY',      40,   null, null,     'left',  'num'],
  ['NOMATCH',      50,   '!~', 'binary', null,    'num'],
  ['MATCH',        50,   '~',  'binary', null,    'num'],
  ['GE',           60,   '>=', 'binary', null,    'num'],
  ['GT',           60,   '>',  'binary', null,    'num'],
  ['EQ',           60,   '==', 'binary', null,    'num'],
  ['NE',           60,   '!=', 'binary', null,    'num'],
  ['LE',           60,   '<=', 'binary', null,    'num'],
  ['LT',           60,   '<',  'binary', null,    'num'],
  ['CONCAT',       70,   null, null,     'left',  'string'],
  ['SUB',          80,   '-',  'binary', 'left',  'num'],
  ['ADD',          80,   '+',  'binary', 'left',  'num'],
  ['MOD',          85,   '%',  'binary', 'left',  'num'],
  ['DIV',          85,   '/',  'binary', 'left',  'num'],
  ['MULT',         85,   '*',  'binary', 'left',  'num'],
  ['UNARYMINUS',   90,   '-',  'lunary', null,    'num'],
  ['UNARYPLUS',    90,   '+',  'lunary', null,    'num'],
  // Logical NOT is a prefix operator, not an infix one.
  ['LOGNOT',       90,   '!',  'lunary', null,    'num'],
  ['EXPONENT',     110,  '^',  'binary', 'right', 'num'],
  ['POSTDEC',      120,  '--', 'runary', null,    'num'],
  ['POSTINC',      120,  '++', 'runary', null,    'num'],
  ['PREDEC',       120,  '--', 'lunary', null,    'num'],
  ['PREINC',       120,  '++', 'lunary', null,    'num'],
  ['FIELD',        130,  '$',  null,     null,    'string'],
  ['GROUP',        1000, null, null,     null,    null],
].map(([name, precedence, symbol, shape, assoc, type]) => (
  { name, prec: precedence, symbol, shape, assoc, type }
));

// Views of the table grouped by operator shape, consumed by the grammar rules.
const assign_ops = operator_table.filter(op => op.shape === "assign");
const binary_ops = operator_table.filter(op => op.shape === "binary");
const left_unary_ops = operator_table.filter(op => op.shape === "lunary");
const right_unary_ops = operator_table.filter(op => op.shape === "runary");
// Let the grammar begin! | |
module.exports = grammar({ | |
name: 'awk', | |
extras: $ => [ | |
/[\t ]/, // tabs and spaces | |
"\\\n", // line continuation | |
$.comment, | |
], | |
rules: { | |
program: $ => seq( | |
NLOPT, | |
repeat(prec.left(seq($.item, NLOPT))), | |
), | |
// An 'item' is, conceptually, a pair of a pattern and an action. | |
// The pattern may be omitted, in which case the item matches any input line | |
// The action may be omitted if the pattern is not, in which case the pattern is `{print}` | |
item: $ => prec.right(choice( | |
seq($.pattern, $.action), | |
$.pattern, | |
$.action, | |
)), | |
// A pattern describes when to execute a item | |
// It can be a keyword, a regex, an expression, or two of those things. | |
pattern: $ => choice( | |
'BEGIN', | |
'END', | |
$._expr, | |
seq($._expr, ",", $._expr), | |
), | |
// An 'action' is the part of an item that describes what to execute. | |
// It is always enclosed by curly braces and may contain any number of statements, separated by semicolons. The terminating semicolon is optional. | |
action: $ => seq( | |
'{', | |
NLOPT, | |
list($._statement, $._terminator), | |
NLOPT, | |
'}'), | |
_terminator: $ => prec(1, choice( | |
';', | |
'\n', | |
prec.left(seq($._terminator, $._terminator)) | |
)), | |
_statement: $ => choice( | |
$.print_statement, | |
$.assignment_statement, | |
$._expr, | |
), | |
assignment_statement: $ => choice( | |
...assign_ops.map(op => prec(op.prec, seq( | |
$._lvalue, | |
op.symbol, | |
$._expr | |
))) | |
), | |
_lvalue: $ => choice( | |
$.variable, | |
$.field, | |
), | |
variable: $ => /[A-Za-z][A-Za-z0-9_]*/, | |
print_statement: $ => seq("print", $.print_expr_list), | |
printf_statement: $ => seq( | |
"printf", | |
//field("format", $._expr), | |
$._expr, | |
$.print_expr_list, | |
), | |
print_expr_list: $ => list($._expr, optional(',')), | |
_expr: $ => choice( | |
$.paren_expr, | |
$.unary_expr, | |
$.binary_expr, | |
$.string_literal, | |
$.num, | |
$.variable, | |
$.regex, | |
), | |
num: $ => choice( | |
$._integer, | |
$._decimal, | |
$._scientific_num, | |
), | |
paren_expr: $ => prec(1000, seq( '(', $._expr, ')' )), | |
_integer: $ => /[0-9]+/, | |
_decimal: $ => /[0-9]*\.[0-9]+/, | |
_scientific_num: $ => /[0-9]*\.?[0-9]+[+-]?[eE]-?[0-9]*\.?[0-9]*/, | |
binary_expr: $ => choice( | |
...binary_ops.map(op => prec.left(op.prec, seq( | |
$._expr, | |
op.symbol, | |
$._expr ))) | |
), | |
// A regex starts with a slash, then terminates at the first non-escaped slash | |
regex: $ => seq('/', '/.*[^\\]/', '/'), | |
unary_expr: $ => choice( | |
$.field, | |
...left_unary_ops.map(op => prec(op.prec, seq( | |
op.symbol, $._expr | |
))) | |
), | |
field: $ => seq('$', choice( | |
$._expr, | |
)), | |
// Taken from tree-sitter-java | |
string_literal: $ => seq( | |
'"', | |
repeat( | |
choice( | |
/[^\\"\n]/, | |
/\\(.|\n)/, | |
)), | |
'"'), | |
// A comment starts with a # anywhere in a line, and continues to the end of the line. | |
comment: $ => token(seq('#', /.*/, "\n")), | |
} | |
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Smoke-test input for the grammar: a BEGIN item whose action prints a string literal.
BEGIN { print "Hello, World" }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Keywords. Only literals that the grammar actually defines may be listed
; uncommented here; the rest are kept as a to-do list.
[
  "BEGIN"
  "END"
  ; Future keywords
  ; "break"
  ; "continue"
  ; "delete"
  ; "do"
  ; "else"
  ; "exit"
  ; "for"
  ; "function"
  ; "if"
  ; "in"
  ; "next"
  "print"
  ; "printf"
  ; "return"
  ; "while"
] @keyword

; Literals and identifiers
(string_literal) @string
(num) @number
(variable) @variable

; `#` line comments
(comment) @comment
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment