Skip to content

Instantly share code, notes, and snippets.

@conartist6
Last active October 8, 2023 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save conartist6/877fdb0ff87c0170a1bd2f8a51dede4b to your computer and use it in GitHub Desktop.
Save conartist6/877fdb0ff87c0170a1bd2f8a51dede4b to your computer and use it in GitHub Desktop.
Annotated agAST builder example
// Every node object is immutable.
// The same goes for properties, attributes, children, terminals, and any arrays.
// An immutable tree can be cached as valid with regard to a particular grammar!
// Note: the lock is shallow -- only the value passed in is frozen, not the values it holds.
const freeze = (node) => {
  const sealed = Object.seal(node);
  return Object.freeze(sealed);
};
// Helpers to make the following code less verbose.
// `id`, `trivia`, `str`, `ref`, and `gap` are written to be used as template tags
// (e.g. t.ref`verb`): they read only the first string chunk they are given, so
// `${}` interpolation is not supported.
const t = {
  // Builds a node whose only child is a String terminal holding `str`.
  // A token has no named properties, so an empty properties object is passed.
  token: (type, str, attributes) => t.node(type, [t.str([str])], {}, attributes),

  // Builds an immutable node.
  //   type:       object whose fields are copied onto the node (see `id`)
  //   children:   ordered array of terminals and references (defaults to [])
  //   properties: named child nodes, or arrays of nodes (defaults to {})
  //   attributes: extra metadata about the node (defaults to {})
  // The arrays/objects passed in are frozen in place, not copied.
  node: (type, children, properties, attributes) => {
    // An omitted (or null) collection becomes an empty one, so consumers can
    // always iterate children and look up properties without null checks.
    const childList = children == null ? [] : children;
    const props = properties == null ? {} : properties;
    const attrs = attributes == null ? {} : attributes;

    // freeze() is shallow: freezing `props` alone would leave array-valued
    // properties mutable, so each of those arrays is frozen as well.
    for (const value of Object.values(props)) {
      if (Array.isArray(value)) freeze(value);
    }

    return freeze({
      ...type,
      children: freeze(childList),
      properties: freeze(props),
      attributes: freeze(attrs),
    });
  },

  // Splits 'Language:Production' at ':' into { language, production }.
  // Only the first two segments are used.
  id: ([str]) => {
    const [language, production] = str.split(':');
    return { language, production };
  },

  // Terminal constructors: each returns a frozen { type, value } pair
  trivia: ([str]) => freeze({ type: 'Trivia', value: str }),
  str: ([str]) => freeze({ type: 'String', value: str }),
  ref: ([property]) => freeze({ type: 'Reference', value: property }),
  gap: ([property]) => freeze({ type: 'Gap', value: property }),
};
// This tree is an example of agAST (A General Abstract Syntax Tree)
// The structure is meant to be useful in describing code written in any language
// The tree represents the input:
//   eat( /\w/ )
const tree = t.node(
  // ID specifies the language and production type
  // The language name will be resolvable to some well-known URL
  // The URL should point to a validator for the language
  // CSTML syntax allows omission of language name when it is implied
  t.id`Instruction:Call`,
  // The children array creates a total ordering of all nodes and tokens
  // It ensures that any document can be printed without needing a grammar
  [
    // A reference child looks up a key in properties
    t.ref`verb`,
    // It should also be possible to reverse a terminal -> node relationship
    // e.g. a linter rule might want to do:
    //   - let node = properties.open
    //   - let term = getTerminal(node)
    //   - getTrailingTrivia(term)
    t.ref`open`,
    t.trivia` `, // The linter would find this trivia
    t.ref`argument`,
    // Trivia has no metadata, and so can be a terminal
    // Comments are considered a separate language embedded onto trivia!
    t.trivia` `,
    t.ref`close`,
  ],
  // The properties object allows for fast named lookups
  {
    verb: t.token(t.id`Instruction:Identifier`, 'eat'),
    open: t.token(t.id`Instruction:Punctuator`, '('),
    argument: t.node(
      t.id`Spamex:RegexMatcher`,
      // NOTE(review): the bracketed name in a reference (e.g. [alternatives])
      // appears to mark a property that holds an array of nodes -- confirm
      [t.ref`open`, t.ref`[alternatives]`, t.ref`close`, t.ref`flags`],
      {
        open: t.token(t.id`Spamex:Punctuator`, '/'),
        // Array-valued properties are frozen explicitly so that the list itself
        // is immutable, like every other array in the tree
        alternatives: Object.freeze([
          t.node(t.id`Regex:Alternative`, [t.ref`[elements]`], {
            elements: Object.freeze([
              t.node(
                t.id`Regex:CharacterSet`,
                [t.ref`escape`, t.ref`value`],
                {
                  escape: t.token(t.id`Regex:Punctuator`, '\\'),
                  value: t.token(t.id`Regex:Keyword`, 'w'),
                },
                // Attributes carry metadata about the node
                { kind: 'word' },
              ),
            ]),
          }),
        ]),
        close: t.token(t.id`Spamex:Punctuator`, '/'),
        // The input has no flags. Empty children and properties are passed
        // explicitly so the node still carries an (empty) children array and
        // properties object instead of undefined.
        flags: t.node(t.id`Regex:Flags`, [], {}),
      },
    ),
    close: t.token(t.id`Instruction:Punctuator`, ')'),
  },
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment