Skip to content

Instantly share code, notes, and snippets.

@maxbrunsfeld
Last active May 9, 2019 23:54
Show Gist options
  • Save maxbrunsfeld/f56db205f993dc7bc71e8cad2fd5e120 to your computer and use it in GitHub Desktop.
Save maxbrunsfeld/f56db205f993dc7bc71e8cad2fd5e120 to your computer and use it in GitHub Desktop.
Converting raw Tree-sitter syntax tree to a strongly typed structure (TypeScript example)
root {
"name": {
"text": "foo"
},
"parameters": {
"type": "parameters"
},
"body": {
"statements": [
{
"condition": {
"text": "a"
},
"consequence": {
"statements": [
{
"type": "print_statement",
"argument": [
{
"text": "b"
}
]
},
{
"type": "print_statement",
"argument": [
{
"text": "c"
}
]
}
]
},
"alternatives": [
{
"condition": [
{
"text": "d"
}
],
"consequence": [
{
"statements": [
{
"type": "print_statement",
"argument": [
{
"text": "e"
}
]
}
]
}
]
},
{
"condition": [
{
"text": "g"
}
],
"consequence": [
{
"statements": [
{
"type": "print_statement",
"argument": [
{
"text": "h"
}
]
}
]
}
]
},
{
"condition": [
{
"text": "g"
}
],
"consequence": [
{
"statements": [
{
"type": "print_statement",
"argument": [
{
"text": "h"
}
]
}
]
}
]
}
]
}
]
}
}
// Hand-written code (not auto-generated)
function parseToStaticNode(sourceCode: String, language) {
// Get the raw syntax tree.
const parser = new Parser();
parser.setLanguage(Python);
const tree = parser.parse(sourceCode);
const cursor = tree.walk();
// Walk the syntax tree depth first, keeping a stack of field maps
const fieldMapStack = [];
let visitedChildren = false;
while (true) {
// Each node is visited twice:
// once on the way down, and once on the way up.
const node = cursor.currentNode;
// On the way up, all of the node's fields will have been collected
// into the field map.
if (visitedChildren) {
// Convert the field map into a strongly-typed node using the
// generated conversion function.
const typedNode = convertFieldMapToTypedNode(node, fieldMapStack.pop());
// Insert the strongly-typed node into it's parent field map.
const parentFieldMap = fieldMapStack[fieldMapStack.length - 1];
if (!parentFieldMap) return typedNode;
const fieldName = cursor.currentFieldName;
if (fieldName) {
if (!parentFieldMap[fieldName]) parentFieldMap[fieldName] = [];
parentFieldMap[fieldName].push(typedNode);
}
// If there is a next sibling, move forward in the tree.
if (cursor.gotoNextSibling()) {
fieldMapStack.push({type: cursor.currentNode.type});
visitedChildren = false;
}
// If there is a parent, move up in the tree, otherwise terminate.
else if (!cursor.gotoParent()) {
return typedNode
}
}
// On the way down, push a new field map onto the stack.
else if (cursor.gotoFirstChild()) {
fieldMapStack.push({type: cursor.currentNode.type});
} else {
visitedChildren = true;
}
}
}
// Auto-generated data types
class ParametersNode {
constructor () {
// TODO - add fields for parameters
}
}
class IdentifierNode {
text: String
constructor (text) {
this.text = text
}
}
class PrintStatementNode {
arguments: [IdentifierNode]
constructor (args) {
this.arguments = args
}
}
class IfStatementNode {
condition: IdentifierNode
consequence: BlockNode
alternatives: [ElifClause]
constructor (condition, consequence, alternatives) {
this.condition = condition
this.consequence = consequence
this.alternatives = alternatives
}
}
class FunctionDefinition {
name: IdentifierNode
parameters: ParametersNode
body: BlockNode
constructor (name, parameters, body) {
this.name = name
this.parameters = parameters
this.body = body
}
}
class BlockNode {
statements: [IfStatementNode | PrintStatementNode]
constructor (statements) {
this.statements = statements;
}
}
class ElifClause {
condition: IdentifierNode
consequence: BlockNode
constructor (condition, consequence) {
this.condition = condition;
this.consequence = consequence;
}
}
// Auto-generated conversion function
function convertFieldMapToTypedNode(node, fieldMap) {
switch (fieldMap.type) {
case 'if_statement':
return new IfStatementNode(
fieldMap.condition[0],
fieldMap.consequence[0],
fieldMap.alternative
);
case 'function_definition':
return new FunctionDefinition(
fieldMap.name[0],
fieldMap.parameters[0],
fieldMap.body[0]
);
case 'block':
return new BlockNode(
fieldMap.statement,
);
case 'elif_clause':
return new ElifClause(
fieldMap.condition,
fieldMap.consequence,
);
case 'identifier':
return new IdentifierNode(node.text);
default:
return fieldMap;
}
}
// Test program
const fs = require('fs');
const Parser = require('.');
const Python = require('../tree-sitter-python');
const filePath = process.argv[2];
const sourceCode = fs.readFileSync(filePath, 'utf8');
const root = parseToStaticNode(sourceCode, Python);
console.log('root', JSON.stringify(root, null, 2))
def foo(bar, baz):
if a:
print b
print c
elif d:
print e
elif g:
print h
elif g:
print h
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment