Source code for the blog post Abstract Syntax Tree: an Example in C.
Last active
May 31, 2024 09:09
-
-
Save keleshev/6efbf2fc521b2f0797decb19c6932ecc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
typedef struct AST AST; // Forward reference | |
struct AST { | |
enum { | |
AST_MAIN, | |
AST_NUMBER, | |
AST_ADD, | |
AST_MUL, | |
} tag; | |
union { | |
struct AST_MAIN { AST *body; } AST_MAIN; | |
struct AST_NUMBER { int number; } AST_NUMBER; | |
struct AST_ADD { AST *left; AST *right; } AST_ADD; | |
struct AST_MUL { AST *left; AST *right; } AST_MUL; | |
} data; | |
}; | |
AST *ast_new(AST ast) { | |
AST *ptr = malloc(sizeof(AST)); | |
if (ptr) *ptr = ast; | |
return ptr; | |
} | |
void ast_free(AST *ptr) { | |
AST ast = *ptr; | |
switch (ast.tag) { | |
case AST_MAIN: { | |
struct AST_MAIN data = ast.data.AST_MAIN; | |
ast_free(data.body); | |
break; | |
} | |
case AST_NUMBER: { | |
struct AST_NUMBER data = ast.data.AST_NUMBER; | |
break; | |
} | |
case AST_ADD: { | |
struct AST_ADD data = ast.data.AST_ADD; | |
ast_free(data.left); | |
ast_free(data.right); | |
break; | |
} | |
case AST_MUL: { | |
struct AST_MUL data = ast.data.AST_MUL; | |
ast_free(data.left); | |
ast_free(data.right); | |
break; | |
} | |
} | |
free(ptr); | |
} | |
#define AST_NEW(tag, ...) ast_new((AST){tag, {.tag=(struct tag){__VA_ARGS__}}}) | |
void ast_print(AST *ptr) { | |
AST ast = *ptr; | |
switch (ast.tag) { | |
case AST_MAIN: { | |
struct AST_MAIN data = ast.data.AST_MAIN; | |
printf("main() = "); | |
ast_print(data.body); | |
return; | |
} | |
case AST_NUMBER: { | |
struct AST_NUMBER data = ast.data.AST_NUMBER; | |
printf("%d", data.number); | |
return; | |
} | |
case AST_ADD: { | |
struct AST_ADD data = ast.data.AST_ADD; | |
printf("("); | |
ast_print(data.left); | |
printf(" + "); | |
ast_print(data.right); | |
printf(")"); | |
return; | |
} | |
case AST_MUL: { | |
struct AST_MUL data = ast.data.AST_MUL; | |
printf("("); | |
ast_print(data.left); | |
printf(" * "); | |
ast_print(data.right); | |
printf(")"); | |
return; | |
} | |
} | |
} | |
#define emitf printf | |
void ast_emit(AST *ptr) { | |
AST ast = *ptr; | |
switch (ast.tag) { | |
case AST_MAIN: { | |
struct AST_MAIN data = ast.data.AST_MAIN; | |
emitf(".global _main\n"); | |
emitf("_main:\n"); | |
ast_emit(data.body); | |
emitf(" ret\n"); | |
emitf("\n"); | |
return; | |
} | |
case AST_NUMBER: { | |
struct AST_NUMBER data = ast.data.AST_NUMBER; | |
emitf(" mov rax, %d\n", data.number); | |
return; | |
} | |
case AST_ADD: { | |
struct AST_ADD data = ast.data.AST_ADD; | |
ast_emit(data.left); | |
emitf(" push rax\n"); | |
ast_emit(data.right); | |
emitf(" pop rbx\n"); | |
emitf(" add rax, rbx\n"); | |
return; | |
} | |
case AST_MUL: { | |
struct AST_MUL data = ast.data.AST_MUL; | |
ast_emit(data.left); | |
emitf(" push rax\n"); | |
ast_emit(data.right); | |
emitf(" pop rbx\n"); | |
emitf(" mul rbx\n"); | |
return; | |
} | |
} | |
} | |
int main() { | |
// main() = 4 + 2 * 10 + 3 * (5 + 1) | |
AST *term = AST_NEW(AST_MAIN, | |
AST_NEW(AST_ADD, | |
AST_NEW(AST_NUMBER, 4), | |
AST_NEW(AST_ADD, | |
AST_NEW(AST_MUL, | |
AST_NEW(AST_NUMBER, 2), | |
AST_NEW(AST_NUMBER, 10), | |
), | |
AST_NEW(AST_MUL, | |
AST_NEW(AST_NUMBER, 3), | |
AST_NEW(AST_ADD, | |
AST_NEW(AST_NUMBER, 5), | |
AST_NEW(AST_NUMBER, 1), | |
), | |
), | |
), | |
), | |
); | |
printf("/* "); ast_print(term); printf(" */\n"); | |
ast_emit(term); | |
ast_free(term); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type ast = | |
| Main of ast | |
| Number of int | |
| Add of ast * ast | |
| Mul of ast * ast | |
let emitf x = Printf.kprintf print_endline x | |
let rec emit = function | |
| Main body -> | |
emitf ".global _main"; | |
emitf "_main:"; | |
emit body; | |
emitf " ret" | |
| Number n -> | |
emitf " mov rax, %d" n | |
| Add (left, right) -> | |
emit left; | |
emitf " push rax"; | |
emit right; | |
emitf " pop rbx"; | |
emitf " add rax, rbx" | |
| Mul (left, right) -> | |
emit left; | |
emitf " push rax"; | |
emit right; | |
emitf " pop rbx"; | |
emitf " mul rbx" | |
let () = | |
(* main() = 4 + 2 * 10 + 3 * (5 + 1) *) | |
let term = | |
Main ( | |
Add ( | |
Number 4, | |
Add ( | |
Mul (Number 2, Number 10), | |
Mul (Number 3, Add (Number 5, Number 1)) | |
) | |
) | |
) | |
in | |
emit term | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ast.exe: ast.c | |
gcc -Werror -Wswitch -Wimplicit-fallthrough ast.c -o ast.exe | |
clean: | |
rm -f *.o *.exe *.s a.out | |
test.s: ast.exe | |
./ast.exe > test.s | |
test.exe: test.s | |
gcc test.s -masm=intel -o test.exe | |
.PHONY: test | |
test: test.exe | |
./test.exe |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment