Skip to content

Instantly share code, notes, and snippets.

@jstimpfle
Created January 26, 2021 18:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jstimpfle/f9d534723955fd05ff5a19942c69960d to your computer and use it in GitHub Desktop.
Save jstimpfle/f9d534723955fd05ff5a19942c69960d to your computer and use it in GitHub Desktop.
A little stack machine, done in a few hours
#include <assert.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#define NORETURN __attribute__((noreturn))
#define ARRAY_LENGTH(a) ((int) (sizeof (a) / sizeof (a)[0]))
void msg_fv(const char *fmt, va_list ap)
{
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
}
void msg_f(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
msg_fv(fmt, ap);
va_end(ap);
}
void NORETURN fatal_fv(const char *fmt, va_list ap)
{
msg_fv(fmt, ap);
abort();
}
void NORETURN fatal_f(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
msg_fv(fmt, ap);
va_end(ap);
abort();
}
void *_xcalloc(size_t num, size_t size)
{
void *p = calloc(num, size);
if (!p)
{
fatal_f("OOM!");
}
return p;
}
#define xcalloc(num, type) ((type *) _xcalloc((num), sizeof (type)))
enum
{
INSTR_DEBUG,
INSTR_CONST,
INSTR_LOAD,
INSTR_STORE,
INSTR_OP,
INSTR_JMP,
INSTR_JMPIF,
};
enum
{
OP_ADD,
OP_SUB,
OP_CMP,
};
enum
{
CMP_EQ,
CMP_NE,
CMP_LT,
CMP_LE,
CMP_GT,
CMP_GE,
};
enum
{
OBJECT_INT,
OBJECT_PTR,
OBJECT_ARRAY,
};
typedef struct _Instr Instr;
typedef struct _Object Object;
typedef struct _ExecCtx ExecCtx;
typedef struct _Frame Frame;
struct _Instr
{
int kind;
union {
int t_op;
int t_cmp;
int64_t t_int; // which cases?
struct { int64_t pc; } t_jmp;
struct { int64_t pc; int kind; } t_jmpif;
};
};
struct _Object
{
int kind;
union
{
int64_t t_int;
Object *t_ptr;
};
};
struct _Frame
{
int64_t pc;
int64_t stack_size;
};
struct _ExecCtx
{
Instr instrs[64];
int num_instrs;
Object stack[64];
Frame frames[64];
int num_frames;
};
Frame *get_frame(ExecCtx *ctx)
{
return ctx->frames + ctx->num_frames - 1;
}
void push_frame(ExecCtx *ctx)
{
ctx->frames[ctx->num_frames] = ctx->frames[ctx->num_frames - 1];
++ ctx->num_frames;
}
void pop_frame(ExecCtx *ctx)
{
-- ctx->num_frames;
}
void push_object(ExecCtx *ctx, Object object)
{
Frame *frame = get_frame(ctx);
ctx->stack[frame->stack_size] = object;
++ frame->stack_size;
}
Object pop_object(ExecCtx *ctx)
{
Frame *frame = get_frame(ctx);
assert(frame->stack_size >= 1);
-- frame->stack_size;
return ctx->stack[frame->stack_size];
}
void push_int(ExecCtx *ctx, int64_t val)
{
Object object = {0};
object.kind = OBJECT_INT;
object.t_int = val;
push_object(ctx, object);
}
void exec_op(ExecCtx *ctx, int op)
{
Object b = pop_object(ctx);
Object a = pop_object(ctx);
if (op == OP_ADD)
{
assert(a.kind == OBJECT_INT);
assert(b.kind == OBJECT_INT);
Object result = {0};
result.kind = OBJECT_INT;
result.t_int = a.t_int + b.t_int;
push_object(ctx, result);
}
else if (op == OP_SUB)
{
assert(a.kind == OBJECT_INT);
assert(b.kind == OBJECT_INT);
Object result = {0};
result.kind = OBJECT_INT;
result.t_int = a.t_int - b.t_int;
push_object(ctx, result);
}
}
void print_stack(ExecCtx *ctx)
{
Frame *frame = get_frame(ctx);
printf("Frame: ----------\n");
for (int i = 0; i < frame->stack_size; i++)
{
Object *a = ctx->stack + i;
if (a->kind == OBJECT_INT)
{
printf("(int) %" PRIi64, a->t_int);
}
else
{
printf("(not implemented)");
}
printf("\n");
}
}
void interp(ExecCtx *ctx)
{
for (;;)
{
Frame *frame = get_frame(ctx);
Instr *instr = ctx->instrs + frame->pc;
if (frame->pc == ctx->num_instrs)
{
printf("Bye.\n");
return;
}
if (instr->kind == INSTR_CONST)
{
push_int(ctx, instr->t_int);
++ frame->pc;
}
else if (instr->kind == INSTR_OP)
{
exec_op(ctx, instr->t_op);
++ frame->pc;
}
else if (instr->kind == INSTR_JMP)
{
frame->pc = instr->t_jmp.pc;
}
else if (instr->kind == INSTR_JMPIF)
{
Object object = pop_object(ctx);
assert(object.kind == OBJECT_INT);
int64_t val = object.t_int;
int t_cmp = instr->t_jmpif.kind;
int ok = 0;
if (t_cmp == CMP_EQ && val == 0)
ok = 1;
else if (t_cmp == CMP_NE && val != 0)
ok = 1;
else if (t_cmp == CMP_LT && val < 0)
ok = 1;
else if (t_cmp == CMP_LE && val <= 0)
ok = 1;
else if (t_cmp == CMP_GT && val > 0)
ok = 1;
else if (t_cmp == CMP_GE && val >= 0)
ok = 1;
if (ok)
{
int64_t pc = instr->t_jmpif.pc;
assert(0 <= pc && pc < ctx->num_instrs);
frame->pc = instr->t_jmpif.pc;
}
else
{
++ frame->pc;
}
}
else if (instr->kind == INSTR_DEBUG)
{
print_stack(ctx);
++ frame->pc;
}
}
}
void push_instr(ExecCtx *ctx, Instr instr)
{
assert(ctx->num_instrs < ARRAY_LENGTH(ctx->instrs));
ctx->instrs[ctx->num_instrs] = instr;
++ ctx->num_instrs;
}
#define ALL_TOKENS \
DEFINE_TOKEN(T_intlit) \
DEFINE_TOKEN(T_name) \
DEFINE_TOKEN(T_plus) \
DEFINE_TOKEN(T_minus) \
DEFINE_TOKEN(T_if) \
DEFINE_TOKEN(T_braceleft) \
DEFINE_TOKEN(T_braceright) \
DEFINE_TOKEN(T_bracketleft) \
DEFINE_TOKEN(T_bracketright) \
DEFINE_TOKEN(T_parenleft) \
DEFINE_TOKEN(T_parenright) \
DEFINE_TOKEN(T_semicolon) \
DEFINE_TOKEN(T_comma) \
DEFINE_TOKEN(T_dot) \
DEFINE_TOKEN(T_debug)
#define ALL_STMTS \
DEFINE_STMT(STMT_if) \
DEFINE_STMT(STMT_expr)
#define ALL_EXPRS \
DEFINE_EXPR(EXPR_const) \
DEFINE_EXPR(EXPR_binop) \
DEFINE_EXPR(EXPR_call) \
DEFINE_EXPR(EXPR_name)
enum
{
#define DEFINE_TOKEN(x) x,
ALL_TOKENS
#undef DEFINE_TOKEN
NUM_TOKEN_KINDS,
};
enum
{
#define DEFINE_EXPR(x) x,
ALL_EXPRS
#undef DEFINE_EXPR
NUM_EXPR_KINDS,
};
enum
{
#define DEFINE_STMT(x) x,
ALL_STMTS
#undef DEFINE_STMT
NUM_STMT_KINDS,
};
const char *const token_kind_string[] = {
#define DEFINE_TOKEN(x) #x,
ALL_TOKENS
#undef DEFINE_TOKEN
};
const char *const expr_kind_string[] = {
#define DEFINE_EXPR(x) #x,
ALL_EXPRS
#undef DEFINE_EXPR
};
const char *const stmt_kind_string[] = {
#define DEFINE_STMT(x) #x,
ALL_STMTS
#undef DEFINE_STMT
};
typedef struct _Token Token;
struct _Token
{
int kind;
union
{
char t_name[32];
int64_t t_intlit;
};
};
typedef struct _Expr Expr;
struct _Expr
{
int kind;
union
{
Token t_token;
struct
{
int opkind; // for now a T_? enum
Expr *left;
Expr *right;
} t_binop;
struct
{
Expr *callee_expr;
Expr *arg_expr;
} t_call;
};
};
typedef struct _Stmt Stmt;
struct _Stmt
{
int kind;
union
{
struct
{
Expr *cond_expr;
Stmt *body_stmt;
Stmt *else_stmt;
} t_if;
Expr *t_expr;
};
};
Expr *alloc_expr(void)
{
return xcalloc(1, Expr);
}
Stmt *alloc_stmt(void)
{
return xcalloc(1, Stmt);
}
typedef struct _Parse Parse;
struct _Parse
{
const char *buffer;
int size;
int cursor;
int have_token;
Token token;
};
void NORETURN parse_error_f(Parse *p, const char *fmt, ...)
{
fprintf(stderr, "At %d: ", p->cursor);
va_list ap;
va_start(ap, fmt);
fatal_fv(fmt, ap);
va_end(ap);//XXX
}
int look_char(Parse *p)
{
if (p->cursor == p->size)
{
return -1;
}
return p->buffer[p->cursor];
}
void consume_char(Parse *p)
{
assert(p->cursor < p->size);
++ p->cursor;
}
int look_token(Parse *p)
{
if (p->have_token)
{
return 1;
}
Token *t = &p->token;
int c = look_char(p);
if (c == -1)
{
return 0;
}
while (c <= 32)
{
consume_char(p);
c = look_char(p);
if (c == -1)
{
return 0;
}
}
if (('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| (c == '_'))
{
int i = 0;
for (;;)
{
t->t_name[i] = c;
++ i;
consume_char(p);
c = look_char(p);
if (c == -1)
{
break;
}
if (!('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| (c == '_'))
{
break;
}
}
t->t_name[i] = 0;
if (!strcmp(t->t_name, "if"))
{
t->kind = T_if;
}
else
{
t->kind = T_name;
}
p->have_token = 1;
}
else if ('0' <= c && c <= '9')
{
int64_t n = c - '0';
for (;;)
{
consume_char(p);
c = look_char(p);
if (c == -1)
{
break;
}
if (c < '0' || c > '9')
{
break;
}
n = 10 * n + c - '0';
}
t->t_intlit = n;
t->kind = T_intlit;
p->have_token = 1;
}
#define CHAR_TOKEN(ch, tkind) \
else if (c == ch) \
{ \
consume_char(p); \
t->kind = tkind; \
p->have_token = 1; \
}
CHAR_TOKEN('+', T_plus)
CHAR_TOKEN('-', T_minus)
CHAR_TOKEN('{', T_braceleft)
CHAR_TOKEN('}', T_braceright)
CHAR_TOKEN('(', T_parenleft)
CHAR_TOKEN(')', T_parenright)
CHAR_TOKEN('[', T_bracketleft)
CHAR_TOKEN(']', T_bracketright)
CHAR_TOKEN(';', T_semicolon)
CHAR_TOKEN(',', T_comma)
CHAR_TOKEN('.', T_dot)
else
{
// TODO
}
return p->have_token;
}
void consume_token(Parse *p)
{
assert(p->have_token);
p->have_token = 0;
}
void print_token(Token *t)
{
if (t->kind == T_intlit)
{
printf("(int) %" PRIi64, t->t_intlit);
}
else if (t->kind == T_name)
{
printf("(name) %s", t->t_name);
}
else if (t->kind == T_plus)
{
printf("(plus) +");
}
else if (t->kind == T_minus)
{
printf("(minus) -");
}
else if (t->kind == T_if)
{
printf("(if) if");
}
else if (t->kind == T_debug)
{
printf("(debug) debug");
}
else
{
printf("(unknown)");
}
printf("\n");
}
void expect_token(Parse *p, int kind)
{
if (!look_token(p))
{
parse_error_f(p, "expected %s token, found end of file",
token_kind_string[kind]);
}
Token *t = &p->token;
if (t->kind != kind)
{
parse_error_f(p, "expected %s token, found %s token.",
token_kind_string[kind],
token_kind_string[t->kind]);
}
consume_token(p);
}
Expr *parse_expr(Parse *p)
{
if (!look_token(p))
{
fatal_f("Expected expression, found end of file");
}
Token *t = &p->token;
Expr *e = NULL;
if (t->kind == T_name)
{
e = alloc_expr();
e->kind = EXPR_name;
e->t_token = *t;
consume_token(p);
}
else if (t->kind == T_intlit)
{
e = alloc_expr();
e->kind = EXPR_const;
e->t_token = *t;
consume_token(p);
}
else
{
parse_error_f(p, "Failed to parse expression (got: %s token)",
token_kind_string[t->kind]);
}
for (;;)
{
if (!look_token(p))
{
return e;
}
if (t->kind == T_plus
|| t->kind == T_minus)
{
int opkind = (t->kind == T_plus) ? OP_ADD : OP_SUB;
consume_token(p);
Expr *left = e;
Expr *right = parse_expr(p);
e = alloc_expr();
e->kind = EXPR_binop;
e->t_binop.opkind = opkind;
e->t_binop.left = left;
e->t_binop.right = right;
}
else if (t->kind == T_parenleft)
{
Expr *callee = e;
consume_token(p);
// parse arguments. TODO support multiple arguments
Expr *arg = parse_expr(p);
expect_token(p, T_parenright);
e = alloc_expr();
e->kind = EXPR_call;
e->t_call.callee_expr = callee;
e->t_call.arg_expr = arg;
}
else
{
break;
}
}
return e;
}
Stmt *parse_stmt(Parse *p)
{
if (!look_token(p))
{
assert(0);
}
Token *t = &p->token;
if (t->kind == T_if)
{
consume_token(p);
expect_token(p, T_parenleft);
Expr *e = parse_expr(p);
expect_token(p, T_parenright);
Stmt *body = parse_stmt(p);
Stmt *stmt = alloc_stmt();
stmt->kind = STMT_if;
stmt->t_if.cond_expr = e;
stmt->t_if.body_stmt = body;
stmt->t_if.else_stmt = NULL; //TODO
return stmt;
}
else if (t->kind == T_name
|| t->kind == T_intlit)
{
Expr *e = parse_expr(p);
expect_token(p, T_semicolon);
Stmt *stmt = alloc_stmt();
stmt->kind = STMT_expr;
stmt->t_expr = e;
return stmt;
}
else
{
fatal_f("Failed to parse stmt: %s",
token_kind_string[t->kind]);
}
}
const char test_code[] = "if (8 - 3) 5 + 6;";
// returns address where jump location should be stored. So it can be changed
// later.
int64_t *push_jmpif_instrs(ExecCtx *ctx, int cmp, int64_t jmploc)
{
Instr *instr = ctx->instrs + ctx->num_instrs;
++ ctx->num_instrs;
instr->kind = INSTR_JMPIF;
instr->t_jmpif.kind = cmp;
instr->t_jmpif.pc = jmploc;
return &instr->t_jmp.pc;
}
void push_const_int_instr(ExecCtx *ctx, int64_t val)
{
push_instr(ctx, (Instr) { INSTR_CONST, .t_int = val });
}
void push_op_instr(ExecCtx *ctx, int kind)
{
push_instr(ctx, (Instr) { INSTR_OP, .t_op = kind });
}
void push_debug_instr(ExecCtx *ctx)
{
push_instr(ctx, (Instr) { INSTR_DEBUG, });
}
void push_expr_instrs(ExecCtx *ctx, Expr *expr)
{
if (expr->kind == EXPR_const)
{
Token *t = &expr->t_token;
assert(t->kind == T_intlit); //XXX
push_const_int_instr(ctx, t->t_intlit);
}
else if (expr->kind == EXPR_binop)
{
push_expr_instrs(ctx, expr->t_binop.left);
push_expr_instrs(ctx, expr->t_binop.right);
push_op_instr(ctx, expr->t_binop.opkind);
}
else
{
fatal_f("Not implemented");
}
}
void push_stmt_instrs(ExecCtx *ctx, Stmt *stmt)
{
if (stmt->kind == STMT_expr)
{
push_expr_instrs(ctx, stmt->t_expr);
}
else if (stmt->kind == STMT_if)
{
push_expr_instrs(ctx, stmt->t_if.cond_expr);
int64_t *jmploc = push_jmpif_instrs(ctx, CMP_EQ, -666);
push_stmt_instrs(ctx, stmt->t_if.body_stmt);
*jmploc = ctx->num_instrs;
}
else
{
msg_f("Have a %s stmt", stmt_kind_string[stmt->kind]);
fatal_f("Not implemented: compile %s stmt",
stmt_kind_string[stmt->kind]);
}
}
int main(void)
{
ExecCtx *ctx = &(ExecCtx) {0};
Parse *p = &(Parse)
{ .buffer = test_code, .size = sizeof test_code - 1};
Stmt *stmt = parse_stmt(p);
msg_f("parsed a %s stmt", stmt_kind_string[stmt->kind]);
push_stmt_instrs(ctx, stmt);
push_debug_instr(ctx);
interp(ctx);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment