Skip to content

Instantly share code, notes, and snippets.

@coinconclusive
Last active July 1, 2023 21:33
Show Gist options
  • Save coinconclusive/969a570befebe39c525de463291d827f to your computer and use it in GitHub Desktop.
Save coinconclusive/969a570befebe39c525de463291d827f to your computer and use it in GitHub Desktop.
simple interpreter in C with a mark and sweep GC. wip.
/* Copyright © 2023 Anne Redko */
/* Under the MIT License. https://mit-license.org/ */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <stddef.h>
#include <stdarg.h>
#define UNLIKELY(X) __builtin_expect((X), 0)
#define LIKELY(X) __builtin_expect((X), 1)
#define UNREACHABLE() __builtin_unreachable();
#define DEBUG
#ifdef DEBUG
#define FAIL() (void)(*(volatile int*)0 = 0)
#define ASSERT(e) (LIKELY(e) ? (void)0 : FAIL())
#else
#define FAIL() abort()
#define ASSERT(e) (void)(e);
#endif
#ifdef VERBOSE
#define SHOULD_PRINT_GC_SWEEPS 1
#else
#define SHOULD_PRINT_GC_SWEEPS 0
#endif
static inline const char *offset_nullable_ptr_safe(const char *ptr, size_t offset) {
return ptr == NULL ? ptr : ptr + offset;
}
enum ast_type {
AST_INT,
AST_VAR,
AST_STR,
AST_BIN,
AST_INV,
AST_FUN,
AST_BLK,
AST_LET,
AST_RET,
AST_TRN,
AST_SET,
AST_UNR,
AST_ARR,
};
enum ast_bin_type {
AST_BIN_ADD,
AST_BIN_SUB,
AST_BIN_MUL,
AST_BIN_DIV,
AST_BIN_AND,
AST_BIN_BAND,
AST_BIN_OR,
AST_BIN_BOR,
AST_BIN_XOR,
AST_BIN_SHR,
AST_BIN_SHL,
AST_BIN_MOD,
AST_BIN_EQL,
AST_BIN_NEQ,
AST_BIN_GEQ,
AST_BIN_LEQ,
AST_BIN_LTH,
AST_BIN_GTH,
};
const char *ast_bin_type_to_string(enum ast_bin_type type) {
static const char *names[] = {
"add",
"substract",
"multiply",
"divide",
"and",
"binary and",
"or",
"binary or",
"xor",
"shift right",
"shift left",
"modulo",
"equals",
"not equals",
"greater or equals",
"less or equals",
"less than",
"greater than",
};
return names[type];
}
enum ast_unr_type {
AST_UNR_NOT,
AST_UNR_NEG,
AST_UNR_BNEG,
};
const char *ast_unr_type_to_string(enum ast_unr_type type) {
static const char *names[] = {
"boolean not",
"negate",
"binary negate",
};
return names[type];
}
#define MEMALLOC(T) ((T*)calloc(1, sizeof(T)))
struct where { const char *fname; uint32_t line, col; } __attribute__((packed));
struct ast_base { enum ast_type type; struct where where; };
struct ast_var { struct ast_base base; char *name; };
struct ast_int { struct ast_base base; intmax_t value; };
struct ast_str { struct ast_base base; char *value; };
struct ast_bin { struct ast_base base; enum ast_bin_type type; struct ast_base *lhs, *rhs; };
struct ast_inv { struct ast_base base; struct ast_base *fun; size_t arg_count; struct ast_base **args; };
struct ast_fun { struct ast_base base; struct ast_base *body; uint32_t arg_count, min_arg_count; char **args; };
struct ast_blk { struct ast_base base; size_t stmt_count; struct ast_base **stmts; };
struct ast_let { struct ast_base base; char *name; struct ast_base *value; };
struct ast_trn { struct ast_base base; struct ast_base *cond, *then, *other; };
struct ast_set { struct ast_base base; char *name; struct ast_base *value; };
struct ast_ret { struct ast_base base; struct ast_base *value; int error; };
struct ast_unr { struct ast_base base; enum ast_unr_type type; struct ast_base *opr; };
struct ast_arr { struct ast_base base; size_t elem_count; struct ast_base **elems; };
struct ast_var *ast_var_new(const char *name, const struct where *where) {
struct ast_var *n = MEMALLOC(struct ast_var);
n->base.type = AST_VAR;
n->base.where = *where;
n->name = malloc(strlen(name) + 1);
memcpy(n->name, name, strlen(name) + 1);
return n;
}
struct ast_int *ast_int_new(int value, const struct where *where) {
struct ast_int *n = MEMALLOC(struct ast_int);
n->base.type = AST_INT;
n->base.where = *where;
n->value = value;
return n;
}
struct ast_str *ast_str_new(const char *value, const struct where *where) {
struct ast_str *n = MEMALLOC(struct ast_str);
n->base.type = AST_STR;
n->base.where = *where;
n->value = malloc(strlen(value) + 1);
memcpy(n->value, value, strlen(value) + 1);
return n;
}
struct ast_inv *ast_inv_new(struct ast_base *fun, size_t arg_count, struct ast_base **args, const struct where *where) {
struct ast_inv *n = MEMALLOC(struct ast_inv);
n->base.type = AST_INV;
n->base.where = *where;
n->fun = fun;
n->arg_count = arg_count;
if (n->arg_count == 0) n->args = NULL;
else {
n->args = calloc(n->arg_count, sizeof(struct ast_base*));
for (size_t i = 0; i < n->arg_count; ++i) {
n->args[i] = args[i];
}
}
return n;
}
struct ast_blk *ast_blk_new(size_t stmt_count, struct ast_base **stmts, const struct where *where) {
struct ast_blk *n = MEMALLOC(struct ast_blk);
n->base.type = AST_BLK;
n->base.where = *where;
n->stmt_count = stmt_count;
if (n->stmt_count == 0) n->stmts = NULL;
else {
n->stmts = calloc(n->stmt_count, sizeof(struct ast_base*));
for (size_t i = 0; i < n->stmt_count; ++i) {
n->stmts[i] = stmts[i];
}
}
return n;
}
struct ast_let *ast_let_new(const char *name, struct ast_base *value, const struct where *where) {
struct ast_let *n = MEMALLOC(struct ast_let);
n->base.type = AST_LET;
n->base.where = *where;
n->name = malloc(strlen(name) + 1);
memcpy(n->name, name, strlen(name) + 1);
n->value = value;
return n;
}
struct ast_ret *ast_ret_new(struct ast_base *value, int error, const struct where *where) {
struct ast_ret *n = MEMALLOC(struct ast_ret);
n->base.type = AST_RET;
n->base.where = *where;
n->value = value;
n->error = error;
return n;
}
struct ast_unr *ast_unr_new(enum ast_unr_type type, struct ast_base *opr, const struct where *where) {
struct ast_unr *n = MEMALLOC(struct ast_unr);
n->base.type = AST_UNR;
n->base.where = *where;
n->type = type;
n->opr = opr;
return n;
}
struct ast_set *ast_set_new(const char *name, struct ast_base *value, const struct where *where) {
struct ast_set *n = MEMALLOC(struct ast_set);
n->base.type = AST_SET;
n->base.where = *where;
n->name = malloc(strlen(name) + 1);
memcpy(n->name, name, strlen(name) + 1);
n->value = value;
return n;
}
struct ast_trn *ast_trn_new(struct ast_base *cond, struct ast_base *then, struct ast_base *other, const struct where *where) {
struct ast_trn *n = MEMALLOC(struct ast_trn);
n->base.type = AST_TRN;
n->base.where = *where;
n->cond = cond;
n->then = then;
n->other = other;
return n;
}
struct ast_fun *ast_fun_new(
struct ast_base *body,
uint32_t arg_count,
uint32_t min_arg_count,
char **args,
const struct where *where
) {
struct ast_fun *n = MEMALLOC(struct ast_fun);
n->base.type = AST_FUN;
n->base.where = *where;
n->body = body;
n->arg_count = arg_count;
n->min_arg_count = min_arg_count;
if (n->arg_count == 0) n->args = NULL;
else {
n->args = calloc(n->arg_count, sizeof(char*));
for (size_t i = 0; i < n->arg_count; ++i) {
n->args[i] = malloc(strlen(args[i]) + 1);
memcpy(n->args[i], args[i], strlen(args[i]) + 1);
}
}
return n;
}
struct ast_bin *ast_bin_new(enum ast_bin_type type, struct ast_base *lhs, struct ast_base *rhs, const struct where *where) {
struct ast_bin *n = MEMALLOC(struct ast_bin);
n->base.type = AST_BIN;
n->base.where = *where;
n->type = type;
n->lhs = lhs;
n->rhs = rhs;
return n;
}
struct ast_arr *ast_arr_new(size_t elem_count, struct ast_base **elems, const struct where *where) {
struct ast_arr *n = MEMALLOC(struct ast_arr);
n->base.type = AST_ARR;
n->base.where = *where;
n->elem_count = elem_count;
if (n->elem_count == 0) n->elems = NULL;
else {
n->elems = calloc(n->elem_count, sizeof(struct ast_base*));
for (size_t i = 0; i < n->elem_count; ++i) {
n->elems[i] = elems[i];
}
}
return n;
}
void print_ast(struct ast_base *n, int indent, FILE *fout) {
for (int i = 0; i < indent; ++i) fputs(" ", fout);
if (n == NULL) { fprintf(fout, "NULL\n"); return; }
switch (n->type) {
case AST_INT: fprintf(fout, "int %ld\n", ((struct ast_int*)n)->value); return;
case AST_VAR: fprintf(fout, "var '%s'\n", ((struct ast_var*)n)->name); return;
case AST_STR: fprintf(fout, "str \"%s\"\n", ((struct ast_str*)n)->value); return;
case AST_BIN:
fprintf(fout, "bin: %s\n", ast_bin_type_to_string(((struct ast_bin*)n)->type));
print_ast(((struct ast_bin*)n)->lhs, indent + 1, fout);
print_ast(((struct ast_bin*)n)->rhs, indent + 1, fout);
return;
case AST_UNR:
fprintf(fout, "unr: %s\n", ast_unr_type_to_string(((struct ast_unr*)n)->type));
print_ast(((struct ast_unr*)n)->opr, indent + 1, fout);
return;
case AST_INV: fprintf(fout, "inv:\n");
print_ast(((struct ast_inv*)n)->fun, indent + 1, fout);
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i)
print_ast(((struct ast_inv*)n)->args[i], indent + 1, fout);
return;
case AST_FUN: fprintf(fout, "fun: |");
for (size_t i = 0; i < ((struct ast_fun*)n)->arg_count; ++i) {
fprintf(fout, "%s", ((struct ast_fun*)n)->args[i]);
if (i != ((struct ast_fun*)n)->arg_count - 1) fprintf(fout, ", ");
}
fprintf(fout, "|\n");
print_ast(((struct ast_fun*)n)->body, indent + 1, fout);
return;
case AST_SET: fprintf(fout, "set: %s =\n", ((struct ast_set*)n)->name);
print_ast(((struct ast_set*)n)->value, indent + 1, fout);
return;
case AST_LET: fprintf(fout, "let: %s :=\n", ((struct ast_set*)n)->name);
print_ast(((struct ast_set*)n)->value, indent + 1, fout);
return;
case AST_RET: fprintf(fout, "ret%s:\n", ((struct ast_ret*)n)->error ? " (error)" : "");
print_ast(((struct ast_ret*)n)->value, indent + 1, fout);
return;
case AST_TRN: fprintf(fout, "trn:\n");
print_ast(((struct ast_trn*)n)->cond, indent + 1, fout);
print_ast(((struct ast_trn*)n)->then, indent + 1, fout);
print_ast(((struct ast_trn*)n)->other, indent + 1, fout);
return;
case AST_BLK: fprintf(fout, "blk:\n");
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) {
print_ast(((struct ast_blk*)n)->stmts[i], indent + 1, fout);
}
return;
case AST_ARR: fprintf(fout, "arr:\n");
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) {
print_ast(((struct ast_arr*)n)->elems[i], indent + 1, fout);
}
return;
default: fprintf(stderr, "print: unknown ast type: %d!\n", n->type); return;
}
}
struct ast_base *copy_ast(struct ast_base *n) {
if (n == NULL) { fprintf(stderr, "copy_ast(NULL)!\n"); FAIL(); return NULL; }
switch (n->type) {
case AST_INT: return &ast_int_new(((struct ast_int*)n)->value, &n->where)->base;
case AST_VAR: return &ast_var_new(((struct ast_var*)n)->name, &n->where)->base;
case AST_STR: return &ast_str_new(((struct ast_str*)n)->value, &n->where)->base;
case AST_BIN:
return &ast_bin_new(((struct ast_bin*)n)->type,
copy_ast(((struct ast_bin*)n)->lhs),
copy_ast(((struct ast_bin*)n)->rhs),
&n->where
)->base;
case AST_UNR:
return &ast_unr_new(((struct ast_unr*)n)->type,
copy_ast(((struct ast_unr*)n)->opr),
&n->where
)->base;
case AST_INV: {
struct ast_base **args = calloc(sizeof(struct ast_base*), ((struct ast_inv*)n)->arg_count);
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i) {
args[i] = copy_ast(((struct ast_inv*)n)->args[i]);
}
struct ast_inv *r = ast_inv_new(
copy_ast(((struct ast_inv*)n)->fun),
((struct ast_inv*)n)->arg_count,
args,
&n->where
);
if (((struct ast_inv*)n)->arg_count > 0)
free(args);
return &r->base;
}
case AST_BLK: {
struct ast_base **stmts = calloc(sizeof(struct ast_base*), ((struct ast_blk*)n)->stmt_count);
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) {
stmts[i] = copy_ast(((struct ast_blk*)n)->stmts[i]);
}
struct ast_blk *r = ast_blk_new(((struct ast_blk*)n)->stmt_count, stmts, &n->where);
if (((struct ast_blk*)n)->stmt_count > 0) free(stmts);
return &r->base;
}
case AST_TRN:
return &ast_trn_new(
copy_ast(((struct ast_trn*)n)->cond),
copy_ast(((struct ast_trn*)n)->then),
((struct ast_trn*)n)->other == NULL ? NULL : copy_ast(((struct ast_trn*)n)->other),
&n->where
)->base;
case AST_FUN:
return &ast_fun_new(
copy_ast(((struct ast_fun*)n)->body),
((struct ast_fun*)n)->arg_count,
((struct ast_fun*)n)->min_arg_count,
((struct ast_fun*)n)->args,
&n->where
)->base;
case AST_LET:
return &ast_let_new(
((struct ast_let*)n)->name,
copy_ast(((struct ast_let*)n)->value),
&n->where
)->base;
case AST_SET:
return &ast_set_new(
((struct ast_set*)n)->name,
copy_ast(((struct ast_set*)n)->value),
&n->where
)->base;
case AST_RET:
return &ast_ret_new(
copy_ast(((struct ast_ret*)n)->value),
((struct ast_ret*)n)->error,
&n->where
)->base;
case AST_ARR: {
struct ast_base **elems = NULL;
if (((struct ast_arr*)n)->elem_count > 0)
elems = calloc(sizeof(struct ast_base*), ((struct ast_arr*)n)->elem_count);
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) {
elems[i] = copy_ast(((struct ast_arr*)n)->elems[i]);
}
struct ast_arr *r = ast_arr_new(((struct ast_arr*)n)->elem_count, elems, &n->where);
if (((struct ast_arr*)n)->elem_count > 0) free(elems);
return &r->base;
}
default:
fprintf(stderr, "copy: unknown ast type: %d!\n", n->type);
return NULL;
}
}
void free_ast(struct ast_base *n) {
if (n == NULL) { fprintf(stderr, "free_ast(NULL)!\n"); return; }
switch (n->type) {
case AST_INT: break;
case AST_VAR: free(((struct ast_var*)n)->name); break;
case AST_STR: free(((struct ast_str*)n)->value); break;
case AST_BIN:
free_ast(((struct ast_bin*)n)->lhs);
free_ast(((struct ast_bin*)n)->rhs);
break;
case AST_UNR:
free_ast(((struct ast_unr*)n)->opr);
break;
case AST_INV:
free_ast(((struct ast_inv*)n)->fun);
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i) {
free_ast(((struct ast_inv*)n)->args[i]);
((struct ast_inv*)n)->args[i] = NULL;
}
if (((struct ast_inv*)n)->arg_count > 0)
free(((struct ast_inv*)n)->args);
break;
case AST_BLK:
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) {
free_ast(((struct ast_blk*)n)->stmts[i]);
((struct ast_blk*)n)->stmts[i] = NULL;
}
if (((struct ast_blk*)n)->stmt_count > 0)
free(((struct ast_blk*)n)->stmts);
break;
case AST_TRN:
free_ast(((struct ast_trn*)n)->cond);
free_ast(((struct ast_trn*)n)->then);
if (((struct ast_trn*)n)->other != NULL)
free_ast(((struct ast_trn*)n)->other);
break;
case AST_FUN:
free_ast(((struct ast_fun*)n)->body);
for (size_t i = 0; i < ((struct ast_fun*)n)->arg_count; ++i) {
free(((struct ast_fun*)n)->args[i]);
((struct ast_fun*)n)->args[i] = NULL;
}
if (((struct ast_fun*)n)->arg_count > 0)
free(((struct ast_fun*)n)->args);
break;
case AST_LET: free(((struct ast_let*)n)->name); free_ast(((struct ast_let*)n)->value); break;
case AST_SET: free(((struct ast_set*)n)->name); free_ast(((struct ast_set*)n)->value); break;
case AST_RET: free_ast(((struct ast_ret*)n)->value); break;
case AST_ARR:
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) {
free_ast(((struct ast_arr*)n)->elems[i]);
((struct ast_arr*)n)->elems[i] = NULL;
}
if (((struct ast_arr*)n)->elem_count > 0)
free(((struct ast_arr*)n)->elems);
break;
default: fprintf(stderr, "free: unknown ast type: %d!\n", n->type); break;
}
free(n);
}
enum token_type {
TOKEN_EOF,
TOKEN_VAR,
TOKEN_INT,
TOKEN_STR,
TOKEN_ARR, // =>
TOKEN_EAR, // =!>
TOKEN_EQL, // ==
TOKEN_NEQ, // !=
TOKEN_GEQ, // >=
TOKEN_LEQ, // <=
TOKEN_SHR, // >>
TOKEN_SHL, // <<
TOKEN_AND, // &&
TOKEN_OR, // ||
TOKEN_ADDEQ, // +=
TOKEN_SUBEQ, // -=
TOKEN_MULEQ, // *=
TOKEN_DIVEQ, // /=
TOKEN_MODEQ, // %=
TOKEN_SHREQ, // >>=
TOKEN_SHLEQ, // <<=
TOKEN_OREQ, // ||=
TOKEN_BOREQ, // |=
TOKEN_ANDEQ, // &&=
TOKEN_BANDEQ, // &=
TOKEN_OPEQ_FIRST_ = TOKEN_ADDEQ,
TOKEN_OPEQ_LAST_ = TOKEN_BANDEQ,
};
const char *token_type_to_string(enum token_type type) {
static const char *names[] = {
"end of input",
"variable",
"integer",
"string",
"'=>'",
"'=!>'",
"'=='",
"'!='",
"'>='",
"'<='",
"'>>'",
"'<<'",
"'&&'",
"'||'",
"'+='",
"'-='",
"'*='",
"'/='",
"'%='",
"'>>='",
"'<<='",
"'||='",
"'|='",
"'&&='",
"'&='",
};
return names[type];
}
void print_where(struct where where, FILE *fout) {
fprintf(fout, "%s:%d:%d", where.fname, where.line + 1, where.col + 1);
}
struct token { struct where where; char type; size_t value_offset; };
struct lexer {
size_t tok_count;
struct token *toks;
size_t data_size;
size_t data_alloced;
char *data;
};
#define TOKEN_VALUE(LEX, TOK) (offset_nullable_ptr_safe((LEX)->data, (TOK).value_offset))
void print_token(const struct lexer *lex, const struct token *token, FILE *fout) {
if (token->type == TOKEN_VAR || token->type == TOKEN_INT || token->type == TOKEN_STR) {
fprintf(stdout, "%s: '%s'\n", token_type_to_string(token->type), TOKEN_VALUE(lex, *token));
} else if (token->type < ' ') {
fprintf(stdout, "%s\n", token_type_to_string(token->type));
} else {
fprintf(stdout, "'%c'\n", token->type);
}
}
void lexer_free(struct lexer *lex) {
if (lex->tok_count > 0) free(lex->toks);
if (lex->data_alloced > 0) free(lex->data);
}
void lexer_push(struct lexer *lex, char c) {
if (lex->data_size >= lex->data_alloced) {
lex->data_alloced += 64;
if (lex->data_alloced == 64) lex->data = malloc(64);
else lex->data = realloc(lex->data, lex->data_alloced);
}
lex->data_size += 1;
lex->data[lex->data_size - 1] = c;
}
void lexer_add(struct lexer *lex, char type, size_t value_offset, struct where where) {
lex->tok_count += 1;
if (lex->tok_count == 1) lex->toks = malloc(sizeof(struct token));
else lex->toks = realloc(lex->toks, sizeof(struct token) * lex->tok_count);
lex->toks[lex->tok_count - 1].type = type;
lex->toks[lex->tok_count - 1].value_offset = value_offset;
lex->toks[lex->tok_count - 1].where = where;
}
char lexer_eat(struct where *where, FILE *fin) {
char c = fgetc(fin);
if (!isprint(c) && !isspace(c) && c != EOF) {
fprintf(stderr, "illegal character '\\x%02x'!\n", c);
exit(1);
}
if (c == '\n') {
where->col = 0;
where->line += 1;
} else {
where->col += 1;
}
return c;
}
void lexer_add_from(struct lexer *lex, FILE *fin, const char *fname) {
struct where where = { .col = 0, .line = 0, .fname = fname };
char c = lexer_eat(&where, fin);
while (c != EOF) {
while (isspace(c)) c = lexer_eat(&where, fin);
while (c == '#') {
while (c != '\n') c = lexer_eat(&where, fin);
while (isspace(c)) c = lexer_eat(&where, fin);
}
struct where where_start = where;
where_start.col -= 1;
if (c == EOF) break;
else if (isalpha(c) || c == '_') {
size_t start = lex->data_size;
do {
lexer_push(lex, c);
c = lexer_eat(&where, fin);
} while (isalnum(c) || c == '_');
lexer_push(lex, '\0');
lexer_add(lex, TOKEN_VAR, start, where_start);
} else if (isdigit(c)) {
size_t start = lex->data_size;
do {
lexer_push(lex, c);
c = lexer_eat(&where, fin);
} while (isdigit(c) || c == '_');
lexer_push(lex, '\0');
lexer_add(lex, TOKEN_INT, start, where_start);
} else if (c == '"') {
size_t start = lex->data_size;
c = lexer_eat(&where, fin);
while (c != '"' && c != EOF) {
if (c == '\\') {
c = lexer_eat(&where, fin);
switch (c) {
case 'n': c = '\n'; break;
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case '0': c = '\0'; break;
case 'a': c = '\a'; break;
case 'e': c = '\033'; break;
case '"': c = '"'; break;
case '\'': c = '\''; break;
case '\\': c = '\\'; break;
case EOF:
default:
fprintf(stderr, "warning: bad escape sequence: '\\%c'!\n", c);
c = EOF;
break;
}
}
if (c != EOF) lexer_push(lex, c);
c = lexer_eat(&where, fin);
}
c = lexer_eat(&where, fin);
lexer_push(lex, '\0');
lexer_add(lex, TOKEN_STR, start, where_start);
} else if (c == '=') {
c = lexer_eat(&where, fin);
if (c == '=') {
c = lexer_eat(&where, fin);
lexer_add(lex, TOKEN_EQL, 0, where_start);
} else if (c == '>') {
c = lexer_eat(&where, fin);
lexer_add(lex, TOKEN_ARR, 0, where_start);
} else if (c == '!') {
c = lexer_eat(&where, fin);
if (c == '>') {
c = lexer_eat(&where, fin);
lexer_add(lex, TOKEN_EAR, 0, where_start);
} else {
lexer_add(lex, TOKEN_EQL, 0, where_start);
lexer_add(lex, '>', 0, where_start);
}
} else lexer_add(lex, '=', 0, where_start);
} else if (c == '!') {
c = lexer_eat(&where, fin);
if (c == '=') {
c = lexer_eat(&where, fin);
lexer_add(lex, TOKEN_NEQ, 0, where_start);
} else lexer_add(lex, '!', 0, where_start);
}
#define EQ_TOKEN(X, T) { \
c = lexer_eat(&where, fin); \
if (c == '=') { \
lexer_add(lex, T, 0, where_start); \
c = lexer_eat(&where, fin); \
} else lexer_add(lex, X, 0, where_start); \
}
#define EQ_2TOKEN(X, TE, T2, T2E) { \
c = lexer_eat(&where, fin); \
if (c == '=') lexer_add(lex, TE, 0, where_start); \
if (c == X) { \
c = lexer_eat(&where, fin); \
if (c == '=') lexer_add(lex, T2E, 0, where_start); \
else { lexer_add(lex, T2, 0, where_start); } \
} \
else { lexer_add(lex, X, 0, where_start); } \
}
else if (c == '+') EQ_TOKEN('+', TOKEN_ADDEQ)
else if (c == '-') EQ_TOKEN('-', TOKEN_SUBEQ)
else if (c == '*') EQ_TOKEN('*', TOKEN_MULEQ)
else if (c == '/') EQ_TOKEN('/', TOKEN_DIVEQ)
else if (c == '%') EQ_TOKEN('%', TOKEN_MODEQ)
else if (c == '&') EQ_2TOKEN('&', TOKEN_BANDEQ, TOKEN_AND, TOKEN_ANDEQ)
else if (c == '|') EQ_2TOKEN('|', TOKEN_BOREQ, TOKEN_OR, TOKEN_OREQ)
else if (c == '>') EQ_2TOKEN('>', TOKEN_GEQ, TOKEN_SHR, TOKEN_SHREQ)
else if (c == '<') EQ_2TOKEN('<', TOKEN_LEQ, TOKEN_SHL, TOKEN_SHLEQ)
#undef EQ_TOKEN
#undef EQ_2TOKEN
else {
lexer_add(lex, c, 0, where_start);
c = lexer_eat(&where, fin);
}
}
where.col -= 1;
lexer_add(lex, TOKEN_EOF, 0, where);
}
enum ast_bin_type token_type_opeq_to_ast_bin_type(enum token_type type) {
switch (type) {
case TOKEN_ADDEQ: return AST_BIN_ADD;
case TOKEN_SUBEQ: return AST_BIN_SUB;
case TOKEN_MULEQ: return AST_BIN_MUL;
case TOKEN_DIVEQ: return AST_BIN_DIV;
case TOKEN_MODEQ: return AST_BIN_MOD;
case TOKEN_SHREQ: return AST_BIN_SHR;
case TOKEN_SHLEQ: return AST_BIN_SHL;
case TOKEN_OREQ: return AST_BIN_OR;
case TOKEN_BOREQ: return AST_BIN_BOR;
case TOKEN_ANDEQ: return AST_BIN_AND;
case TOKEN_BANDEQ: return AST_BIN_BAND;
default: fprintf(stderr, "tokeneqop->bintype: bad type\n"); exit(1);
}
}
struct err_ast { struct ast_base *ast; const char *error; const struct token *rest; };
struct err_asts { size_t ast_count; struct ast_base **asts; const char *error; const struct token *rest; };
#define ERR_AST_IS_ERR(X) ((X).error != NULL)
#define ERR_AST_IS_AST(X) ((X).ast != NULL)
struct parser {
struct lexer *lex;
};
struct err_ast parse_int(struct parser *p, const struct token *in) {
if (in->type != TOKEN_INT) return (struct err_ast){ NULL, "expected variable", in };
return (struct err_ast){ &ast_int_new(atoi(TOKEN_VALUE(p->lex, *in)), &in->where)->base, NULL, in + 1 };
}
struct err_ast parse_var(struct parser *p, const struct token *in) {
if (in->type != TOKEN_VAR) return (struct err_ast){ NULL, "expected variable", in };
struct ast_var *n = ast_var_new(TOKEN_VALUE(p->lex, *in), &in->where);
return (struct err_ast){ &n->base, NULL, in + 1 };
}
struct err_ast parse_str(struct parser *p, const struct token *in) {
if (in->type != TOKEN_STR) return (struct err_ast){ NULL, "expected string", in };
struct ast_str *n = ast_str_new(TOKEN_VALUE(p->lex, *in), &in->where);
return (struct err_ast){ &n->base, NULL, in + 1 };
}
static inline void safe_eat_token(const struct token **in) {
*in = (*in)->type == TOKEN_EOF ? *in : ++*in;
}
struct err_ast parse_expr(struct parser *p, const struct token *in);
struct err_ast parse_stmt(struct parser *p, const struct token *in);
struct err_asts parse_stmts(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_stmt(p, in);
if (ERR_AST_IS_ERR(r)) return (struct err_asts){ 0, NULL, r.error, r.rest };
in = r.rest;
struct ast_base **stmts = malloc(sizeof(struct ast_base*));
size_t stmt_count = 1;
stmts[0] = r.ast;
while (in->type == ';') {
safe_eat_token(&in);
if (in->type == '}' || in->type == TOKEN_EOF) break;
r = parse_stmt(p, in);
if (ERR_AST_IS_ERR(r)) {
for (size_t i = 0; i < stmt_count; ++i) free_ast(stmts[i]);
if (stmt_count > 0) free(stmts);
return (struct err_asts){ 0, NULL, r.error, r.rest };
}
in = r.rest;
stmt_count += 1;
stmts = realloc(stmts, sizeof(struct ast_base*) * stmt_count);
stmts[stmt_count - 1] = r.ast;
}
return (struct err_asts){ stmt_count, stmts, NULL, in };
}
struct err_ast parse_block(struct parser *p, const struct token *in) {
if (in->type != '{') return (struct err_ast){ NULL, "expected '{'", in };
safe_eat_token(&in);
size_t stmt_count = 0;
struct ast_base **stmts = NULL;
if ((in + 1)->type != '}') {
struct err_asts r = parse_stmts(p, in);
if (ERR_AST_IS_ERR(r)) return (struct err_ast){ NULL, r.error, r.rest };
in = r.rest;
stmt_count = r.ast_count;
stmts = r.asts;
if (in->type != '}') {
for (size_t i = 0; i < stmt_count; ++i) free_ast(stmts[i]);
if (stmt_count > 0) free(stmts);
return (struct err_ast){ NULL, "expected closing bracket", in };
}
} else safe_eat_token(&in);
safe_eat_token(&in);
struct err_ast r = { &ast_blk_new(stmt_count, stmts, &in->where)->base, NULL, in };
if (stmt_count > 0) free(stmts);
return r;
}
struct err_ast parse_array(struct parser *p, const struct token *in) {
if (in->type != '[') return (struct err_ast){ NULL, "expected '['", in };
size_t elem_count = 0;
struct ast_base **elems = NULL;
if ((in + 1)->type != ']') {
do {
safe_eat_token(&in);
struct err_ast r = parse_expr(p, in);
if (ERR_AST_IS_ERR(r)) {
for (size_t i = 0; i < elem_count; ++i) free_ast(elems[i]);
if (elem_count > 0) free(elems);
return r;
}
in = r.rest;
elem_count += 1;
if (elem_count == 1) elems = malloc(sizeof(struct ast_base*));
else elems = realloc(elems, sizeof(struct ast_base*) * elem_count);
elems[elem_count - 1] = r.ast;
} while (in->type == ',');
if (in->type != ']') {
for (size_t i = 0; i < elem_count; ++i) free_ast(elems[i]);
if (elem_count > 0) free(elems);
return (struct err_ast){ NULL, "expected closing ']'", in };
}
} else safe_eat_token(&in);
safe_eat_token(&in);
struct err_ast r = { &ast_arr_new(elem_count, elems, &in->where)->base, NULL, in };
if (elem_count > 0) free(elems);
return r;
}
struct err_ast parse_atom(struct parser *p, const struct token *in) {
if (in->type == '!' || in->type == '-' || in->type == '~') {
enum ast_unr_type t
= in->type == '!' ? AST_UNR_NOT
: in->type == '-' ? AST_UNR_NEG
: in->type == '~' ? AST_UNR_BNEG : 0;
safe_eat_token(&in);
struct err_ast r = parse_atom(p, in);
if (ERR_AST_IS_ERR(r)) return r;
return (struct err_ast){ &ast_unr_new(t, r.ast, &in->where)->base, NULL, r.rest };
}
struct err_ast r;
r = parse_var(p, in);
if (ERR_AST_IS_AST(r)) return r;
r = parse_int(p, in);
if (ERR_AST_IS_AST(r)) return r;
r = parse_str(p, in);
if (ERR_AST_IS_AST(r)) return r;
if (in->type == '{') { return parse_block(p, in); }
if (in->type == '[') { return parse_array(p, in); }
if (in->type == '(') {
safe_eat_token(&in);
r = parse_expr(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
if (in->type != ')') {
free_ast(r.ast);
return (struct err_ast){ NULL, "expected closing parenthesis (subexpr)", in };
}
safe_eat_token(&in);
return (struct err_ast){ r.ast, NULL, in };
}
return (struct err_ast){ NULL, "expected atom", in };
}
struct err_ast parse_fun(struct parser *p, const struct token *in, int *body_error) {
*body_error = 0;
if (in->type != '(' && in->type != TOKEN_VAR)
return (struct err_ast){ NULL, "expected function arguments", in };
uint32_t arg_count = 0, min_arg_count = 0;
int optional = 0;
char **args = NULL;
if (in->type != TOKEN_VAR) { // not one argument
if ((in + 1)->type != ')') {
int first = 1;
do {
safe_eat_token(&in);
if (in->type == '?' && !optional) {
optional = 1;
safe_eat_token(&in);
}
struct err_ast r2 = parse_var(p, in);
if (ERR_AST_IS_ERR(r2)) {
for (size_t i = 0; i < arg_count; ++i) free(args[i]);
if (arg_count > 0) free(args);
// *body_error = !first;
return (struct err_ast){ NULL, "expected argument name", in };
}
in = r2.rest;
arg_count += 1;
if (!optional) min_arg_count += 1;
if (arg_count == 1) args = malloc(sizeof(char*));
else args = realloc(args, sizeof(char*) * arg_count);
char *argname = ((struct ast_var*)r2.ast)->name;
args[arg_count - 1] = malloc(strlen(argname) + 1);
memcpy(args[arg_count - 1], argname, strlen(argname) + 1);
free_ast(r2.ast); // we don't need the node anymore. find a better way of doing this?
first = 0;
} while (in->type == ',');
if (in->type != ')') {
for (size_t i = 0; i < arg_count; ++i) free(args[i]);
if (arg_count > 0) free(args);
return (struct err_ast){ NULL, "expected closing parenthesis", in };
}
} else safe_eat_token(&in); // eat the opening paren
} else {
arg_count = 1;
args = calloc(1, sizeof(char*));
size_t arglen = strlen(TOKEN_VALUE(p->lex, *in)) + 1;
args[0] = malloc(arglen);
memcpy(args[0], TOKEN_VALUE(p->lex, *in), arglen);
}
safe_eat_token(&in);
if (in->type != TOKEN_ARR) { // TODO: error arrows?
for (size_t i = 0; i < arg_count; ++i) free(args[i]);
if (arg_count > 0) free(args);
return (struct err_ast){ NULL, "expected arrow", in };
}
safe_eat_token(&in);
struct err_ast r = parse_expr(p, in);
if (ERR_AST_IS_ERR(r)) {
for (size_t i = 0; i < arg_count; ++i) free(args[i]);
if (arg_count > 0) free(args);
*body_error = 1;
return r;
}
in = r.rest;
r = (struct err_ast){ &ast_fun_new(r.ast, arg_count, min_arg_count, args, &in->where)->base, NULL, in };
for (size_t i = 0; i < arg_count; ++i) free(args[i]);
if (arg_count > 0) free(args);
return r;
}
struct err_ast parse_inv(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_atom(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == '(') {
size_t arg_count = 0;
struct ast_base **args = NULL;
if ((in + 1)->type != ')') {
struct err_ast r_arg;
do {
safe_eat_token(&in);
r_arg = parse_expr(p, in);
if (ERR_AST_IS_ERR(r_arg)) {
for (size_t i = 0; i < arg_count; ++i) free_ast(args[i]);
if (arg_count > 0) free(args);
free_ast(r.ast);
return r_arg;
}
in = r_arg.rest;
arg_count += 1;
if (arg_count == 1) args = malloc(sizeof(struct ast_base*));
else args = realloc(args, sizeof(struct ast_base*) * arg_count);
args[arg_count - 1] = r_arg.ast;
} while (in->type == ',');
if (in->type != ')') {
for (size_t i = 0; i < arg_count; ++i) free_ast(args[i]);
if (arg_count > 0) free(args);
free_ast(r.ast);
return (struct err_ast){ NULL, "expected closing parenthesis (inv)", in };
}
} else safe_eat_token(&in);
safe_eat_token(&in);
int had_block = 0;
if (in->type == '(' || in->type == TOKEN_VAR) {
int body_error;
struct err_ast r_fun = parse_fun(p, in, &body_error);
if (ERR_AST_IS_AST(r_fun)) {
in = r_fun.rest;
arg_count += 1;
if (arg_count == 1) args = malloc(sizeof(struct ast_base*));
else args = realloc(args, sizeof(struct ast_base*) * arg_count);
args[arg_count - 1] = r_fun.ast;
had_block = 1;
} else if (ERR_AST_IS_ERR(r_fun) && body_error) {
return r;
}
}
r = (struct err_ast){ &ast_inv_new(r.ast, arg_count, args, &in->where)->base, NULL, in };
if (arg_count > 0) free(args);
if (had_block) break;
}
return r;
}
struct err_ast parse_bandbor(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_inv(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == '&' || in->type == '|') {
enum ast_bin_type t = in->type == '&' ? AST_BIN_BAND : AST_BIN_BOR;
safe_eat_token(&in);
struct err_ast r2 = parse_inv(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in };
}
return r;
}
struct err_ast parse_muldiv(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_bandbor(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == '*' || in->type == '/') {
enum ast_bin_type t = in->type == '*' ? AST_BIN_MUL : AST_BIN_DIV;
safe_eat_token(&in);
struct err_ast r2 = parse_bandbor(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in };
}
return r;
}
struct err_ast parse_addsub(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_muldiv(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == '+' || in->type == '-') {
enum ast_bin_type t = in->type == '+' ? AST_BIN_ADD : AST_BIN_SUB;
safe_eat_token(&in);
struct err_ast r2 = parse_muldiv(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in };
}
return r;
}
struct err_ast parse_cmp(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_addsub(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == '<' || in->type == '>' || in->type == TOKEN_GEQ ||
in->type == TOKEN_LEQ || in->type == TOKEN_EQL || in->type == TOKEN_NEQ) {
enum ast_bin_type t;
switch (in->type) {
case '<': t = AST_BIN_LTH; break;
case '>': t = AST_BIN_GTH; break;
case TOKEN_GEQ: t = AST_BIN_GEQ; break;
case TOKEN_LEQ: t = AST_BIN_LEQ; break;
case TOKEN_EQL: t = AST_BIN_EQL; break;
case TOKEN_NEQ: t = AST_BIN_NEQ; break;
default: UNREACHABLE();
}
safe_eat_token(&in);
struct err_ast r2 = parse_addsub(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in };
}
return r;
}
struct err_ast parse_andor(struct parser *p, const struct token *in) {
struct err_ast r;
r = parse_cmp(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
while (in->type == TOKEN_AND || in->type == TOKEN_OR) {
enum ast_bin_type t = in->type == '+' ? AST_BIN_ADD : AST_BIN_SUB;
safe_eat_token(&in);
struct err_ast r2 = parse_cmp(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in };
}
return r;
}
struct err_ast parse_primary(struct parser *p, const struct token *in) {
return parse_andor(p, in);
}
struct err_ast parse_stmt(struct parser *p, const struct token *in) {
if (in->type == TOKEN_ARR || in->type == TOKEN_EAR
|| (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "return") == 0)) {
int error = in->type == TOKEN_EAR;
safe_eat_token(&in);
struct err_ast r = parse_expr(p, in);
if (ERR_AST_IS_ERR(r)) return r;
return (struct err_ast){ &ast_ret_new(r.ast, error, &in->where)->base, NULL, r.rest };
}
if (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "if") == 0) {
safe_eat_token(&in);
struct err_ast r_cond = parse_expr(p, in);
if (ERR_AST_IS_ERR(r_cond)) return r_cond;
in = r_cond.rest;
struct err_ast r_then = parse_block(p, in);
if (ERR_AST_IS_ERR(r_then)) {
free_ast(r_cond.ast);
return r_then;
}
in = r_then.rest;
struct ast_base *other = NULL;
if (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "else") == 0) {
safe_eat_token(&in);
struct err_ast r_other = parse_block(p, in);
if (ERR_AST_IS_ERR(r_other)) {
free_ast(r_cond.ast);
free_ast(r_then.ast);
return r_other;
}
in = r_other.rest;
other = r_other.ast;
}
return (struct err_ast){ &ast_trn_new(r_cond.ast, r_then.ast, other, &in->where)->base, NULL, in };
}
return parse_expr(p, in);
}
struct err_ast parse_expr(struct parser *p, const struct token *in) {
if (in->type == '(' || in->type == TOKEN_VAR) {
int body_error;
struct err_ast r = parse_fun(p, in, &body_error);
if (ERR_AST_IS_AST(r)) return r;
if (ERR_AST_IS_ERR(r) && body_error) {
return r;
}
}
if (
in->type == TOKEN_VAR && (
(in + 1)->type == '=' ||
(in + 1)->type == ':' ||
((in + 1)->type >= TOKEN_OPEQ_FIRST_ &&
(in + 1)->type <= TOKEN_OPEQ_LAST_))
) {
struct err_ast r_name = parse_var(p, in);
if (ERR_AST_IS_ERR(r_name)) return r_name;
const char *name = ((struct ast_var*)r_name.ast)->name;
in = r_name.rest;
int is_let = (in->type == ':');
if (is_let) safe_eat_token(&in);
if (in->type != '=' && !is_let) {
enum ast_bin_type t = token_type_opeq_to_ast_bin_type(in->type);
safe_eat_token(&in);
struct err_ast r_value = parse_expr(p, in);
if (ERR_AST_IS_ERR(r_value)) {
free_ast(r_name.ast);
return r_value;
}
return (struct err_ast){
&ast_set_new(name, &ast_bin_new(t, r_name.ast, r_value.ast, &in->where)->base, &in->where)->base,
NULL, r_value.rest
};
} else {
if (in->type != '=') {
return r_name; // not assignment, just a var (TODO: verify that this works)
}
safe_eat_token(&in);
}
struct err_ast r_value = parse_expr(p, in);
if (ERR_AST_IS_ERR(r_value)) {
free_ast(r_name.ast);
return r_value;
}
struct err_ast r;
if (is_let) {
r = (struct err_ast){ &ast_let_new(name, r_value.ast, &in->where)->base, NULL, r_value.rest };
} else {
r = (struct err_ast){ &ast_set_new(name, r_value.ast, &in->where)->base, NULL, r_value.rest };
}
free_ast(r_name.ast);
return r;
}
struct err_ast r = parse_primary(p, in);
if (ERR_AST_IS_ERR(r)) return r;
in = r.rest;
if (in->type == '?') {
safe_eat_token(&in);
struct err_ast r2 = parse_expr(p, in);
if (ERR_AST_IS_ERR(r2)) {
free_ast(r.ast);
return r2;
}
in = r2.rest;
struct err_ast r3 = { /* important! */ NULL };
if (in->type == ':') {
safe_eat_token(&in);
r3 = parse_expr(p, in);
if (ERR_AST_IS_ERR(r3)) {
free_ast(r.ast);
free_ast(r2.ast);
return r3;
}
in = r3.rest;
}
return (struct err_ast){ &ast_trn_new(r.ast, r2.ast, r3.ast, &in->where)->base, NULL, in };
}
return r;
}
enum value_type { VALUE_NIL, VALUE_INT, VALUE_EXT, VALUE_DAT, VALUE_OBJ, VALUE_RET };
struct gc_obj_ctx;
struct gc {
size_t obj_count;
struct gc_obj_base **objs;
};
enum gc_obj_type { GC_OBJ_STR, GC_OBJ_ARR, GC_OBJ_MAP, GC_OBJ_FUN, GC_OBJ_CTX };
struct value;
struct gc_obj_base { enum gc_obj_type type; int marked; };
struct eval_context {
struct gc *gc;
struct gc_obj_ctx *this;
size_t stack_size, stack_capacity;
struct value *stack;
size_t callstack_size, callstack_capacity;
struct callstack_entry *callstack;
};
struct ret_value;
struct value_ext {
struct value (*func)(struct eval_context *ctx, size_t arg_count, struct value *args);
const char *name;
struct where where;
};
struct value {
enum value_type type;
union {
intmax_t int_value;
struct gc_obj_base *obj_value;
void *dat_value;
struct ret_value *ret_value;
struct value_ext ext_value;
};
};
struct callstack_entry {
struct where call_location;
struct value callee;
};
struct ret_value {
int error;
struct value value;
};
struct gc_obj_str { struct gc_obj_base base; char *value; };
struct gc_obj_arr { struct gc_obj_base base; size_t count; struct value *items; };
struct gc_obj_fun {
struct gc_obj_base base;
struct gc_obj_ctx *ctx;
uint32_t arg_count, min_arg_count;
char **args;
struct ast_base *code;
struct where where;
char *name;
};
struct gc_obj_ctx {
struct gc_obj_base base;
struct gc_obj_ctx *parent;
size_t var_count;
struct {
char *name;
struct value value;
} *vars;
};
void gc_free(struct gc *gc) {
if (gc->obj_count > 0) free(gc->objs);
}
void gc_add(struct gc *gc, struct gc_obj_base *o) {
gc->obj_count += 1;
if (gc->obj_count == 1) gc->objs = malloc(sizeof(struct gc_obj_base*));
else gc->objs = realloc(gc->objs, sizeof(struct gc_obj_base*) * gc->obj_count);
gc->objs[gc->obj_count - 1] = o;
}
struct gc_obj_str *gc_obj_str_new(struct gc *gc, char *value) {
struct gc_obj_str *o = MEMALLOC(struct gc_obj_str);
gc_add(gc, &o->base);
o->base.type = GC_OBJ_STR;
o->value = malloc(strlen(value) + 1);
memcpy(o->value, value, strlen(value) + 1);
return o;
}
struct gc_obj_str *gc_obj_str_new_moved(struct gc *gc, char *value) {
struct gc_obj_str *o = MEMALLOC(struct gc_obj_str);
gc_add(gc, &o->base);
o->base.type = GC_OBJ_STR;
o->value = value;
return o;
}
struct gc_obj_arr *gc_obj_arr_new_empty(struct gc *gc) {
struct gc_obj_arr *o = MEMALLOC(struct gc_obj_arr);
gc_add(gc, &o->base);
o->base.type = GC_OBJ_ARR;
o->count = 0;
o->items = NULL;
return o;
}
struct gc_obj_fun *gc_obj_fun_new(
struct gc *gc,
uint32_t arg_count,
uint32_t min_arg_count,
char **args,
struct gc_obj_ctx *ctx,
struct ast_base *code,
const char *name
) {
struct gc_obj_fun *o = MEMALLOC(struct gc_obj_fun);
gc_add(gc, &o->base);
o->base.type = GC_OBJ_FUN;
o->ctx = ctx;
o->code = copy_ast(code);
o->arg_count = arg_count;
o->min_arg_count = min_arg_count;
if (name) {
o->name = malloc(strlen(name) + 1);
memcpy(o->name, name, strlen(name) + 1);
}
else o->name = NULL;
if (o->arg_count > 0) {
o->args = calloc(o->arg_count, sizeof(char*));
for (size_t i = 0; i < o->arg_count; ++i) {
o->args[i] = malloc(strlen(args[i]) + 1);
memcpy(o->args[i], args[i], strlen(args[i]) + 1);
}
} else {
o->args = NULL;
}
return o;
}
struct gc_obj_ctx *gc_obj_ctx_new(struct gc *gc, struct gc_obj_ctx *parent) {
struct gc_obj_ctx *o = MEMALLOC(struct gc_obj_ctx);
gc_add(gc, &o->base);
o->base.type = GC_OBJ_CTX;
o->parent = parent;
o->var_count = 0;
o->vars = NULL;
return o;
}
struct value gc_obj_ctx_get_var(struct gc_obj_ctx *ctx, const char *name) {
for (size_t i = 0; i < ctx->var_count; ++i) {
if (strcmp(ctx->vars[i].name, name) == 0) return ctx->vars[i].value;
}
if (ctx->parent != NULL) return gc_obj_ctx_get_var(ctx->parent, name);
fprintf(stderr, "no such variable: '%s'!\n", name);
exit(1);
}
void gc_obj_ctx_set_var(struct gc_obj_ctx *ctx, const char *name, struct value value) {
for (size_t i = 0; i < ctx->var_count; ++i) {
if (strcmp(ctx->vars[i].name, name) == 0) {
ctx->vars[i].value = value;
return;
}
}
if (ctx->parent != NULL) {
gc_obj_ctx_set_var(ctx->parent, name, value);
return;
}
fprintf(stderr, "no such variable: '%s'! (set)\n", name);
exit(1);
}
void gc_obj_ctx_set_new_var(struct gc_obj_ctx *ctx, const char *name, struct value value) {
for (size_t i = 0; i < ctx->var_count; ++i) {
if (strcmp(ctx->vars[i].name, name) == 0)
ctx->vars[i].value = value;
}
ctx->var_count += 1;
if (ctx->var_count == 1) { ctx->vars = calloc(1, sizeof(*ctx->vars)); }
else { ctx->vars = realloc(ctx->vars, sizeof(*ctx->vars) * ctx->var_count); }
ctx->vars[ctx->var_count - 1].name = malloc(strlen(name) + 1);
memcpy(ctx->vars[ctx->var_count - 1].name, name, strlen(name) + 1);
ctx->vars[ctx->var_count - 1].value = value;
}
void free_gc_obj(struct gc_obj_base *o) {
if (o == NULL) { fprintf(stderr, "free_gc_obj(NULL)!\n"); return; }
switch (o->type) {
case GC_OBJ_STR: {
free(((struct gc_obj_str*)o)->value);
} break;
case GC_OBJ_ARR: {
struct gc_obj_arr *arr = (struct gc_obj_arr*)o;
for (size_t i = 0; i < arr->count; ++i)
if (arr->items[i].type == VALUE_OBJ)
free_gc_obj(arr->items[i].obj_value);
if (arr->count > 0) free(arr->items);
} break;
case GC_OBJ_FUN: {
struct gc_obj_fun *fun = (struct gc_obj_fun*)o;
for (size_t i = 0; i < fun->arg_count; ++i)
free(fun->args[i]);
if (fun->arg_count > 0) free(fun->args);
free_ast(fun->code);
if (fun->name) free(fun->name);
} break;
case GC_OBJ_CTX: {
struct gc_obj_ctx *ctx = (struct gc_obj_ctx*)o;
for (size_t i = 0; i < ctx->var_count; ++i)
free(ctx->vars[i].name);
if (ctx->var_count > 0) free(ctx->vars);
} break;
case GC_OBJ_MAP:
default:
fprintf(stderr, "free: unknown gc object type: %d!\n", o->type);
break;
}
free(o);
}
size_t gc_obj_arr_insert(struct gc_obj_arr *arr, struct value item, size_t idx) {
ASSERT(idx <= arr->count);
arr->count += 1;
if (arr->count == 1) arr->items = malloc(sizeof(struct value));
else arr->items = realloc(arr->items, sizeof(struct value) * arr->count);
memmove(arr->items + idx + 1, arr->items + idx, (arr->count - 1 - idx) * sizeof(*arr->items));
arr->items[idx] = item;
return arr->count;
}
size_t gc_obj_arr_delete(struct gc_obj_arr *arr, struct value item, size_t idx) {
ASSERT(idx < arr->count);
memmove(arr->items + idx, arr->items + idx + 1, (arr->count - 1 - idx) * sizeof(*arr->items));
arr->count -= 1;
if (arr->count == 0) free(arr->items);
else arr->items = realloc(arr->items, sizeof(struct value) * arr->count);
return arr->count;
}
size_t gc_obj_arr_push(struct gc_obj_arr *arr, struct value item) {
return gc_obj_arr_insert(arr, item, arr->count);
}
size_t gc_obj_arr_pop(struct gc_obj_arr *arr, struct value item) {
ASSERT(arr->count > 0);
return gc_obj_arr_insert(arr, item, arr->count);
}
struct outpipe {
void (*putc)(struct outpipe *self, char c);
void (*puts)(struct outpipe *self, const char *s);
};
struct file_outpipe {
struct outpipe base;
FILE *fout;
};
void file_outpipe_putc(struct outpipe *self, char c) {
fputc(c, ((struct file_outpipe*)self)->fout);
}
void file_outpipe_puts(struct outpipe *self, const char *s) {
fputs(s, ((struct file_outpipe*)self)->fout);
}
struct file_outpipe file_outpipe_of(FILE *fout) {
return (struct file_outpipe){
.base.putc = &file_outpipe_putc,
.base.puts = &file_outpipe_puts,
.fout = fout
};
}
struct str_outpipe {
struct outpipe base;
size_t size, alloced;
char *str;
};
void str_outpipe_putc(struct outpipe *self, char c);
void str_outpipe_puts(struct outpipe *self, const char *s);
void str_outpipe_grow(struct str_outpipe *p, size_t size);
void str_outpipe_init(struct str_outpipe *p) {
p->base.putc = &str_outpipe_putc;
p->base.puts = &str_outpipe_puts;
p->alloced = 0;
p->size = 0;
p->str = NULL;
str_outpipe_grow(p, 1);
p->str[0] = '\0';
}
void str_outpipe_free(struct str_outpipe *p) {
if (p->alloced > 0) free(p->str);
}
void str_outpipe_grow(struct str_outpipe *p, size_t size) {
p->size = size;
if (p->size >= p->alloced) {
p->alloced += 64;
if (p->alloced == 64) p->str = malloc(p->alloced);
else p->str = realloc(p->str, p->alloced);
}
}
void str_outpipe_putc(struct outpipe *self, char c) {
struct str_outpipe *p = (struct str_outpipe*)self;
str_outpipe_grow(p, p->size + 1); // p->size >= 2 now.
p->str[p->size - 2] = c; // this was '\0'.
p->str[p->size - 1] = '\0';
}
void str_outpipe_puts(struct outpipe *self, const char *s) {
struct str_outpipe *p = (struct str_outpipe*)self;
size_t old_size = p->size; // >= 1.
str_outpipe_grow(p, p->size + strlen(s));
memcpy(p->str + old_size - 1, s, strlen(s));
p->str[p->size - 1] = '\0';
}
__attribute__((format(printf, 2, 3)))
void printf_topipe(struct outpipe *p, const char *fmt, ...) {
va_list va;
va_start(va, fmt);
int sz = vsnprintf(NULL, 0, fmt, va);
va_end(va);
char buf[sz + 1];
va_start(va, fmt);
vsnprintf(buf, sizeof buf, fmt, va);
va_end(va);
p->puts(p, buf);
}
enum print_flags {
PRINT_FLAG_COLOR = 1 << 0,
PRINT_FLAG_STRUCT = 1 << 1,
};
#define LIT_COLOR(S, F) ((F) & PRINT_FLAG_COLOR ? "\033[33m" S "\033[m" : S)
#define KWD_COLOR(S, F) ((F) & PRINT_FLAG_COLOR ? "\033[35m" S "\033[m" : S)
void print_gc_obj(struct gc_obj_base *o, struct outpipe *fout, enum print_flags flags);
void print_value(struct value v, struct outpipe *fout, enum print_flags flags) {
switch (v.type) {
case VALUE_NIL: printf_topipe(fout, LIT_COLOR("nil", flags)); break;
case VALUE_INT: printf_topipe(fout, LIT_COLOR("%zd", flags), v.int_value); break;
case VALUE_EXT: printf_topipe(fout, "<%s %s>", KWD_COLOR("ext", flags), v.ext_value.name); break;
case VALUE_DAT: printf_topipe(fout, "<%s %p>", KWD_COLOR("dat", flags), v.dat_value); break;
case VALUE_OBJ: print_gc_obj(v.obj_value, fout, flags); break;
case VALUE_RET:
printf_topipe(fout, "<%s %s", KWD_COLOR("ret", flags), v.ret_value->error ? KWD_COLOR("error ", flags) : "");
print_value(v.ret_value->value, fout, flags);
fout->putc(fout, '>');
break;
default: fprintf(stderr, "print: unknown value type: %d!\n", v.type); return;
}
}
int gc_obj_equal(struct gc_obj_base *o1, struct gc_obj_base *o2);
int value_equal(struct value a, struct value b) {
if (a.type != b.type) return 0;
switch (a.type) {
case VALUE_NIL: return 1;
case VALUE_INT: return a.int_value == b.int_value;
case VALUE_EXT: return a.ext_value.func == b.ext_value.func;
case VALUE_DAT: return a.dat_value == b.dat_value;
case VALUE_OBJ: return gc_obj_equal(a.obj_value, b.obj_value);
case VALUE_RET: return 0;
default: fprintf(stderr, "equal: unknown value type: %d!\n", a.type); break;
}
return 0;
}
int gc_obj_equal(struct gc_obj_base *o1, struct gc_obj_base *o2) {
if (o1 == o2) return 1;
if (o1->type != o2->type) return 0;
switch (o1->type) {
case GC_OBJ_STR:
return strcmp(((struct gc_obj_str*)o1)->value, ((struct gc_obj_str*)o2)->value) == 0;
case GC_OBJ_FUN:
return 0;
case GC_OBJ_ARR:
if (((struct gc_obj_arr*)o1)->count != ((struct gc_obj_arr*)o2)->count)
return 0;
for (size_t i = 0; i < ((struct gc_obj_arr*)o1)->count; ++i) {
if (!value_equal(((struct gc_obj_arr*)o1)->items[i], ((struct gc_obj_arr*)o2)->items[i]))
return 0;
}
return 0;
default: fprintf(stderr, "equal: unknown gc object type: %d!\n", o1->type);
}
return 0;
}
static void print_escaped_string_(const char *str, struct outpipe *fout, int color) {
for (; *str; ++str) {
if (!isprint(*str)) {
if (color) fout->puts(fout, "\033[34m");
fout->putc(fout, '\\');
switch (*str) {
case '\n': fout->putc(fout, 'n'); break;
case '\t': fout->putc(fout, 't'); break;
case '\r': fout->putc(fout, 'r'); break;
case '\0': fout->putc(fout, '0'); break;
case '\a': fout->putc(fout, 'a'); break;
case '\"': fout->putc(fout, '"'); break;
case '\'': fout->putc(fout, '\''); break;
case '\\': fout->putc(fout, '\\'); break;
default: printf_topipe(fout, "%02x", *str); break;
}
if (color) fout->puts(fout, "\033[32m");
} else fout->putc(fout, *str);
}
}
void print_gc_obj(struct gc_obj_base *o, struct outpipe *fout, enum print_flags flags) {
switch (o->type) {
case GC_OBJ_STR:
if (flags & PRINT_FLAG_STRUCT) {
fout->puts(fout, flags & PRINT_FLAG_COLOR ? "\033[32m\"" : "\"");
print_escaped_string_(((struct gc_obj_str*)o)->value, fout, flags & PRINT_FLAG_COLOR);
fout->puts(fout, flags & PRINT_FLAG_COLOR ? "\033[m\"" : "\"");
} else {
printf_topipe(fout, "%s", ((struct gc_obj_str*)o)->value);
}
break;
case GC_OBJ_FUN:
if (((struct gc_obj_fun*)o)->name != NULL)
printf_topipe(fout, "<%s %s>", KWD_COLOR("fun", flags), ((struct gc_obj_fun*)o)->name);
else
printf_topipe(fout, "<%s %p>", KWD_COLOR("fun", flags), o);
break;
case GC_OBJ_CTX: printf_topipe(fout, "<%s %p>", KWD_COLOR("ctx", flags), o); break;
case GC_OBJ_ARR:
printf_topipe(fout, LIT_COLOR("#%zu", flags), ((struct gc_obj_arr*)o)->count);
fout->putc(fout, '[');
for (size_t i = 0; i < ((struct gc_obj_arr*)o)->count; ++i) {
print_value(((struct gc_obj_arr*)o)->items[i], fout, flags);
if (i != ((struct gc_obj_arr*)o)->count - 1) fout->puts(fout, ", ");
}
fout->putc(fout, ']');
break;
default: fprintf(stderr, "print: unknown gc object type: %d!\n", o->type); return;
}
}
void gc_mark(struct gc *gc, struct gc_obj_base *o) {
if (o->marked) return;
o->marked = 1;
switch (o->type) {
case GC_OBJ_STR: break;
case GC_OBJ_FUN:
gc_mark(gc, &((struct gc_obj_fun*)o)->ctx->base);
break;
case GC_OBJ_ARR:
for (size_t i = 0; i < ((struct gc_obj_arr*)o)->count; ++i)
if (((struct gc_obj_arr*)o)->items[i].type == VALUE_OBJ)
gc_mark(gc, ((struct gc_obj_arr*)o)->items[i].obj_value);
break;
case GC_OBJ_CTX:
if (((struct gc_obj_ctx*)o)->parent) gc_mark(gc, &((struct gc_obj_ctx*)o)->parent->base);
for (size_t i = 0; i < ((struct gc_obj_ctx*)o)->var_count; ++i)
if (((struct gc_obj_ctx*)o)->vars[i].value.type == VALUE_OBJ)
gc_mark(gc, ((struct gc_obj_ctx*)o)->vars[i].value.obj_value);
break;
case GC_OBJ_MAP:
default:
fprintf(stderr, "gc_mark: unknown gc object type: %d!\n", o->type);
break;
}
}
void gc_sweep(struct gc *gc) {
size_t last = 0;
size_t sweeped = 0;
for (size_t i = 0; i < gc->obj_count; ++i) {
if (gc->objs[i] && !gc->objs[i]->marked) {
free_gc_obj(gc->objs[i]);
gc->objs[i] = NULL;
sweeped += 1;
} else {
gc->objs[i]->marked = 0; // unmark
memmove(&gc->objs[last], &gc->objs[i], sizeof(struct gc_obj_base*));
++last;
}
}
gc->obj_count -= sweeped;
if (SHOULD_PRINT_GC_SWEEPS)
fprintf(stderr, "\n[GC: sweeped %zu objects!]\n", sweeped);
}
void gc_mark_value_(struct gc *gc, struct value val) {
if (val.type == VALUE_OBJ)
gc_mark(gc, val.obj_value);
if (val.type == VALUE_RET && val.ret_value->value.type == VALUE_OBJ)
gc_mark(gc, val.ret_value->value.obj_value);
}
void gc_marksweep(struct gc *gc, struct gc_obj_ctx *ctx, struct eval_context *eval) {
gc_mark(gc, &ctx->base);
for (size_t i = 0; i < eval->stack_size; ++i)
gc_mark_value_(gc, eval->stack[i]);
for (size_t i = 0; i < eval->callstack_size; ++i)
gc_mark_value_(gc, eval->callstack[i].callee);
gc_sweep(gc);
}
void gc_sweepall(struct gc *gc) {
for (size_t i = 0; i < gc->obj_count; ++i) {
free_gc_obj(gc->objs[i]);
}
gc->obj_count = 0;
free(gc->objs);
}
__attribute__((format(printf, 2, 3)))
struct value error_value(struct eval_context *ctx, const char *fmt, ...) {
va_list va;
va_start(va, fmt);
size_t len = vsnprintf(NULL, 0, fmt, va);
va_end(va);
char buf[len + 1];
va_start(va, fmt);
vsnprintf(buf, sizeof buf, fmt, va);
va_end(va);
struct ret_value *ret = calloc(1, sizeof(*ret));
ret->value = (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_str_new(ctx->gc, buf)->base };
ret->error = 1;
return (struct value){ .type = VALUE_RET, .ret_value = ret };
}
struct value concat_strings(struct gc *gc, struct gc_obj_str *lhs, struct gc_obj_str *rhs) {
char *nstr = calloc(1, strlen(lhs->value) + strlen(rhs->value) + 1);
memcpy(nstr, lhs->value, strlen(lhs->value));
memcpy(nstr + strlen(lhs->value), rhs->value, strlen(rhs->value));
return (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_str_new_moved(gc, nstr)->base };
}
static inline void eval_ast(struct eval_context *eval, struct ast_base *n);
void eval_ensure_stack_capacity_(struct eval_context *eval, size_t mincap) {
if (eval->stack_capacity < mincap) {
if (eval->stack_capacity == 0) eval->stack = malloc(sizeof(struct value) * mincap);
else eval->stack = realloc(eval->stack, sizeof(struct value) * mincap);
eval->stack_capacity = mincap;
}
}
void eval_ensure_callstack_capacity_(struct eval_context *eval, size_t mincap) {
if (eval->callstack_capacity < mincap) {
if (eval->callstack_capacity == 0) eval->callstack = malloc(sizeof(struct callstack_entry) * mincap);
else eval->callstack = realloc(eval->callstack, sizeof(struct callstack_entry) * mincap);
eval->callstack_capacity = mincap;
}
}
void eval_free(struct eval_context *eval) {
if (eval->stack_capacity > 0) free(eval->stack);
}
void eval_push(struct eval_context *eval, struct value val) {
eval_ensure_stack_capacity_(eval, ++eval->stack_size);
eval->stack[eval->stack_size - 1] = val;
}
struct value eval_pop(struct eval_context *eval) {
if (eval->stack_size == 0) {
fprintf(stderr, "eval_pop: stack underflow!\n");
FAIL();
}
return eval->stack[--eval->stack_size];
}
struct callstack_entry *eval_push_call(struct eval_context *eval) {
eval_ensure_callstack_capacity_(eval, ++eval->callstack_size);
return &eval->callstack[eval->callstack_size - 1];
}
void eval_pop_call(struct eval_context *eval) {
if (eval->callstack_size == 0) {
fprintf(stderr, "eval_pop_call: stack underflow!\n");
FAIL();
}
--eval->callstack_size;
}
static struct value convert_index(struct eval_context *eval, intmax_t index, size_t count) {
if (index < 0) index += count;
if (UNLIKELY(index >= (intmax_t)count))
return error_value(eval, "array index too big (out of bounds)!");
if (UNLIKELY(index < 0))
return error_value(eval, "array index too small (out of bounds)!");
return (struct value){ .type = VALUE_INT, .int_value = index };
}
struct value eval_invoke(
struct eval_context *eval,
struct value fun,
size_t arg_count,
struct value *args,
struct where *where
) {
struct callstack_entry *call = eval_push_call(eval);
call->call_location = *where;
call->callee = fun;
if (fun.type == VALUE_EXT) {
struct value ret = fun.ext_value.func(eval, arg_count, args);
eval_pop_call(eval);
return ret;
}
if (fun.type != VALUE_OBJ) {
return error_value(eval, "can only invoke functions, arrays and extern functions!");
}
if (fun.obj_value->type == GC_OBJ_ARR) {
if (arg_count != 1 && arg_count != 2) {
return error_value(eval, "invoking arrays has to pass either 1 or 2 arguments!");
}
if (args[0].type != VALUE_INT) {
return error_value(eval, "an array index has to be an integer!");
}
struct value idx = convert_index(eval, args[0].int_value, ((struct gc_obj_arr*)fun.obj_value)->count);
if (idx.type == VALUE_RET) return idx;
struct value *item = &((struct gc_obj_arr*)fun.obj_value)->items[idx.int_value];
if (arg_count == 2) *item = args[1];
eval_pop_call(eval);
return *item;
}
if (fun.obj_value->type == GC_OBJ_FUN) {
struct gc_obj_fun *fo = (struct gc_obj_fun*)fun.obj_value;
if (arg_count < fo->min_arg_count) {
return error_value(eval,
"function expects at least %u arguments, but %zu provided!", fo->min_arg_count, arg_count);
}
struct gc_obj_ctx *nctx = gc_obj_ctx_new(eval->gc, fo->ctx);
size_t f_min_arg_count = arg_count < fo->arg_count ? arg_count : fo->arg_count;
for (size_t i = 0; i < f_min_arg_count; ++i) {
gc_obj_ctx_set_new_var(nctx, fo->args[i], args[i]);
}
if (arg_count < fo->arg_count) {
for (size_t i = arg_count; i < fo->arg_count; ++i) {
gc_obj_ctx_set_new_var(nctx, fo->args[i], (struct value){ .type = VALUE_NIL });
}
}
struct gc_obj_ctx *old_this = eval->this;
eval->this = nctx;
// so that the old this doesn't get sweeped during this call.
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &old_this->base });
eval_ast(eval, fo->code);
gc_marksweep(eval->gc, eval->this, eval);
struct value res = eval_pop(eval);
eval_pop(eval); // pop old this.
eval->this = old_this;
if (res.type == VALUE_RET) {
if (res.ret_value->error) return res;
struct value ret = res.ret_value->value;
free(res.ret_value);
eval_pop_call(eval);
return ret;
} else {
eval_pop_call(eval);
return res;
}
}
// eval_pop_call(eval);
fprintf(stderr, "invoke: not implemented\n");
exit(1);
}
static inline void _eval_ast(struct eval_context *eval, struct ast_base *n);
/** the stack is strictly larger by one after calling this. */
static inline void eval_ast(struct eval_context *eval, struct ast_base *n) {
size_t before = eval->stack_size;
_eval_ast(eval, n);
size_t after = eval->stack_size;
if ((ptrdiff_t)after - (ptrdiff_t)before != 1) {
fprintf(stderr, "eval_ast protocol violation: stack must be strictly larger by 1\n");
fprintf(stderr, " before: %zu\n", before);
fprintf(stderr, " after: %zu\n", after);
FAIL();
}
}
static inline void _eval_ast(struct eval_context *eval, struct ast_base *n) {
#define RETURN_IF_RET(X) if (eval->stack[eval->stack_size - 1].type == VALUE_RET) { X; return; }
switch (n->type) {
case AST_INT:
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = ((struct ast_int*)n)->value });
break;
case AST_VAR:
eval_push(eval, gc_obj_ctx_get_var(eval->this, ((struct ast_var*)n)->name));
break;
case AST_STR:
eval_push(eval, (struct value){ .type = VALUE_OBJ,
.obj_value = &gc_obj_str_new(eval->gc, ((struct ast_str*)n)->value)->base });
break;
case AST_TRN: {
struct ast_trn *trn = (struct ast_trn*)n;
eval_ast(eval, trn->cond); RETURN_IF_RET();
struct value cond = eval_pop(eval);
if (cond.type != VALUE_INT || cond.int_value)
eval_ast(eval, trn->then);
else if (trn->other != NULL)
eval_ast(eval, trn->other);
else
eval_push(eval, (struct value){0});
} break;;
case AST_UNR: switch (((struct ast_unr*)n)->type) {
#define UNARY_OP(OP) { \
eval_ast(eval, ((struct ast_unr*)n)->opr); RETURN_IF_RET(); struct value v = eval_pop(eval); \
if (v.type != VALUE_INT) { \
eval_push(eval, error_value(eval, "the unary %s operator expects numbers as operands!", \
ast_unr_type_to_string(((struct ast_unr*)n)->type))); \
} else { \
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = OP v.int_value }); \
} \
}
case AST_UNR_NOT: UNARY_OP(!); break;
case AST_UNR_NEG: UNARY_OP(-); break;
case AST_UNR_BNEG: UNARY_OP(~); break;
#undef UNARY_OP
default:
fprintf(stderr, "eval: unknown unr operator %d!\n", ((struct ast_unr*)n)->type);
exit(1);
} break;
case AST_BIN: switch (((struct ast_bin*)n)->type) {
case AST_BIN_ADD: {
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET();
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval));
struct value rhs = eval_pop(eval);
struct value lhs = eval_pop(eval);
if (lhs.type == VALUE_INT && rhs.type == VALUE_INT) {
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = lhs.int_value + rhs.int_value });
} else if (lhs.type == VALUE_OBJ && lhs.obj_value->type == GC_OBJ_STR &&
rhs.type == VALUE_OBJ && rhs.obj_value->type == GC_OBJ_STR) {
eval_push(eval, concat_strings(
eval->gc,
(struct gc_obj_str*)lhs.obj_value,
(struct gc_obj_str*)rhs.obj_value));
} else {
eval_push(eval, error_value(eval, "the + operator expects numbers or strings as operands!"));
}
} break;
case AST_BIN_EQL: {
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET();
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval));
struct value rhs = eval_pop(eval);
struct value lhs = eval_pop(eval);
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = value_equal(lhs, rhs) });
} break;
case AST_BIN_NEQ: {
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET();
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval));
struct value rhs = eval_pop(eval);
struct value lhs = eval_pop(eval);
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = !value_equal(lhs, rhs) });
} break;
#define BINARY_OP(OP) { \
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET(); \
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval)); \
struct value rhs = eval_pop(eval); \
struct value lhs = eval_pop(eval); \
if (lhs.type != VALUE_INT || rhs.type != VALUE_INT) { \
eval_push(eval, error_value(eval, "the %s operator expects numbers as operands!", \
ast_bin_type_to_string(((struct ast_bin*)n)->type))); \
} else { \
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = lhs.int_value OP rhs.int_value }); \
} \
}
case AST_BIN_SUB: BINARY_OP(-); break;
case AST_BIN_MUL: BINARY_OP(*); break;
case AST_BIN_DIV: BINARY_OP(/); break;
case AST_BIN_OR: BINARY_OP(||); break;
case AST_BIN_BOR: BINARY_OP(|); break;
case AST_BIN_AND: BINARY_OP(&&); break;
case AST_BIN_BAND: BINARY_OP(&); break;
case AST_BIN_SHL: BINARY_OP(<<); break;
case AST_BIN_SHR: BINARY_OP(>>); break;
case AST_BIN_MOD: BINARY_OP(%); break;
case AST_BIN_GTH: BINARY_OP(>); break;
case AST_BIN_LTH: BINARY_OP(<); break;
case AST_BIN_GEQ: BINARY_OP(>=); break;
case AST_BIN_LEQ: BINARY_OP(<=); break;
#undef BINARY_OP
default:
fprintf(stderr, "eval: unknown bin operator %d!\n", ((struct ast_bin*)n)->type);
exit(1);
} break;
case AST_FUN: {
struct ast_fun *fun = (struct ast_fun*)n;
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_fun_new(
eval->gc,
fun->arg_count, fun->min_arg_count,
fun->args,
eval->this,
fun->body,
NULL
)->base });
} break;
case AST_RET: {
eval_ast(eval, ((struct ast_ret*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval);
struct ret_value *retval = calloc(1, sizeof(*retval));
retval->value = v;
retval->error = ((struct ast_ret*)n)->error;
struct value val = { .type = VALUE_RET, .ret_value = retval };
eval_push(eval, val);
if (retval->error) {
struct callstack_entry *e = eval_push_call(eval);
e->call_location = n->where;
e->callee = val;
}
} break;
case AST_SET: {
eval_ast(eval, ((struct ast_set*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval);
gc_obj_ctx_set_var(eval->this, ((struct ast_set*)n)->name, v);
eval_push(eval, (struct value){0});
} break;
case AST_LET: {
eval_ast(eval, ((struct ast_let*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval);
if (v.type == VALUE_OBJ && v.obj_value->type == GC_OBJ_FUN) {
struct gc_obj_fun *fun = (struct gc_obj_fun *)v.obj_value;
if (fun->name == NULL) {
fun->name = malloc(strlen(((struct ast_let*)n)->name) + 1);
memcpy(fun->name, ((struct ast_let*)n)->name, strlen(((struct ast_let*)n)->name) + 1);
}
}
gc_obj_ctx_set_new_var(eval->this, ((struct ast_let*)n)->name, v);
eval_push(eval, (struct value){0});
} break;
case AST_BLK: {
struct ast_blk *blk = (struct ast_blk*)n;
struct gc_obj_ctx *old_this = eval->this;
eval->this = gc_obj_ctx_new(eval->gc, eval->this);
for (size_t i = 0; i < blk->stmt_count; ++i) {
eval_ast(eval, blk->stmts[i]);
if (eval->stack[eval->stack_size - 1].type == VALUE_RET) break;
if (i < blk->stmt_count - 1) eval_pop(eval);
}
eval->this = old_this;
} break;
case AST_INV: {
struct ast_inv *inv = (struct ast_inv*)n;
eval_ast(eval, inv->fun); RETURN_IF_RET(); struct value fun = eval->stack[eval->stack_size - 1];
struct value *args = inv->arg_count > 0 ? calloc(inv->arg_count, sizeof(struct value)) : NULL;
for (size_t i = 0; i < inv->arg_count; ++i) {
eval_ast(eval, inv->args[i]);
args[i] = eval->stack[eval->stack_size - 1];
if (args[i].type == VALUE_RET) {
for (size_t j = 0; j <= i; ++j) eval_pop(eval); // pop already pushed arguments.
eval_pop(eval); // pop `fun`.
eval_push(eval, args[i]); // push return argument.
free(args);
return;
}
}
struct value res = eval_invoke(eval, fun, inv->arg_count, args, &n->where);
for (size_t i = 0; i < inv->arg_count; ++i) eval_pop(eval); // pop arguments.
if(inv->arg_count > 0) free(args);
eval_pop(eval); // pop `fun`.
eval_push(eval, res);
} break;
case AST_ARR: {
struct ast_arr *arr = (struct ast_arr*)n;
struct gc_obj_arr *obj = gc_obj_arr_new_empty(eval->gc);
for (size_t i = 0; i < arr->elem_count; ++i) {
eval_ast(eval, arr->elems[i]); RETURN_IF_RET();
gc_obj_arr_push(obj, eval->stack[eval->stack_size - 1]);
eval_pop(eval);
}
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &obj->base });
} break;
default:
fprintf(stderr, "eval: unknown ast type: %d!\n", n->type);
abort();
// exit(1);
}
}
static void wse_std_print(struct eval_context *ctx, FILE *fout, size_t arg_count, struct value *args) {
struct file_outpipe foutp = file_outpipe_of(fout);
for (size_t i = 0; i < arg_count; ++i) {
print_value(args[i], &foutp.base, PRINT_FLAG_COLOR);
if (i != arg_count - 1) foutp.base.putc(&foutp.base, ' ');
}
foutp.base.putc(&foutp.base, '\n');
}
static struct gc_obj_base *wse_std_tostring(struct eval_context *ctx, struct value val) {
struct str_outpipe pipe;
str_outpipe_init(&pipe);
print_value(val, &pipe.base, 0);
struct gc_obj_str *res = gc_obj_str_new(ctx->gc, pipe.str);
str_outpipe_free(&pipe);
return &res->base;
}
struct value wse_std_tostring_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count != 1) {
return error_value(ctx, "tostring expects exactly one argument (%zu given)!\n", arg_count);
}
return (struct value){ .type = VALUE_OBJ, .obj_value = wse_std_tostring(ctx, args[0]) };
}
struct value wse_std_print_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
wse_std_print(ctx, stdout, arg_count, args);
return (struct value){0};
}
struct value wse_std_fprint_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count == 0) {
return error_value(ctx, "fprint expects at least one argument (the file)!\n");
}
if (args[0].type != VALUE_DAT) {
return error_value(ctx, "fprint expects the first argument to be a file pointer!\n");
}
wse_std_print(ctx, args[0].dat_value, arg_count - 1, args + 1);
return (struct value){0};
}
struct value wse_std_arrinsert_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count < 2 || arg_count > 3) {
return error_value(ctx, "arrinsert expected at 2 or 3 arguments (array, item and index)");
}
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_ARR) {
return error_value(ctx, "arrinsert expects the first argument to be an array!\n");
}
struct gc_obj_arr *arr = (struct gc_obj_arr*)args[0].obj_value;
size_t index = arr->count;
if (arg_count == 3) {
if (args[2].type != VALUE_INT) {
return error_value(ctx, "arrinsert expects the third argument to be an integer!\n");
}
struct value val = convert_index(ctx, args[2].int_value, arr->count);
if (val.type == VALUE_RET) return val;
index = val.int_value;
}
size_t ret = gc_obj_arr_insert(arr, args[1], index);
return (struct value){ .type = VALUE_INT, .int_value = ret };
}
struct value wse_std_arrdelete_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count < 1 || arg_count > 2) {
return error_value(ctx, "arrdelete expected 1-2 arguments (array and index)");
}
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_ARR) {
return error_value(ctx, "arrdelete expects the first argument to be an array!\n");
}
struct gc_obj_arr *arr = (struct gc_obj_arr*)args[0].obj_value;
if (arr->count <= 0) {
return error_value(ctx, "cannot arrdelete from empty array.");
}
size_t index = arr->count - 1;
if (arg_count == 2) {
if (args[1].type != VALUE_INT) {
return error_value(ctx, "arrdelete expects the third argument to be an integer!\n");
}
struct value val = convert_index(ctx, args[1].int_value, arr->count);
if (val.type == VALUE_RET) return val;
index = val.int_value;
}
size_t ret = gc_obj_arr_delete((struct gc_obj_arr*)args[0].obj_value, args[1], index);
return (struct value){ .type = VALUE_INT, .int_value = ret };
}
struct value wse_std_countof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count != 1) {
return error_value(ctx, "countof expects one argument (container)");
}
if (args[0].type != VALUE_OBJ) {
return error_value(ctx, "countof expects the argument to be an array, a map or a context!\n");
}
if (args[0].obj_value->type == GC_OBJ_ARR) {
return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_arr*)args[0].obj_value)->count };
}
if (args[0].obj_value->type == GC_OBJ_CTX) {
return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_ctx*)args[0].obj_value)->var_count };
}
return error_value(ctx, "countof: not implemented or argument not a container!\n");
}
struct value wse_std_parentof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count != 1) {
return error_value(ctx, "parentof expects one argument (context)");
}
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_CTX) {
return error_value(ctx, "parentof expects the argument to be a context!\n");
}
return (struct value){ .type = VALUE_OBJ, .obj_value = &((struct gc_obj_ctx*)args[0].obj_value)->parent->base };
}
struct value wse_std_keysof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) {
if (arg_count != 1) {
return error_value(ctx, "keysof expects one argument (container)");
}
if (args[0].type != VALUE_OBJ) {
return error_value(ctx, "keysof expects the argument to be an array, a map or a context!\n");
}
// if (args[0].obj_value->type == GC_OBJ_ARR) {
// return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_arr*)args[0].obj_value)->count };
// }
// if (args[0].obj_value->type == GC_OBJ_CTX) {
// return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_ctx*)args[0].obj_value)->var_count };
// }
return error_value(ctx, "keysof: not implemented or argument not a container!\n");
}
static const char *just_fname_(const char *path) {
const char *chunk = path;
while (1) { // while 1? idk if this is safe
const char *s = chunk;
for (; *s != '/'; ++s) {
if (*s == '\0') return chunk;
}
chunk = s + 1;
}
return chunk; // unreachable? UB lets go
}
static void init_wse_std(struct gc_obj_ctx *ctx) {
#define WHERE_C (struct where){ .fname = just_fname_(__FILE__), .line = __LINE__, .col = 1 }
#define STDFUNC(NAME) (struct value){ .type = VALUE_EXT, .ext_value = { wse_std_##NAME##_bind, #NAME, WHERE_C } }
#define SETSTDFUNC(NAME) gc_obj_ctx_set_new_var(ctx, #NAME, STDFUNC(NAME))
gc_obj_ctx_set_new_var(ctx, "nil", (struct value){ .type = VALUE_NIL });
SETSTDFUNC(tostring);
SETSTDFUNC(print);
SETSTDFUNC(fprint);
gc_obj_ctx_set_new_var(ctx, "stdout", (struct value){ .type = VALUE_DAT, .dat_value = stdout });
gc_obj_ctx_set_new_var(ctx, "stderr", (struct value){ .type = VALUE_DAT, .dat_value = stderr });
gc_obj_ctx_set_new_var(ctx, "stdin", (struct value){ .type = VALUE_DAT, .dat_value = stdin });
SETSTDFUNC(arrinsert);
SETSTDFUNC(arrdelete);
SETSTDFUNC(countof);
SETSTDFUNC(parentof);
SETSTDFUNC(keysof);
#undef SETSTDFUNC
#undef STDFUNC
#undef WHERE_C
}
int run_file(struct eval_context *ectx, const char *path) {
struct lexer lex = {0};
{
FILE *fin = fopen("main.wse", "r");
if (fin == NULL) {
return perror("failed to open file"), 1;
}
lexer_add_from(&lex, fin, "main.wse");
fclose(fin);
}
// for (size_t i = 0; i < lex.tok_count; ++i) {
// print_where(lex.toks[i].where, stdout);
// fputc('\t', stdout);
// print_token(&lex, &lex.toks[i], stdout);
// }
struct parser pars = { .lex = &lex };
int ret = 0;
if (lex.toks->type != TOKEN_EOF) {
struct err_asts r = parse_stmts(&pars, lex.toks);
if (ERR_AST_IS_ERR(r)) {
print_where(r.rest->where, stderr);
fprintf(stderr, "\t\033[1;31mError\033[m: %s\n", r.error);
print_where(r.rest->where, stdout);
printf("\tgot: ");
print_token(&lex, r.rest, stdout);
ret = 1;
} else if (r.rest && r.rest->type != TOKEN_EOF) {
print_where(r.rest->where, stderr);
fprintf(stderr, "\t\033[1;31mError\033[m: expected end of input\n");
ret = 1;
} else {
for (size_t i = 0; i < r.ast_count; ++i) {
eval_ast(ectx, r.asts[i]);
// we can safely free the ast since functions
// (the only things that reference asts)
// copy their code. (we also dont waste memory)
free_ast(r.asts[i]);
r.asts[i] = NULL;
if (ectx->stack[ectx->stack_size - 1].type == VALUE_RET) break;
}
}
for (size_t i = 0; i < r.ast_count; ++i) {
if (r.asts[i] != NULL) free_ast(r.asts[i]);
}
if (r.ast_count > 0) free(r.asts);
}
lexer_free(&lex);
return ret;
}
void print_stacktrace(struct eval_context *eval) {
struct file_outpipe foutp = file_outpipe_of(stderr);
fprintf(stderr, "\033[1mStacktrace\033[m:\n");
for (size_t i = eval->callstack_size; i --> 0; ) {
fprintf(stderr, " \033[1mat\033[m: ");
print_where(eval->callstack[i].call_location, stderr);
fputs("\t", stderr);
print_value(eval->callstack[i].callee, &foutp.base, PRINT_FLAG_COLOR | PRINT_FLAG_STRUCT);
fputs("\n", stderr);
}
fprintf(stderr, " \033[1m-- entry --\033[m\n");
}
int run_file_full(const char *path) {
struct gc gc = {0};
struct gc_obj_ctx *globals = gc_obj_ctx_new(&gc, NULL);
init_wse_std(globals);
struct eval_context ectx = {
.gc = &gc, .this = globals,
.stack_capacity = 0, .stack_size = 0, .stack = NULL
};
int ret = 0;
ret = run_file(&ectx, path);
if (ectx.stack_size > 0 && ectx.stack[ectx.stack_size - 1].type == VALUE_RET) {
if (ectx.stack[ectx.stack_size - 1].ret_value->error) {
struct file_outpipe foutp = file_outpipe_of(stderr);
print_stacktrace(&ectx);
fprintf(stderr, "\033[1;31mError\033[m: ");
print_value(ectx.stack[ectx.stack_size - 1].ret_value->value, &foutp.base, PRINT_FLAG_COLOR);
fprintf(stderr, "\n");
ret = 1;
}
free(ectx.stack[ectx.stack_size - 1].ret_value);
}
eval_free(&ectx);
gc_sweepall(&gc);
gc_free(&gc);
return ret;
}
int cli(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "usage:\n\t%s <fname>\n", argv[0]);
return 1;
}
return run_file_full(argv[1]);
}
int main(int argc, char *argv[]) {
return cli(argc, argv);
}
@coinconclusive
Copy link
Author

coinconclusive commented Apr 24, 2023

Compiling:

Tested only with clang on arch linux. Should work on gcc but im not sure if __builtin_unreachable exists there. (TODO: make it compiler-independent)

Debug

clang wse.c -o wse -Wall -Wpedantic -Wno-format-pedantic -fsanitize=address,undefined -O0 -g

Release

clang wse.c -o wse -Wall -Wpedantic -Wno-format-pedantic -O3

Running:

To run, simply execute the generated executable. If debugging and running unmodified code and if the sanitizer reports something, please comment (with the code that triggered the error)!

./wse <filename>

@coinconclusive
Copy link
Author

coinconclusive commented Apr 25, 2023

Syntax:

The syntax is a combination of javascript, a little bit of go and some rust. [This comment isn't finished yet.]

Comments

Comments start with # and span the whole line. (they only work outside of strings and other literals like that)

Basic values

There are:

  • Strings:
      "This is a string"
      # Escapes are the same as in C
      "\n" # - Newline, ASCII 10
      "\t" # - Tab, ASCII 9
      "\r" # - Carriage Return, ASCII 13
      "\0" # - Nul, ASCII 0
      "\a" # - Bell, ASCII 7
      "\e" # - Escape, ASCII 27
      "\"" # - Literal ", ASCII 34
      "\'" # - Literal ', ASCII 39
      "\\" # - Literal \, ASCII 92
    Octal escapes arent supported yet.
  • Integers:
      1938291 # No hexadecimal and octal for now.
      1_938_291 # You can add _ for clarity. They are ignored.
      1___213_ # This is valid. (the same as 1213)

Syntax sugar:

if

if condition { ... };
if condition { ... } else { ... };

Is equivalent to (but allowed only in statements)

condition ? { ... };
condition ? { ... } : { ... };

return

return value

Is equivalent to (but allowed only in statements)

=> value;

compound assignments

a += 3;
b <<= 4;

Is equivalent to (including other operators of course)

a = a + 3;
b = b << 4;

function arguments

func(a, b) x => { ... };
func() (x, y) => { ... };

Is equivalent to

func(a, b, x => { ... });
func((x, y) => { ... });

Example programs:

Hello World

print("Hello, World!");

Hello World with functions

(x => # function taking one argument. multiple arguments with comma and parens
  fprint(stderr, "Hello, " + tostring(x))
)(12); # invoke the function immediately

Functions, branches and returns

(x => fprint(stderr, "Hello, " + tostring(x)))((() => { # code block in { ... }
  a := 15; # new variable, local to this function (block)
  b := 1; # another variable
  a = a + b;
  a == 16 ? { # ternary operator (if statements are syntax sugar) 'cond ? then : else'
    b = b + a;
    => b # return from *function* (not block) with value
  }; # false branch optional
  1; # returned from block
})());

Same as above, but with syntax sugar

(x => fprint(stderr, "Hello, " + tostring(x)))((() => {
  a := 15;
  b := 1;
  a += b;
  if a == 16 {
    b += a;
    return b;
  };
  1;
})());

Stuff

assert := (e, msg) => !e ? { =!> "assertion error: " + tostring(msg); };
unless := (cond, func) => !cond ? func(cond);
partial := (arg1, func) => arg2 => func(arg1, arg2);

say_hi_to := name => fprint(stderr, "Hello, " + tostring(name));

with := (o, f) => {
	fprint(stderr, "begin with " + tostring(o));
	r := f(o);
	fprint(stderr, "end with " + tostring(o));
	return r;
};

r := with("coinconclusive") name => {
	say_hi_to(name);
	unless(1 == 2) _ => print("universe normal");
	return 2;
};

print(r);

add12 := partial(12) (a, b) => a + b;
print(add12(6));

Iterators

default := (x, d) => x == nil ? d : x;

range := (a, b, ? s) => {
  i := a;
  j := a;
  s = default(s, 1);
  () => { j = i; i += s; j == b ? nil : j }
};

each := (it, fn) => {
  loop := v => v != nil ? { fn(v); loop(it()) };
  loop(it())
};

map := (it, fn) => {
  i := 0; j := 0; v := nil;
  () => { j = i; i + 1; v = it(); v == nil ? nil : fn(v, j) }
};

fold := (it, fn, ? start) => {
  r := start == nil ? it() : start;
  if r == nil { return nil };
  loop := (a, b) => b == nil ? a : loop(fn(a, b), it());
  loop(r, it())
};

arrayof := it => {
  arr := [];
  each(it) e => arrinsert(arr, e);
  arr
};

assert := (e, msg) => {
  if !e {
    =!> "Assertion failed: " + tostring(msg);
  }
};

print(fold(map(range(1, 4)) e => e * e) (a, b) => a + b);

@coinconclusive
Copy link
Author

coinconclusive commented Apr 25, 2023

TODO:

See also the TODOs in the code.

  • add error return (exceptions) support
  • convert eval errors to error returns
  • add error return syntax (=!> value)
  • add if syntax sugar
  • add return syntax sugar
  • allow } or EOI after ; in block/top-level statements
  • add block function argument sugar (f(1, 2) |x| { ... } is f(1, 2, |x| { ...} ))
  • combined assign operators (+=, etc)
  • change function syntax to use arrows ((args, ...) => body)
  • make sure gc actually works
  • array literals ([a, b, c])
  • get/set item from array (arr(index) to get, arr(index, value) to set)
  • array builtins (newsize = arrpusharrinsert(arr, value[, index]), oldvalue = arrpoparrdelete(arr[, index]))
  • pipelines (o |> f(a, b) |> g(x) a => a + 1 is g(f(o, a, b), a => a + 1))
  • io builtins (file = iofopen(fname[, mode]), iofclose(file), data = iofread(file[, n]))
  • os builtins (osargs name?, osenv(name[, value]) name?, osexit([code]))
  • map literals ({ "a": b, c: d })
  • get/set item for map (map(key[, value]))
  • map builtins (oldvalue = mapremove(key))
  • context object creation (new([parent]))
  • field access operator for context objects (ctx.var)
  • field setting for context objects (ctx.var = val) (parser note: finally use the error case)
  • cli
  • modules (mod = import(modpath), mod is a context object)
  • handle recursive imports (module cache)
  • builtins module?
  • methods? |f, x| |*args| f(x, *args)?
  • varargs (|a, *args| f(a, *args))
  • stdlib (stuffs)

@coinconclusive
Copy link
Author

Code

Some insight about the interpreter code itself. This isn't done.

QoL

  • Safe nullable pointer offset. (NULL + x is UB)

    static inline const char *offset_nullable_ptr_safe(const char *ptr, size_t offset) {
      return ptr == NULL ? ptr : ptr + offset;
    }
  • Allocate memory for a type.

    #define MEMALLOC(T) ((T*)calloc(1, sizeof(T)))

Semantic naming

Idk how to call this properly, but basically:

Structs

  • "base class" struct
    struct x_base {
      enum x_type type;
      ...
    };
  • "sub class" struct
    struct x_y {
      struct x_base base;
      ...
    }

Functions

  • New "sub class" struct. Allocates the struct in some way.
    struct x_y *x_y_new([allocator, ]...) {
      struct x_y *x = MEMALLOC(struct x_y);
    }
  • Free a pointer to a "sub class" struct. Does free(x)
    void free_x(struct x_base *x) {
      if (x == NULL) { fprintf(stderr, "free_x(NULL)!\n"); return; }
      switch (x->type) {
        ...
        default: fprintf(stderr, "free: unknown x type: %d!\n", x->type); break;
      }
      free(x);
    }
  • Free internal struct fields
    void x_free(struct x *x) { ... }

Common patterns

Array Free

When freeing arrays, this is how it usually happens

// free individual items
for (size_t i = 0; i < x_count; ++i) {
  free_x(xs[i]);  // or
  x_free(&xs[i]); // or
  free(xs[i]);
}

// free the whole array if it exists
// (the array is NULL when there are no items)
if (x_count > 0) free(xs);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment