Skip to content

Instantly share code, notes, and snippets.

@jstimpfle
Created February 17, 2021 22:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jstimpfle/fdd458f3b956ece1c5efd8bae8b1ede2 to your computer and use it in GitHub Desktop.
Save jstimpfle/fdd458f3b956ece1c5efd8bae8b1ede2 to your computer and use it in GitHub Desktop.
Toy Lisp (work in progress)
#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#define NORETURN __declspec(noreturn)
#else
#error TODO
#endif
#define i32 int32_t
#define i64 int64_t
#define u32 uint32_t
#define u64 uint64_t
enum
{
OBJECT_INT,
OBJECT_STRING,
OBJECT_SYMBOL,
OBJECT_PAIR,
OBJECT_CONS,
};
typedef struct _Object Object;
typedef struct _Symbol Symbol;
typedef struct _Int Int;
typedef struct _String String;
typedef struct _Pair Pair;
typedef struct _Cons Cons;
struct _Object
{
int object_kind;
};
struct _Int
{
int object_kind;
i64 value;
};
struct _String
{
int object_kind;
char *buffer;
int len;
};
struct _Symbol
{
int object_kind;
char *name;
};
struct _Pair
{
int object_kind;
Object *first;
Object *second;
};
struct _Cons
{
int object_kind;
Object *object;
Cons *next;
};
void *LISP_NIL = NULL;
Object *LISP_FALSE = (Object *) &(Int) { OBJECT_INT, 0 };
Object *LISP_TRUE = (Object *) &(Int) { OBJECT_INT, 1 };
void msg_fv(const char *msg, va_list ap)
{
vfprintf(stderr, msg, ap);
fprintf(stderr, "\n");
}
void msg_f(const char *msg, ...)
{
va_list ap;
va_start(ap, msg);
msg_fv(msg, ap);
va_end(ap);
}
void NORETURN runtime_error_fv(const char *msg, va_list ap)
{
fprintf(stderr, "ERROR: ");
msg_fv(msg, ap);
exit(1);
}
void NORETURN runtime_error(const char *msg, ...)
{
va_list ap;
va_start(ap, msg);
runtime_error_fv(msg, ap);
//va_end(ap);
}
#define LISP_FOREACH(name, cons) \
for (Cons *name = (Cons *) (cons); \
name != LISP_NIL; \
name = name->next)
void lisp_print_object(Object *obj)
{
if (obj == LISP_NIL)
{
printf("NIL");
}
else if (obj->object_kind == OBJECT_INT)
{
printf("%" PRIi64, ((Int *) obj)->value);
}
else if (obj->object_kind == OBJECT_STRING)
{
printf("%s", ((String *) obj)->buffer);
}
else if (obj->object_kind == OBJECT_SYMBOL)
{
printf("'%s'", ((Symbol *) obj)->name);
}
else if (obj->object_kind == OBJECT_CONS)
{
printf("(list");
for (Cons *cons = (Cons *) obj;
cons != LISP_NIL;
cons = cons->next)
{
printf(" ");
lisp_print_object(cons->object);
}
printf(")");
}
else if (obj->object_kind == OBJECT_PAIR)
{
printf("(pair)");
}
else
{
runtime_error("Invalid object kind");
}
}
Object *lisp_head(Object *obj)
{
assert(obj != LISP_NIL);
assert(obj->object_kind == OBJECT_CONS);
return ((Cons *) obj)->object;
}
Object *lisp_tail(Object *obj)
{
assert(obj != LISP_NIL);
assert(obj->object_kind == OBJECT_CONS);
return (Object *) ((Cons *) obj)->next;
}
int lisp_length(Object *obj)
{
if (obj == LISP_NIL)
{
return 0;
}
assert(obj->object_kind == OBJECT_CONS);
int n = 0;
for (Cons *cons = (Cons *) obj;
cons != NULL;
cons = cons->next)
{
++ n;
}
return n;
}
i64 lisp_get_int(Object *obj)
{
assert(obj != LISP_NIL);
assert(obj->object_kind == OBJECT_INT);
return ((Int *) obj)->value;
}
void *_lisp_alloc(size_t size)
{
void *ptr = calloc(1, size);
if (ptr == NULL)
{
runtime_error("OOM");
}
return ptr;
}
#define lisp_alloc(type) ((type *) _lisp_alloc(sizeof (type)))
Object *lisp_alloc_int(i64 value)
{
Int *result = lisp_alloc(Int);
result->object_kind = OBJECT_INT;
result->value = value;
return (Object *) result;
}
Object *lisp_alloc_string(const char *buffer, int len)
{
String *result = lisp_alloc(String);
result->object_kind = OBJECT_STRING;
result->buffer = _lisp_alloc(len + 1);
memcpy(result->buffer, buffer, len);
result->buffer[len] = 0;
result->len = len;
return (Object *) result;
}
Object *lisp_alloc_symbol(const char *name, int len)
{
Symbol *result = lisp_alloc(Symbol);
result->object_kind = OBJECT_SYMBOL;
result->name = _lisp_alloc(len);
memcpy(result->name, name, len);
return (Object *) result;
}
Object *builtin_sym_eq(Object *a, Object *b)
{
assert(a->object_kind == OBJECT_SYMBOL);
assert(b->object_kind == OBJECT_SYMBOL);
if (!strcmp(((Symbol *) a)->name,
((Symbol *) b)->name))
{
return LISP_TRUE;
}
else
{
return LISP_FALSE;
}
}
Object *lisp_eq(Object *a, Object *b)
{
if (a == LISP_NIL || b == LISP_NIL)
{
return a == b ? LISP_TRUE : LISP_FALSE;
}
if (a->object_kind != b->object_kind)
{
return LISP_FALSE;
}
if (a->object_kind == OBJECT_SYMBOL)
{
return builtin_sym_eq(a, b);
}
else if (a->object_kind == OBJECT_INT)
{
return ((Int *) a)->value == ((Int *) b)->value ? LISP_TRUE : LISP_FALSE;
}
else if (a->object_kind == OBJECT_STRING)
{
String *sa = (String *) a;
String *sb = (String *) b;
if (sa->len != sb->len)
return LISP_FALSE;
if (memcmp(sa->buffer, sb->buffer, sa->len) != 0)
return LISP_FALSE;
return LISP_TRUE;
}
else if (a->object_kind == OBJECT_PAIR)
{
Pair *pa = (Pair *) a;
Pair *pb = (Pair *) b;
if (lisp_eq(pa->first, pb->first) == LISP_FALSE)
{
return LISP_FALSE;
}
return lisp_eq(pa->second, pb->second);
}
else if (a->object_kind == OBJECT_CONS)
{
Cons *ca = (Cons *) a;
Cons *cb = (Cons *) b;
if (lisp_eq(ca->object, cb->object) == LISP_FALSE)
{
return LISP_FALSE;
}
return lisp_eq((Object *) ca->next, (Object *) cb->next);
}
else
{
runtime_error("Invalid object kind");
}
}
Cons *lisp_cons(Object *a, Cons *b)
{
Cons *cons = lisp_alloc(Cons);
cons->object_kind = OBJECT_CONS;
cons->object = a;
cons->next = b;
return cons;
}
typedef struct _Env Env;
struct _Env
{
Env *parent;
Cons *list;
};
Pair *env_find(Env *env, Symbol *sym)
{
for (; env != NULL; env = env->parent)
{
for (Cons *item = env->list;
item != LISP_NIL;
item = item->next)
{
Object *opair = item->object;
//printf("opair: "); lisp_print_object(opair); printf("\n");
assert(opair != NULL && opair->object_kind == OBJECT_PAIR);
Pair *pair = (Pair *) opair;
Object *first = pair->first;
//printf("first: "); lisp_print_object(first); printf("\n");
assert(first->object_kind == OBJECT_SYMBOL);
if (builtin_sym_eq(first, (Object *) sym) == LISP_TRUE)
{
return pair;
}
}
}
return NULL;
}
void env_add(Env *env, Object *osym, Object *obj)
{
{
assert(osym->object_kind == OBJECT_SYMBOL);
Symbol *sym = (Symbol *) osym;
Pair *found = NULL;
// look for binding - but only in current env.
// XXX code is duplicated from above
for (Cons *item = env->list;
item != LISP_NIL;
item = item->next)
{
Object *opair = item->object;
assert(opair != NULL && opair->object_kind == OBJECT_PAIR);
Pair *pair = (Pair *) opair;
Object *first = pair->first;
assert(first->object_kind == OBJECT_SYMBOL);
if (builtin_sym_eq(first, (Object *) sym) == LISP_TRUE)
{
found = pair;
}
}
if (found != NULL)
{
runtime_error("Binding '%s' already present in env", sym->name);
}
}
Pair *pair = lisp_alloc(Pair);
pair->object_kind = OBJECT_PAIR;
pair->first = osym;
pair->second = obj;
env->list = lisp_cons((Object *) pair, env->list);
}
void env_set(Env *env, Symbol *sym, Object *obj)
{
Pair *pair = env_find(env, sym);
if (pair == NULL)
{
runtime_error("No binding '%s' in env", sym->name);
}
pair->second = obj;
}
Object *env_get(Env *env, Symbol *sym)
{
Pair *pair = env_find(env, sym);
if (pair == NULL)
{
runtime_error("No binding '%s' in env", sym->name);
}
//printf("found key "); lisp_print_object((Object *) sym);
//printf(", return value: "); lisp_print_object(pair->second); printf("\n");
return pair->second;
}
int curchar;
void next_char(void)
{
int c = fgetc(stdin);
curchar = (c == EOF) ? -1 : c;
}
void NORETURN parse_error_f(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
runtime_error_fv(fmt, ap);
//va_end(ap);
}
Cons **current_list_end;
void parse_expr(void)
{
Object *result = NULL;
for (;;)
{
if (curchar == ')')
{
//XXX this is a weird hack. Should probably go.
return;
}
if (curchar == -1)
{
// same
return;
}
if (curchar > ' ')
{
break;
}
next_char();
}
if (curchar == '(')
{
next_char();
Cons **old = current_list_end;
Cons *list = NULL;
current_list_end = &list;
for (;;)
{
if (curchar == -1)
{
parse_error_f("EOF while looking for closing ')'");
}
if (curchar == ')')
{
next_char();
break;
}
parse_expr();
}
current_list_end = old;
result = (Object *) list;
}
else if ('0' <= curchar && curchar <= '9')
{
i64 value = 0;
for (;;)
{
value = 10 * value + curchar - '0';
next_char();
if (curchar < '0' || curchar > '9')
{
break;
}
}
result = lisp_alloc_int(value);
}
else if (curchar == '"')
{
char buf[128];
int n = 0;
int escaped = 0;
for (;;)
{
next_char();
if (curchar == -1)
{
runtime_error("Found EOF when looking for closing '\"'");
}
if (!escaped && curchar == '"')
{
next_char();
break;
}
if (!escaped && curchar == '\\')
{
escaped = 1;
continue;
}
int c;
if (escaped)
{
escaped = 0;
if (curchar == 'n')
{
c = '\n';
}
else
{
c = curchar;
}
}
else
{
c = curchar;
}
if (n + 1 == sizeof buf)
{
runtime_error("String literal too long!");
}
buf[n] = (char) c;
++ n;
}
buf[n] = 0;
result = lisp_alloc_string(buf, n);
}
else if (('a' <= curchar && curchar <= 'z')
|| ('A' <= curchar && curchar <= 'Z'))
{
char buf[32];
int n = 0;
for (;;)
{
if (n == sizeof buf - 1)
{
runtime_error("Max symbol size is %d", (int) sizeof buf - 1);
}
buf[n] = (char) curchar;
++ n;
next_char();
if (!(curchar == '-'
|| ('a' <= curchar && curchar <= 'z')
|| ('A' <= curchar && curchar <= 'Z')
|| ('0' <= curchar && curchar <= '9')))
{
break;
}
}
buf[n] = 0;
++n;
result = lisp_alloc_symbol(buf, n);
}
else
{
parse_error_f("Can't parse expression starting with: %d (%c)",
curchar, curchar);
}
printf("parsed object: ");
lisp_print_object(result);
printf("\n");
Cons *cons = lisp_cons(result, LISP_NIL);
*current_list_end = cons;
current_list_end = &cons->next;
}
int lisp_symbol_is_string(Object *obj, const char *name)
{
assert(obj->object_kind == OBJECT_SYMBOL);
return !strcmp(((Symbol *) obj)->name, name);
}
Env *current_env;
Object *lisp_eval(Object *obj);
Cons *lisp_eval_list(Object *obj)
{
assert(obj == LISP_NIL || obj->object_kind == OBJECT_CONS);
if (obj == LISP_NIL)
{
return LISP_NIL;
}
Cons *next = lisp_eval_list(lisp_tail(obj));
Object *value = lisp_eval(lisp_head(obj));
return lisp_cons(value, next);
}
Object *lisp_eval(Object *obj);
Object *lisp_eval_quasiquote(Object *obj)
{
if (obj == LISP_NIL)
{
return obj;
}
if (obj->object_kind == OBJECT_CONS)
{
Object *first = lisp_head(obj);
if (first->object_kind == OBJECT_SYMBOL) {
if (lisp_symbol_is_string(first, "unquote")) {
Object *rest = lisp_tail(obj);
assert(lisp_length(rest) == 1);
Object *out = lisp_eval(lisp_head(rest));
return out;
}
}
// if it wasn't an "unquote", map lisp_eval_quasiquote to this list.
// TODO: I think we should have a lisp_map() function
Cons *out = NULL;
Cons **list_end = &out;
for (Cons *item = (Cons *) obj;
item != LISP_NIL;
item = item->next)
{
*list_end = lisp_cons(lisp_eval_quasiquote(item->object), NULL);
list_end = &(*list_end)->next;
}
return (Object *) out;
}
else {
// XXX: what about OBJECT_PAIR? We should probably just remove
// it!
return obj;
}
}
Object *lisp_eval(Object *obj)
{
if (obj == LISP_NIL)
{
return obj;
}
else if (obj->object_kind == OBJECT_INT)
{
return obj;
}
else if (obj->object_kind == OBJECT_STRING)
{
return obj;
}
else if (obj->object_kind == OBJECT_CONS)
{
Object *first = lisp_eval(lisp_head(obj));
Object *rest = lisp_tail(obj);
if (first->object_kind == OBJECT_SYMBOL)
// Check if this is a special form
{
if (lisp_symbol_is_string(first, "if"))
{
assert(lisp_length(rest) == 3);
Object *a = lisp_head(rest);
rest = lisp_tail(rest);
Object *b = lisp_head(rest);
rest = lisp_tail(rest);
Object *c = lisp_head(rest);
if (lisp_eval(a) == LISP_TRUE)
{
return lisp_eval(b);
}
else
{
return lisp_eval(c);
}
}
else if (lisp_symbol_is_string(first, "eval"))
{
assert(lisp_length(rest) == 1);
return lisp_eval(lisp_eval(lisp_head(rest)));
}
else if (lisp_symbol_is_string(first, "quote"))
{
assert(lisp_length(rest) == 1);
return lisp_head(rest);
}
else if (lisp_symbol_is_string(first, "quasiquote")) {
assert(lisp_length(rest) == 1);
return lisp_eval_quasiquote(lisp_head(rest));
}
else if (lisp_symbol_is_string(first, "unquote")) {
runtime_error("unquote used in non-quoted region");
}
if (lisp_symbol_is_string(first, "lambda"))
{
return obj;
}
if (lisp_symbol_is_string(first, "macro"))
{
return obj;
}
else if (lisp_symbol_is_string(first, "add"))
{
assert(lisp_length(rest) == 2);
Object *a = lisp_eval(lisp_head(rest));
rest = lisp_tail(rest);
Object *b = lisp_eval(lisp_head(rest));
assert(a->object_kind == OBJECT_INT);
assert(b->object_kind == OBJECT_INT);
i64 value = lisp_get_int(a) + lisp_get_int(b);
return lisp_alloc_int(value);
}
else if (lisp_symbol_is_string(first, "eq"))
{
assert(lisp_length(rest) == 2);
Object *a = lisp_eval(lisp_head(rest));
rest = lisp_tail(rest);
Object *b = lisp_eval(lisp_head(rest));
return lisp_eq(a, b);
}
else if (lisp_symbol_is_string(first, "print"))
{
assert(lisp_length(rest) == 1);
Object *value = lisp_eval(lisp_head(rest));
lisp_print_object(value); printf("\n");
return LISP_NIL;
}
else if (lisp_symbol_is_string(first, "do"))
{
Object *out = LISP_NIL;
for (Cons *item = (Cons *) rest;
item != LISP_NIL;
item = item->next)
{
out = lisp_eval(item->object);
}
return out;
}
else if (lisp_symbol_is_string(first, "define"))
{
assert(lisp_length(rest) == 2);
Object *name = lisp_head(rest);
rest = lisp_tail(rest);
Object *value = lisp_head(rest);
env_add(current_env, name, lisp_eval(value));
return LISP_NIL;
}
else if (lisp_symbol_is_string(first, "head"))
{
assert(lisp_length(rest) == 1);
Object *a = lisp_eval(lisp_head(rest));
assert(a == LISP_NIL || a->object_kind == OBJECT_CONS);
return lisp_head(a);
}
else if (lisp_symbol_is_string(first, "tail"))
{
assert(lisp_length(rest) == 1);
Object *a = lisp_eval(lisp_head(rest));
assert(a == LISP_NIL || a->object_kind == OBJECT_CONS);
return lisp_tail(a);
}
else if (lisp_symbol_is_string(first, "cons"))
{
assert(lisp_length(rest) == 2);
Object *a = lisp_eval(lisp_head(rest));
rest = lisp_tail(rest);
Object *b = lisp_eval(lisp_head(rest));
assert(b == LISP_NIL || b->object_kind == OBJECT_CONS);
return (Object *) lisp_cons(a, (Cons *) b);
}
else if (lisp_symbol_is_string(first, "list"))
{
return (Object *) lisp_eval_list(rest);
}
}
// Otherwise, this is a function call.
//
// Procedure for evaluation:
//
// - Evaluate first arg and examine:
// - If function, evaluate all remaining args and apply them.
// - Else if macro, apply macro to all remaining args unevaluated
// - Else, runtime error.
//
{
//printf("first here: "); lisp_print_object(first); printf("\n");
assert(first->object_kind == OBJECT_CONS && "lambda expected");
assert(lisp_length(first) == 3);
Object *lambda = lisp_head(first);
//printf("object_kind: %d, ", lambda->object_kind); lisp_print_object(lambda); printf("\n");
assert(lambda->object_kind == OBJECT_SYMBOL);
int islambda = lisp_symbol_is_string(lambda, "lambda");
int ismacro = lisp_symbol_is_string(lambda, "macro");
assert(islambda || ismacro);
first = lisp_tail(first);
Object *params = lisp_head(first);
first = lisp_tail(first);
Object *body = lisp_head(first);
Object *args = rest;
assert(args == LISP_NIL || args->object_kind == OBJECT_CONS);
Env *env = lisp_alloc(Env);
env->parent = current_env;
assert(lisp_length(args) == lisp_length(params));
while (params)
{
assert(args);
Object *arg = lisp_head(args);
Object *value = islambda ? lisp_eval(arg) : arg;
env_add(env, lisp_head(params), value);
params = lisp_tail(params);
args = lisp_tail(args);
}
current_env = env;
Object *out = lisp_eval(body);
current_env = env->parent;
return out;
}
}
else if (obj->object_kind == OBJECT_SYMBOL)
{
// Check if it is a prim, in which case no env-lookup is needed.
static const char *const prims[] = {
"if", "eval", "quote", "quasiquote", "unquote", "add", "eq",
"lambda", "macro", "print", "do", "define", "head",
"tail", "cons", "list"
};
for (int i = 0;
i < (int) (sizeof prims / sizeof prims[0]);
++ i)
{
if (lisp_symbol_is_string(obj, prims[i]))
{
return obj;
}
}
// Otherwise, look up from env.
return env_get(current_env, (Symbol *) obj);
}
else
{
runtime_error("Can't evaluate object kind #%d", obj->object_kind);
}
}
int main(void)
{
Cons *list = LISP_NIL;
current_list_end = &list;
current_env = lisp_alloc(Env);
env_add(current_env, lisp_alloc_symbol("NIL", 3), LISP_NIL);
next_char();
while (curchar != -1)
{
parse_expr();
}
printf("\n\n\n");
for (Cons *item = list;
item != LISP_NIL;
item = item->next)
{
Object *result = lisp_eval(item->object);
(void) result;
//lisp_print_object(result); printf("\n");
}
return 0;
}
(define section
(lambda (name)
(do
(print "")
(print name)
(print "-----------------"))))
(define x 3)
(section "Basic tests")
(print x)
(print NIL)
(print (eq 3 4))
(print (eq 3 3))
(if (eq x x) (print 42) (print 43))
(if (eq 3 4) (print 42) (print 43))
(print (if (eq x x) 42 43))
(print (if (eq 3 4) 42 43))
(print (eq 3 NIL))
(print (eq NIL NIL))
(section "MACROS")
(define ftwice (lambda (x) (list x x)))
(define mtwice (macro (x) (do (eval x) (eval x))))
(define mtwice2 (macro (x)
(eval (quasiquote
(do
(unquote x)
(unquote x))))))
(do
(ftwice (print 17))
(print 0)
(mtwice (print 18))
(print 0)
(mtwice2 (print 19))
(print 0))
(section "Mapping / Filtering")
(define foreach
(lambda (xs f)
(if (eq xs NIL)
NIL
(do (f (head xs)) (foreach (tail xs) f)))))
(foreach (list 57 58 59) print)
(foreach (list 57 58 59) (lambda (x) (print (add x x))))
(define filter
(lambda (xs f)
(if (eq NIL xs)
NIL
(do
(define x (head xs))
(define rest (filter (tail xs) f))
(if (f x) (cons x rest) rest)))))
(print (filter (list 0 1 2 3) (lambda (x) (eq x 2))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment