Blog 2020/1/11
<- previous | index | next ->
Let's learn how to write a Lisp interpreter in C!
In part 8, we'll implement an environment.
An environment can be built using any associative data structure.
Though the typical choice is a hash table, we will instead start with something which is a bit simpler to implement: the association list.
We can build an association list (or a-list) using nothing but our linked-list objects!
Visually, here is a pointer to an a-list which associates the key x
to the value 42
and pi
to 3.14159
:
Note: though we could make this slightly more efficient by storing the key and value as a dotted pair, I have decided to avoid basing this Lisp implementation on Cons cells. Later, we will ditch the a-list for a hash table anyway.
Our a-list implementation consists of two functions: alist_push()
and alist_lookup()
:
forms.c (error handling elided):
/* Creates a new key-value pair and pushes it onto the A-list.
Returns 0 or error. */
int alist_push(List** alistpp, Symbol* keyp, Form* valuep) {
List* alistp = *alistpp;
/* construct a key-value "pair" (list) */
List* vlp;
new_list(&vlp, valuep);
List* kvlp;
new_list(&kvlp, (Form*)keyp);
kvlp->nextp = vlp;
List* newheadp;
new_list(&newheadp, (Form*)kvlp);
newheadp->nextp = alistp;
*alistpp = newheadp;
return 0;
}
/* Returns the value which corresponds to keyp.
Returns a Form or NULL. */
Form* alist_lookup(List* alistp, Symbol* keyp) {
while (true) {
if (is_list_empty(alistp)) {
return NULL;
} else {
List* kvlp = (List*)(alistp->datap);
if (form_eq(kvlp->datap, (Form*)keyp)) {
return kvlp->nextp->datap;
} else {
alistp = alistp->nextp;
continue;
}
}
}
}
In order to lookup values from an a-list, we need to be able to check if two keys (symbols) are equal.
But rather than implementing equality on symbols specifically, we'll go ahead and implement it for forms in general:
/* Are a and b equal? */
bool form_eq(Form* a, Form* b) {
if (a->type != b->type) {
return false;
} else if (is_clong(a)) {
CLong* al = (CLong*)a;
CLong* bl = (CLong*)b;
return al->value == bl->value;
} else if (is_cdouble(a)) {
CDouble* ad = (CDouble*)a;
CDouble* bd = (CDouble*)b;
return ad->value == bd->value;
} else if (is_cstring(a)) {
CString* as = (CString*)a;
CString* bs = (CString*)b;
return (strcmp(as->valuep, bs->valuep) == 0);
} else if (is_symbol(a)) {
Symbol* as = (Symbol*)a;
Symbol* bs = (Symbol*)b;
return (strcmp(as->valuep, bs->valuep) == 0);
} else if (is_list(a)) {
List* al = (List*)a;
List* bl = (List*)b;
while (true) {
if (is_list_empty(al) && is_list_empty(bl)) {
return true;
} else if (!is_list_empty(al) && !is_list_empty(bl)) {
if (form_eq(al->datap, bl->datap)) {
al = al->nextp;
bl = bl->nextp;
continue;
} else {
return false;
}
} else {
return false;
}
}
} else {
assert(false);
}
}
An environment is a stack of nested scopes (a-lists).
If we were to evaluate the following lambda:
((lambda (a b) (+ a b)) 1 2)
this would create a local scope
where a
was bound to 1
and b
was bound to 2
.
This also presumes a global scope
where +
is bound to some sort of buit-in function.
The local scope is stacked on top of the global scope, and shadows any conflicting keys.
We can also implement this environment / stack of nested scopes as a linked list!
Visually, here is a pointer to such an environment:
Our environment implementation will consist of env_bind()
, env_lookup()
, and env_push_scope()
:
forms.c (error handling elided):
/* Push a new key-value pair onto the environment.
Returns 0 or error. */
int env_bind(List* envp, Symbol* keyp, Form* valuep) {
assert(!is_list_empty(envp));
List* alistp = (List*)(envp->datap);
alist_push(&alistp, keyp, valuep);
envp->datap = (Form*)alistp;
return 0;
}
/* Recursively looks up the value from the environment stack.
Returns a Form or NULL. */
Form* env_lookup(List* envp, Symbol* keyp) {
List* alistp;
while (!is_list_empty(envp)) {
alistp = (List*)(envp->datap);
Form* valuep = alist_lookup(alistp, keyp);
if (valuep) {
return valuep;
} else {
envp = envp->nextp;
continue;
}
}
return NULL;
}
/* Pushes a new scope onto envpp.
Returns 0 or error. */
int env_push_scope(List** envpp) {
List* envp = *envpp;
List* alistp = g_emptylist;
List* newheadp;
new_list(&newheadp, (Form*)alistp);
newheadp->nextp = envp;
*envpp = newheadp;
return 0;
}
While atoms still evaluate to themselves, we add a special-case for symbols, which are looked up in the environment:
/* Evaluates formp into resultpp.
Returns 0. */
-int eval_form(Form* formp, Form** resultpp) {
- /* for now, all forms evaluate to themselves. */
- if (is_symbol(formp) || is_clong(formp) || is_cdouble(formp)
- || is_cstring(formp) || is_list(formp))
- {
+int eval_form(Form* formp, Form** resultpp, List* envp) {
+ /* literals evaluate to themselves. */
+ if (is_clong(formp) || is_cdouble(formp) || is_cstring(formp)) {
+ *resultpp = formp;
+ return 0;
+
+ /* symbols. */
+ } else if (is_symbol(formp)) {
+ Symbol* symp = (Symbol*)formp;
+ return eval_symbol(symp, resultpp, envp);
+
+ /* for now, lists evaluate to themselves. */
+ } else if (is_list(formp)) {
*resultpp = formp;
return 0;
Note that we also had to update the signature of eval_form()
to take an environment as an argument:
-int eval_form(Form* formp, Form** resultpp);
+int eval_form(Form* formp, Form** resultpp, List* envp);
And eval_symbol()
simply calls env_lookup()
:
/* Evaluates symp into resultpp.
Returns 0 or error. */
static int eval_symbol(Symbol* symp, Form** resultpp, List* envp) {
/* symbols evaluate to their corresponding value from the env. */
Form* valuep = env_lookup(envp, symp);
if (valuep == NULL) {
return E_eval_symbol__unbound;
} else {
*resultpp = valuep;
return 0;
}
}
repl()
also needs to take an environment, which it passes to eval_form()
:
/* Starts a read-eval-print loop.
Loops until EOF or I/O error.
Returns 0 or error. */
-int repl() {
+int repl(List* envp) {
FBuff* fbp;
int err = new_fbuff(&fbp, stdin);
if (err) {
@@ -80,7 +80,7 @@
/* evaluate the form. */
Form* resultp;
- err = eval_form(formp, &resultp);
+ err = eval_form(formp, &resultp, envp);
if (err) {
int err2 = fprintf(stderr, "Error %d.\n", err);
if (err2 < 0) {
We update main()
to create a default or global environment.
int main() {
list_main();
- int err = repl();
+ List* envp = new_default_env();
+ int err = repl(envp);
if (err) {
fprintf(stderr, "Error %d.\n", err);
return err;
We will hard-code a few values into our environment so that we can test it out.
/* Creates a default environment.
Asserts on error. */
List* new_default_env() {
List* envp = g_emptylist;
assert(env_push_scope(&envp) == 0);
assert(env_bind_clong(envp, "the-answer", 42) == 0);
assert(env_bind_cdouble(envp, "pi", 3.14159) == 0);
assert(env_bind_cstring(envp, "greeting", "Hello, world!") == 0);
return envp;
}
Note that we assert()
the success of these bind calls. If our Lisp interpreter can't even establish a global environment, we aren't interested in trying to recover from that.
We also add a few convenience functions for binding atoms: env_bind_clong()
, env_bind_cdouble()
, and env_bind_cstring()
:
forms.c (error handling elided):
/* Convenience function to push a long into the env.
Returns 0 or error. */
int env_bind_clong(List* envp, char* key, long value) {
Symbol* keyp;
new_symbol(&keyp, key);
CLong* valuep;
new_clong(&valuep, value);
return env_bind(envp, keyp, (Form*)valuep);
}
/* Convenience function to push a double into the env.
Returns 0 or error. */
int env_bind_cdouble(List* envp, char* key, double value) {
Symbol* keyp;
new_symbol(&keyp, key);
CDouble* valuep;
new_cdouble(&valuep, value);
return env_bind(envp, keyp, (Form*)valuep);
}
/* Convenience function to push a string into the env.
Returns 0 or error. */
int env_bind_cstring(List* envp, char* key, char* value) {
Symbol* keyp;
new_symbol(&keyp, key);
CString* valuep;
new_cstring(&valuep, value);
return env_bind(envp, keyp, (Form*)valuep);
}
$ ./lisp
> the-answer
42
> pi
3.141590
> greeting
"Hello, world!"
>
Great! We have a working environment.
In part 9, we'll add nil and boolean types, and create environment bindings for their (singleton) values.