Blog 2020/1/10
<- previous | index | next ->
Let's learn how to write a Lisp interpreter in C!
In part 6, we put the 'L' into Lisp: support for lists!
We create a struct
to represent lists:
struct List_ {
FormType type;
Form* datap;
struct List_* nextp;
};
typedef struct List_ List;
and update FormType
:
TypeCDouble = 30,
TypeCString = 40,
+ TypeList = 100,
};
typedef enum FormType_ FormType;
We use a global singleton object to represent the empty list:
extern List* g_emptylist;
We can check if a list is empty by simple pointer comparison:
/* Is listp the empty list? */
bool is_list_empty(List* lp) {
return lp == g_emptylist;
}
We need to initialize this emptylist object at program start:
#include <stdio.h>
int main() {
+ list_main();
forms.c (error handling elided):
/* Initialize the global empty list singleton.
Asserts on failure.
Call this function once from main(). */
void list_main() {
assert(g_emptylist == NULL);
g_emptylist = malloc(sizeof(List));
assert(g_emptylist != NULL);
g_emptylist->type = TypeList;
g_emptylist->nextp = g_emptylist;
g_emptylist->datap = (Form*)g_emptylist;
}
We also add a list utility function to push a new node onto the head of the list:
forms.c (error handling elided):
/* Push formp as the new head of listpp.
Returns 0 or errno. */
int list_push(List** listpp, Form* formp) {
List* list2p;
new_list(&list2p, formp);
list2p->nextp = *listpp;
*listpp = list2p;
return 0;
}
We update read_form()
to add a case for lists:
if (ch1 == '\0') {
return EOF;
+ /* a list. */
+ } else if (ch1 == '(') {
+ return read_list(fbp, formpp);
+
+ /* a list closer at this point is a syntax error. */
+ } else if (ch1 == ')') {
+ return E_read_form__unexpected_list_closer;
+
/* string literal. */
} else if (ch1 == '"') {
assert(buffp != buff);
And here's the implementation of read_list()
:
reader.c (some error handling elided):
/* Read all of the forms in this Lisp list.
Returns 0 or error. */
static int read_list(FBuff* fbp, Form** formpp) {
/* note: the leading '(' has already been consumed. */
int i = 0;
int ws_count;
char ch1;
List* headp = g_emptylist;
List* tailp = g_emptylist;
while (true) {
err = fbuff_discard_ws(fbp, &ws_count);
if (err) {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_1;
} else {
return err;
}
}
err = fbuff_getch(fbp, &ch1);
if (err) {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_2;
} else {
return err;
}
/* we've reached the end of the list. */
} else if (ch1 == ')') {
*formpp = (Form*)headp;
return 0;
} else {
fbuff_ungetch(fbp, ch1);
/* no space between atoms is an error. */
if (i > 0 && ws_count == 0 && is_ch_delim(ch1) == false) {
return E_read_list__missing_ws;
}
/* read the next form in the list. */
Form* formp;
err = read_form(fbp, &formp);
if (err) {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_3;
} else {
return err;
}
/* append the form onto the list. */
} else {
List* newp;
new_list(&newp, formp);
if (headp == g_emptylist) {
headp = newp;
tailp = newp;
} else {
tailp->nextp = newp;
tailp = newp;
}
}
}
i++;
}
}
/* Does ch indicate the end of a token? */
static bool is_ch_delim(char ch) {
return is_ch_ws(ch) || ch == ')' || ch == '(';
}
We need to update fbuff_discard_ws()
because read_list()
needs to know how many whitespace characters were discarded:
/* Advances fbp past any leading whitespace.
Note: commas are considered whitespace.
Returns 0, EOF, or errno. */
-static int fbuff_discard_ws(FBuff* fbp) {
+static int fbuff_discard_ws(FBuff* fbp, int* countp) {
int err;
char ch;
+ int count = 0;
while (true) {
err = fbuff_getch(fbp, &ch);
if (err) {
return err;
} else if (is_ch_ws(ch)) {
+ count++;
continue;
} else {
fbuff_ungetch(fbp, ch);
+ if (countp != NULL) {
+ *countp = count;
+ }
break;
}
}
We update fbuff_get_token()
to handle single-character tokens, e.g. (
and )
:
cursor++;
}
- /* the rest of the chars. */
- while (true) {
- size_t len = cursor - *buffpp;
-
- /* we have run out of room. */
- if (len == bufflen) {
- return E_file_get_token__buff_overflow;
- }
+ /* this is a single-character token. */
+ if (is_ch_single_character_token(ch)) {
+ *cursor = '\0';
+
+ /* this is a multi-character token. */
+ } else {
+ /* the rest of the chars. */
+ while (true) {
+ size_t len = cursor - *buffpp;
+
+ /* we have run out of room. */
/* Does ch constitute a token on its own? */
static bool is_ch_single_character_token(char ch) {
char* p = strchr("()", (int)ch);
return (p != NULL);
}
int eval_form(Form* formp, Form** resultpp) {
/* for now, all forms evaluate to themselves. */
if (is_symbol(formp) || is_clong(formp) || is_cdouble(formp)
- || is_cstring(formp))
+ || is_cstring(formp) || is_list(formp))
{
*resultpp = formp;
return 0;
} else if (is_cstring(formp)) {
CString* csp = (CString*)formp;
return print_cstring(csp, fp);
+ } else if (is_list(formp)) {
+ List* lp = (List*)formp;
+ return print_list(lp, fp);
} else {
assert(false);
}
printer.c (error handling elided):
/* Prints the List in lp into fp.
Returns 0 or errno. */
static int print_list(List* lp, FILE* fp) {
fputs("(", fp);
List* i = lp;
while (!is_list_empty(i)) {
if (i != lp) {
fputs(" ", fp);
}
print_form(i->datap, fp);
i = i->nextp;
}
fputs(")", fp);
return 0;
}
This is point at which we will drop the types from our printer output. This means our printer will output valid Lisp.
printer.c (error handling elided):
- fprintf(fp, "Symbol: %s", symp->valuep);
+ fprintf(fp, "%s", symp->valuep);
@@ -24,7 +24,7 @@
- fprintf(fp, "CLong: %li", lp->value);
+ fprintf(fp, "%li", lp->value);
@@ -36,7 +36,7 @@
- fprintf(fp, "CDouble: %f", dp->value);
+ fprintf(fp, "%f", dp->value);
@@ -141,7 +141,7 @@
- fprintf(fp, "CString: \"%s\"", esc);
+ fprintf(fp, "\"%s\"", esc);
Remember our test from part 1?
$ ./lisp
> (+ 1 1)
Symbol: (+
Symbol: 1
Symbol: 1)
>
Let's try it now:
$ ./lisp
> (+ 1 1)
(+ 1 1)
Progress!
In part 7 we will wrap up our initial reader implementation with support for comments. After that, we'll start focusing on evaluation!