cellularmitosis/Makefile

## README.md

      
    Raw
  

              README.md
            
          
    Blog 2020/1/10
<- previous |
index |
next ->
A Lisp interpreter in C, part 6: lists

Let's learn how to write a Lisp interpreter in C!
In part 6, we put the 'L' into Lisp: support for lists!
Lisp forms

We create a struct to represent lists:
forms.h:
struct List_ {
    FormType type;
    Form* datap;
    struct List_* nextp;
};
typedef struct List_ List;
and update FormType:
forms.h:
     TypeCDouble = 30,
     TypeCString = 40,
+    TypeList = 100,
 };
 typedef enum FormType_ FormType;
 
We use a global singleton object to represent the empty list:
forms.h:
extern List* g_emptylist;
We can check if a list is empty by simple pointer comparison:
forms.c:
/* Is listp the empty list? */
bool is_list_empty(List* lp) {
    return lp == g_emptylist;
}
We need to initialize this emptylist object at program start:
main.c:
#include <stdio.h>
  
 int main() {
+    list_main();

forms.c (error handling elided):
/* Initialize the global empty list singleton.
Asserts on failure.
Call this function once from main(). */
void list_main() {
    assert(g_emptylist == NULL);
    g_emptylist = malloc(sizeof(List));
    assert(g_emptylist != NULL);
    g_emptylist->type = TypeList;
    g_emptylist->nextp = g_emptylist;
    g_emptylist->datap = (Form*)g_emptylist;
}
We also add a list utility function to push a new node onto the head of the list:
forms.c (error handling elided):
/* Push formp as the new head of listpp.
Returns 0 or errno. */
int list_push(List** listpp, Form* formp) {
    List* list2p;
    new_list(&list2p, formp);
    list2p->nextp = *listpp;
    *listpp = list2p;
    return 0;
}
The reader

We update read_form() to add a case for lists:
reader.c:
     if (ch1 == '\0') {
         return EOF;
 
+    /* a list. */
+    } else if (ch1 == '(') {
+        return read_list(fbp, formpp);
+
+    /* a list closer at this point is a syntax error. */
+    } else if (ch1 == ')') {
+        return E_read_form__unexpected_list_closer;
+
     /* string literal. */
     } else if (ch1 == '"') {
         assert(buffp != buff);
And here's the implementation of read_list():
reader.c (some error handling elided):
/* Read all of the forms in this Lisp list.
Returns 0 or error. */
static int read_list(FBuff* fbp, Form** formpp) {
    /* note: the leading '(' has already been consumed. */

    int i = 0;
    int ws_count;
    char ch1;
    List* headp = g_emptylist;
    List* tailp = g_emptylist;

    while (true) {
        err = fbuff_discard_ws(fbp, &ws_count);
        if (err) {
            /* reaching EOF before ')' is an error. */
            if (err == EOF) {
                return E_read_list__premature_eof_1;
            } else {
                return err;
            }
        }

        err = fbuff_getch(fbp, &ch1);
        if (err) {
            /* reaching EOF before ')' is an error. */
            if (err == EOF) {
                return E_read_list__premature_eof_2;
            } else {
                return err;
            }

        /* we've reached the end of the list. */
        } else if (ch1 == ')') {
            *formpp = (Form*)headp;
            return 0;

        } else {
            fbuff_ungetch(fbp, ch1);

            /* no space between atoms is an error. */
            if (i > 0 && ws_count == 0 && is_ch_delim(ch1) == false) {
                return E_read_list__missing_ws;
            }

            /* read the next form in the list. */
            Form* formp;
            err = read_form(fbp, &formp);
            if (err) {
                /* reaching EOF before ')' is an error. */
                if (err == EOF) {
                    return E_read_list__premature_eof_3;
                } else {
                    return err;
                }

            /* append the form onto the list. */
            } else {
                List* newp;
                new_list(&newp, formp);

                if (headp == g_emptylist) {
                    headp = newp;
                    tailp = newp;
                } else {
                    tailp->nextp = newp;
                    tailp = newp;
                }
            }
        }

        i++;
    }
}

/* Does ch indicate the end of a token? */
static bool is_ch_delim(char ch) {
    return is_ch_ws(ch) || ch == ')' || ch == '(';
}
We need to update fbuff_discard_ws()
because read_list() needs to know how many whitespace characters were discarded:
reader.c:
 /* Advances fbp past any leading whitespace.
 Note: commas are considered whitespace.
 Returns 0, EOF, or errno. */
-static int fbuff_discard_ws(FBuff* fbp) {
+static int fbuff_discard_ws(FBuff* fbp, int* countp) {
     int err;
     char ch;
+    int count = 0;
 
     while (true) {
         err = fbuff_getch(fbp, &ch);
         if (err) {
             return err;
         } else if (is_ch_ws(ch)) {
+            count++;
             continue;
         } else {
             fbuff_ungetch(fbp, ch);
+            if (countp != NULL) {
+                *countp = count;
+            }
             break;
         }
     }
We update fbuff_get_token() to handle single-character tokens, e.g. ( and ):
reader.c:
         cursor++;
     }
 
-    /* the rest of the chars. */
-    while (true) {
-        size_t len = cursor - *buffpp;
-
-        /* we have run out of room. */
-        if (len == bufflen) {
-            return E_file_get_token__buff_overflow;
-        }
+    /* this is a single-character token. */
+    if (is_ch_single_character_token(ch)) {
+        *cursor = '\0';
+    
+    /* this is a multi-character token. */
+    } else {
+        /* the rest of the chars. */
+        while (true) {
+            size_t len = cursor - *buffpp;
+
+            /* we have run out of room. */
reader.c:
/* Does ch constitute a token on its own? */
static bool is_ch_single_character_token(char ch) {
    char* p = strchr("()", (int)ch);
    return (p != NULL);
}
The evaluator

eval.c:
 int eval_form(Form* formp, Form** resultpp) {
     /* for now, all forms evaluate to themselves. */
     if (is_symbol(formp) || is_clong(formp) || is_cdouble(formp)
-        || is_cstring(formp))
+        || is_cstring(formp) || is_list(formp))
     {
         *resultpp = formp;
         return 0;
The printer

printer.c:
     } else if (is_cstring(formp)) {
         CString* csp = (CString*)formp;
         return print_cstring(csp, fp);
+    } else if (is_list(formp)) {
+        List* lp = (List*)formp;
+        return print_list(lp, fp);
     } else {
         assert(false);
     }
printer.c (error handling elided):
/* Prints the List in lp into fp.
Returns 0 or errno. */
static int print_list(List* lp, FILE* fp) {
    fputs("(", fp);
    List* i = lp;
    while (!is_list_empty(i)) {
        if (i != lp) {
            fputs(" ", fp);
        }
        print_form(i->datap, fp);
        i = i->nextp;
    }
    fputs(")", fp);
    return 0;
}
This is point at which we will drop the types from our printer output.
This means our printer will output valid Lisp.
printer.c (error handling elided):
-    fprintf(fp, "Symbol: %s", symp->valuep);
+    fprintf(fp, "%s", symp->valuep);
@@ -24,7 +24,7 @@
-    fprintf(fp, "CLong: %li", lp->value);
+    fprintf(fp, "%li", lp->value);
@@ -36,7 +36,7 @@
-    fprintf(fp, "CDouble: %f", dp->value);
+    fprintf(fp, "%f", dp->value);
@@ -141,7 +141,7 @@
-    fprintf(fp, "CString: \"%s\"", esc);
+    fprintf(fp, "\"%s\"", esc);
Try it out

Remember our test from part 1?
$ ./lisp 
> (+ 1 1)
Symbol: (+
Symbol: 1
Symbol: 1)
>

Let's try it now:
$ ./lisp 
> (+ 1 1)
(+ 1 1)

Progress!
Next time

In part 7 we will wrap up our initial reader implementation with support for comments.  After that, we'll start focusing on evaluation!

  
## errors.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#ifndef _ERRCODE_H_
#define _ERRCODE_H_

enum ErrCode_ {
    /* not an error. */
    E_success = 0,

    /* A catch-all "unknown" error. */
    /* Note: we start at 10,000 to skip past the errno range. */
    E_unknown = 10000,

    /* We ran out of buffer while reading a token. */
    E_file_get_token__buff_overflow = 10010,

    /* We tried to parse an invalid string. */
    E_parse_string__invalid_string_1 = 10020,
    E_parse_string__invalid_string_2 = 10021,
    E_parse_string__invalid_string_3 = 10022,
    E_parse_string__invalid_string_4 = 10023,

    E_read_list__premature_eof_1 = 10030,
    E_read_list__premature_eof_2 = 10031,
    E_read_list__premature_eof_3 = 10032,
    E_read_list__missing_ws = 10040,

    E_read_form__unexpected_list_closer = 10050,
};
typedef enum ErrCode_ ErrCode;

#endif

## eval.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#include "eval.h"
#include "printer.h"
#include <assert.h>


/* Evaluates formp into resultpp.
Returns 0. */
int eval_form(Form* formp, Form** resultpp) {
    /* for now, all forms evaluate to themselves. */
    if (is_symbol(formp) || is_clong(formp) || is_cdouble(formp)
        || is_cstring(formp) || is_list(formp))
    {
        *resultpp = formp;
        return 0;

    /* unsupported form. */
    } else {
        assert(false);
    }
}

## eval.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#ifndef _EVAL_H_
#define _EVAL_H_

#include "forms.h"

int eval_form(Form* formp, Form** resultpp);

#endif

## forms.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

/* All of the Lisp forms. */

#include "forms.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <string.h>


/* Symbol */

/* Malloc's a Symbol into sympp and copies sp into it. */
int new_symbol(Symbol** sympp, const char* sp) {
    Symbol* symp = malloc(sizeof(Symbol));
    if (symp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }
    symp->type = TypeSymbol;

    size_t len = strlen(sp);
    symp->valuep = malloc(len + 1);
    if (symp->valuep == NULL) {
        free(symp);
        int err = errno;
        errno = 0;
        return err;
    }
    strcpy(symp->valuep, sp);

    *sympp = symp;
    return 0;
}


/* Is formp a Symbol? */
bool is_symbol(Form* formp) {
    return formp->type == TypeSymbol;
}


/* CLong */

/* Malloc's a CLong into clpp, initializing it with l.
Returns 0 or errno. */
int new_clong(CLong** clpp, long l) {
    CLong* clp = malloc(sizeof(CLong));
    if (clp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }
    clp->type = TypeCLong;
    clp->value = l;
    *clpp = clp;
    return 0;
}


/* Is formp a CLong? */
bool is_clong(Form* formp) {
    return formp->type == TypeCLong;
}


/* CDouble */

/* Malloc's a CDouble into cdpp, initializing it with d.
Returns 0 or errno. */
int new_cdouble(CDouble** cdpp, double d) {
    CDouble* cdp = malloc(sizeof(CDouble));
    if (cdp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }
    cdp->type = TypeCDouble;
    cdp->value = d;
    *cdpp = cdp;
    return 0;
}


/* Is formp a CDouble? */
bool is_cdouble(Form* formp) {
    return formp->type == TypeCDouble;
}


/* CString */

/* Malloc's a CString into cspp and places sp into it.
Note: sp is not copied; cspp assumes ownership of it.
Returns 0 or errno. */
int new_cstring(CString** cspp, char* sp) {
    CString* csp = malloc(sizeof(CString));
    if (csp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }
    csp->type = TypeCString;
    csp->valuep = sp;
    *cspp = csp;
    return 0;
}


/* Is formp a CString? */
bool is_cstring(Form* formp) {
    return formp->type == TypeCString;
}


/* List */

List* g_emptylist = NULL;


/* Initialize the global empty list singleton.
Asserts on failure.
Call this function once from main(). */
void list_main() {
    assert(g_emptylist == NULL);
    g_emptylist = malloc(sizeof(List));
    assert(g_emptylist != NULL);
    g_emptylist->type = TypeList;
    g_emptylist->nextp = g_emptylist;
    g_emptylist->datap = (Form*)g_emptylist;
}


/* Malloc's a List intp lpp, initializing it with datap.
Returns 0 or errno. */
int new_list(List** lpp, Form* datap) {
    List* lp;
    if (datap == NULL) {
        lp = g_emptylist;
    } else {
        lp = malloc(sizeof(List));
        if (lp == NULL) {
            int err = errno;
            errno = 0;
            return err;
        }
        lp->type = TypeList;
        lp->datap = datap;
        lp->nextp = g_emptylist;
    }
    *lpp = lp;
    return 0;
}


/* Is formp a List? */
bool is_list(Form* formp) {
    return formp->type == TypeList;
}


/* Is listp the empty list? */
bool is_list_empty(List* lp) {
    return lp == g_emptylist;
}


/* Push formp as the new head of listpp.
Returns 0 or errno. */
int list_push(List** listpp, Form* formp) {
    List* list2p;
    int err = new_list(&list2p, formp);
    if (err) {
        return err;
    }
    list2p->nextp = *listpp;
    *listpp = list2p;
    return 0;
}

## forms.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

/* All of the Lisp forms. */

#ifndef _FORM_H_
#define _FORM_H_

#include <stdbool.h>


/* The list of Lisp form types. */
enum FormType_ {
    _Type_UNINITIALIZED = 0,

    TypeSymbol = 10,
    TypeCLong = 20,
    TypeCDouble = 30,
    TypeCString = 40,

    TypeList = 100,
};
typedef enum FormType_ FormType;


/* A type-erased Lisp form. */
struct Form_ {
    FormType type;
};
typedef struct Form_ Form;


/* Symbol */

struct Symbol_ {
    FormType type;
    char* valuep;
};
typedef struct Symbol_ Symbol;

int new_symbol(Symbol** sympp, const char* sp);
bool is_symbol(Form* formp);


/* CLong */

struct CLong_ {
    FormType type;
    long value;
};
typedef struct CLong_ CLong;

int new_clong(CLong** clpp, long l);
bool is_clong(Form* formp);


/* CDouble */

struct CDouble_ {
    FormType type;
    double value;
};
typedef struct CDouble_ CDouble;

int new_cdouble(CDouble** cdpp, double d);
bool is_cdouble(Form* formp);


/* CString */

struct CString_ {
    FormType type;
    char* valuep;
};
typedef struct CString_ CString;

int new_cstring(CString** cspp, char* sp);
bool is_cstring(Form* formp);


/* List */

struct List_ {
    FormType type;
    Form* datap;
    struct List_* nextp;
};
typedef struct List_ List;

extern List* g_emptylist;

int new_list(List** lpp, Form* datap);
bool is_list(Form* formp);
int list_push(List** listpp, Form* formp);
bool is_list_empty(List* listp);
void list_main();


#endif

## main.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#include "repl.h"
#include "forms.h"
#include <stdlib.h>
#include <stdio.h>


/* The entrypoint of this program. */
int main() {
    list_main();

    int err = repl();
    if (err) {
        fprintf(stderr, "Error %d.\n", err);
        return err;
    } else {
        return EXIT_SUCCESS;
    }
}

## Makefile
CC=gcc -g -std=c99 -Wall -Werror -D_POSIX_C_SOURCE=200809L

lisp: main.o forms.o reader.o eval.o printer.o repl.o
	$(CC) -o lisp *.o

main.o: main.c
	$(CC) -c main.c

forms.o: forms.h forms.c
	$(CC) -c forms.c

reader.o: reader.h reader.c
	$(CC) -c reader.c

eval.o: eval.h eval.c
	$(CC) -c eval.c

printer.o: printer.h printer.c
	$(CC) -c printer.c

repl.o: repl.h repl.c
	$(CC) -c repl.c

clean:
	rm -f *.o lisp

test: lisp
	./run_tests.sh

.PHONY: clean

## printer.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#include "printer.h"
#include <assert.h>
#include <sys/errno.h>
#include <stdlib.h>
#include <string.h>


/* Prints the Symbol in symp into fp.
Returns 0 or errno. */
static int print_symbol(Symbol* symp, FILE* fp) {
    int err = fprintf(fp, "%s", symp->valuep);
    if (err < 0) {
        return err;
    } else {
        return 0;
    }
}


/* Prints the CLong in lp into fp.
Returns 0 or errno. */
static int print_clong(CLong* lp, FILE* fp) {
    int err = fprintf(fp, "%li", lp->value);
    if (err < 0) {
        return err;
    } else {
        return 0;
    }
}


/* Prints the CDouble in dp into fp.
Returns 0 or errno. */
static int print_cdouble(CDouble* dp, FILE* fp) {
    int err = fprintf(fp, "%f", dp->value);
    if (err < 0) {
        return err;
    } else {
        return 0;
    }
}


/* Is ch an unescaped char? */
bool is_unescaped(char ch) {
    char* found = strchr("\a\b\e\f\n\r\t\v\\\"", (int)ch);
    return found != NULL;
}


/* Returns the escaped version of unesc.
For example, if unesc is a newline, 'n' is returned.
Asserts false if unesc is not a valid escape char. */
static char escape_char(char unesc) {
    if (unesc == '\a') {
        return 'a';
    } else if (unesc == '\b') {
        return 'b';
    } else if (unesc == '\e') {
        return 'e';
    } else if (unesc == '\f') {
        return 'f';
    } else if (unesc == '\n') {
        return 'n';
    } else if (unesc == '\r') {
        return 'r';
    } else if (unesc == '\t') {
        return 't';
    } else if (unesc == '\v') {
        return 'v';
    } else if (unesc == '\\') {
        return '\\';
    } else if (unesc == '"') {
        return '"';
    } else {
        assert(false);
    }
}


/* Escapes srcp into a malloc'ed dstpp.
Returns 0 or errno. */
static int escape_str(char* srcp, char** dstpp) {
    size_t src_len = strlen(srcp);
    size_t src_size = src_len + 1;
    /* dst will be worst-case twice as large (every byte becomes two bytes),
    so start there, then shrink to fit at the end. */
    size_t dst_size = src_size * 2;

    char* dstp = malloc(dst_size);
    if (dstp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }

    char* src_cursor = srcp;
    char* src_last = srcp + src_len - 1;
    char* dst_cursor = dstp;
    size_t dst_len = 0;
    while(src_cursor <= src_last) {
        char ch = *src_cursor;
        if (is_unescaped(ch)) {
            *dst_cursor = '\\';
            dst_cursor++;
            *dst_cursor = escape_char(ch);
        } else {
            *dst_cursor = ch;
        }
        src_cursor++;
        dst_cursor++;
        dst_len++;
    }

    /* shrink-to-fit. */
    size_t newdst_size = dst_len + 1;
    char* newdstp = realloc(dstp, newdst_size);
    if (newdstp == NULL) {
        free(dstp);
        int err = errno;
        errno = 0;
        return err;
    } else {
        dstp = newdstp;
    }

    *dstpp = dstp;
    return 0;
}


/* Prints the CString in csp into fp.
Returns 0 or errno. */
static int print_cstring(CString* csp, FILE* fp) {
    char* esc;
    int err = escape_str(csp->valuep, &esc);
    if (err) {
        return err;
    }
    err = fprintf(fp, "\"%s\"", esc);
    if (err < 0) {
        return err;
    } else {
        return 0;
    }
}


/* Prints the List in lp into fp.
Returns 0 or errno. */
static int print_list(List* lp, FILE* fp) {
    int err = fputs("(", fp);
    if (err == EOF) {
        err = errno;
        errno = 0;
        return err;
    }
    List* i = lp;
    while (!is_list_empty(i)) {
        if (i != lp) {
            err = fputs(" ", fp);
            if (err == EOF) {
                err = errno;
                errno = 0;
                return err;
            }
        }
        err = print_form(i->datap, fp);
        if (err) {
            return err;
        }
        i = i->nextp;
    }
    err = fputs(")", fp);
    if (err == EOF) {
        err = errno;
        errno = 0;
        return err;
    }
    return 0;
}


/* Prints the Form in formp into fp.
Returns 0 or errno. */
int print_form(Form* formp, FILE* fp) {
    if (is_symbol(formp)) {
        Symbol* symp = (Symbol*)formp;
        return print_symbol(symp, fp);
    } else if (is_clong(formp)) {
        CLong* lp = (CLong*)formp;
        return print_clong(lp, fp);
    } else if (is_cdouble(formp)) {
        CDouble* dp = (CDouble*)formp;
        return print_cdouble(dp, fp);
    } else if (is_cstring(formp)) {
        CString* csp = (CString*)formp;
        return print_cstring(csp, fp);
    } else if (is_list(formp)) {
        List* lp = (List*)formp;
        return print_list(lp, fp);
    } else {
        assert(false);
    }
}

## printer.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#ifndef _PRINT_H_
#define _PRINT_H_

#include "forms.h"
#include <stdio.h>

int print_form(Form* formp, FILE* fp);

#endif

## reader.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#include "reader.h"
#include "errors.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>


/* Creates a new FBuff.
Returns 0 or errno. */
int new_fbuff(FBuff** fbpp, FILE* fp) {
    FBuff* fbp = malloc(sizeof(FBuff));
    if (fbp == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }

    fbp->fp = fp;
    fbp->buffp = NULL;
    fbp->nextp = NULL;
    fbp->size = 0;
    fbp->len = 0;

    *fbpp = fbp;
    return 0;
}


/* Frees fbp. */
void free_fbuff(FBuff* fbp) {
    free(fbp->buffp);
    free(fbp);
}


/* Reads the next line into fbp.
Returns 0, EOF, or errno. */
static int fbuff_getline(FBuff* fbp) {
    ssize_t result = getline(&(fbp->buffp), &(fbp->size), fbp->fp);
    if (result == -1) {
        if (feof(fbp->fp)) {
            return EOF;
        } else {
            result = errno;
            errno = 0;
            return result;
        }
    } else {
        fbp->len = result;
        fbp->nextp = fbp->buffp;
        return 0;
    }
}


/* Is fbp at the end of the current line? */
bool is_fbuff_eol(FBuff* fbp) {
    return fbp->len == 0 || fbp->nextp == fbp->buffp + fbp->len;
}


/* Reads and consumes the next character into chp from fbp.
Returns 0, EOF, or errno. */
static int fbuff_getch(FBuff* fbp, char* chp) {
    if (is_fbuff_eol(fbp)) {
        int err = fbuff_getline(fbp);
        if (err) {
            return err;
        }
    }
    char ch = *(fbp->nextp);
    (fbp->nextp)++;
    *chp = ch;
    return 0;
}


/* Pushes ch back into fbp.
Asserts if used incorrectly. */
static void fbuff_ungetch(FBuff* fbp, char ch) {
    assert(fbp->nextp > fbp->buffp);
    fbp->nextp--;
    *(fbp->nextp) = ch;
}


/* Is ch considered whitespace?
Note: commas are considered whitespace. */
static bool is_ch_ws(char ch) {
    return isspace(ch) || ch == ',';
}


/* Does ch indicate the end of a token? */
static bool is_ch_delim(char ch) {
    return is_ch_ws(ch) || ch == ')' || ch == '(';
}


/* Does ch constitute a token on its own? */
static bool is_ch_single_character_token(char ch) {
    char* p = strchr("()", (int)ch);
    return (p != NULL);
}


/* Advances fbp past any leading whitespace.
Note: commas are considered whitespace.
Returns 0, EOF, or errno. */
static int fbuff_discard_ws(FBuff* fbp, int* countp) {
    int err;
    char ch;
    int count = 0;

    while (true) {
        err = fbuff_getch(fbp, &ch);
        if (err) {
            return err;
        } else if (is_ch_ws(ch)) {
            count++;
            continue;
        } else {
            fbuff_ungetch(fbp, ch);
            if (countp != NULL) {
                *countp = count;
            }
            break;
        }
    }
    return 0;
}


/* Advances fbp past any whitespace in the current line. */
void fbuff_skip_buffered_ws(FBuff* fbp) {
    while (!is_fbuff_eol(fbp) && is_ch_ws(*(fbp->nextp))) {
        fbp->nextp++;
    }
}


/* Is u even? */
static bool is_even(unsigned int u) {
    /* if the LSB isn't set, u is even. */
    return !(u & 0x1);
}


/* Advances fbp far enough to read one token (which is a string literal).
Points buffpp to a malloc'ed buffer containing the string.
Returns 0, EOF, errno, or an error code. */
static int fbuff_get_token_str(FBuff* fbp, char** buffpp) {
    int err;
    char ch;

    /* allocate the initial buffer. */
    size_t buffsize = 1000;
    size_t bufflen = buffsize - 1;
    char* buffp = malloc(buffsize);
    if (buffp == NULL) {
        err = errno;
        errno = 0;
        return err;
    }
    char* cursor = buffp;

    /* the first char must be the opening quote. */
    err = fbuff_getch(fbp, &ch);
    if (err) {
        free(buffp);
        return err;
    } else {
        assert(ch == '"');
        *cursor = ch;
        cursor++;
    }

    unsigned int backslash_count = 0;
    while (true) {
        size_t len = cursor - buffp;

        /* time to grow the buffer. */
        if (len == bufflen) {
            buffsize *= 2;
            bufflen = buffsize - 1;
            char* newbuffp = realloc(buffp, buffsize);
            if (newbuffp == NULL) {
                free(buffp);
                err = errno;
                errno = 0;
                return err;
            } else {
                buffp = newbuffp;
            }
        }

        err = fbuff_getch(fbp, &ch);
        if (err) {
            free(buffp);
            return err;

        /* this is the end of the string. */
        } else if (ch == '"' && is_even(backslash_count)) {
            *cursor = ch;
            cursor++;
            *cursor = '\0';
            /* shrink buffp to fit the size of the string. */
            size_t finalbuffsize = buffp - cursor + 1;
            if (finalbuffsize < buffsize) {
                char* finalbuffp = realloc(buffp, finalbuffsize);
                if (finalbuffp == NULL) {
                    free(buffp);
                    err = errno;
                    errno = 0;
                    return err;
                } else {
                    buffp = finalbuffp;
                }
            }
            break;

        /* this is a regular char. */
        } else {
            /* track the number of consecutive backslashes so we can
            disambiguate the closing quote. */
            if (ch == '\\') {
                backslash_count++;
            } else {
                backslash_count = 0;
            }

            *cursor = ch;
            cursor++;
        }
    }

    *buffpp = buffp;
    return 0;
}


/* Advances fbp far enough to read one token of input.
Writes the token contents to *buffpp.
In the case of a string literal, points buffpp to a malloc'ed string buffer.
Returns 0, EOF, errno, or an error code. */
static int fbuff_get_token(FBuff* fbp, char** buffpp, size_t buffsize) {
    int err;
    char ch;
    size_t bufflen = buffsize - 1;
    char* cursor = *buffpp;

    /* discard any leading whitespace. */
    err = fbuff_discard_ws(fbp, NULL);
    if (err) {
        return err;
    }

    /* a token must be at least one char in length. */
    err = fbuff_getch(fbp, &ch);
    if (err) {
        return err;

    /* this is a string literal. */
    } else if (ch == '"') {
        fbuff_ungetch(fbp, ch);
        return fbuff_get_token_str(fbp, buffpp);

    } else {
        *cursor = ch;
        cursor++;
    }

    /* this is a single-character token. */
    if (is_ch_single_character_token(ch)) {
        *cursor = '\0';

    /* this is a multi-character token. */
    } else {
        /* the rest of the chars. */
        while (true) {
            size_t len = cursor - *buffpp;

            /* we have run out of room. */
            if (len == bufflen) {
                return E_file_get_token__buff_overflow;
            }

            err = fbuff_getch(fbp, &ch);
            /* we've reached EOF. return what we have so far. */
            if (err == EOF) {
                *cursor = '\0';
                break;

            /* there was an error reading from fp. */
            } else if (err != 0) {
                return err;

            /* we've reached the end of this token. */
            } else if (is_ch_delim(ch)) {
                fbuff_ungetch(fbp, ch);
                *cursor = '\0';
                break;

            /* this char is part of the token. */
            } else {
                *cursor = ch;
                cursor++;
                continue;
            }
        }
    }
    return 0;
}


/* Tries to parse a long from buffp into lp.
Returns true or false. */
static bool try_parse_long(const char* buffp, long* lp) {
    char* endptr;
    long l = strtol(buffp, &endptr, 10);
    if (errno != 0) {
        errno = 0;
        return false;
    } else if (endptr == buffp || *endptr != '\0') {
        return false;
    } else {
        *lp = l;
        return true;
    }
}


/* Tries to parse a double from buffp intp dp.
Returns true or false. */
static bool try_parse_double(const char* buffp, double* dp) {
    char* endptr;
    double d = strtod(buffp, &endptr);
    if (errno != 0) {
        errno = 0;
        return false;
    } else if (endptr == buffp || *endptr != '\0') {
        return false;
    } else {
        *dp = d;
        return true;
    }
}


/* Is ch in the list of escapable chars? */
static bool is_escapechar(char ch) {
    char* found = strchr("abefnrtv?'\"\\", (int)ch);
    return found != NULL;
}


/* Returns the "unescaped" char corresponding to the given escape char.
E.g. if esc is 'n', a newline character is returned.
Asserts false if esc is not a valid escape char. */
static char unescape_char(char esc) {
    if (esc == 'a') {
        return '\a';
    } else if (esc == 'b') {
        return '\b';
    } else if (esc == 'e') {
        return '\e';
    } else if (esc == 'f') {
        return '\f';
    } else if (esc == 'n') {
        return '\n';
    } else if (esc == 'r') {
        return '\r';
    } else if (esc == 't') {
        return '\t';
    } else if (esc == 'v') {
        return '\v';
    } else if (esc == '\\') {
        return '\\';
    } else if (esc == '\'') {
        return '\'';
    } else if (esc == '"') {
        return '"';
    } else if (esc == '?') {
        return '?';
    } else {
        assert(false);
    }
}


/* Parses a string from buffp.
*spp is malloc'ed with a copy of the parsed string.
Returns 0 or errno or error. */
static int parse_string(const char* buffp, char** spp) {
    size_t src_len = strlen(buffp);
    size_t src_size = src_len + 1;

    /* minimum buffp is an opening and closing quote, so we know len >= 2. */
    assert(src_len >= 2);
    /* first and last char must be '"'. */
    if (*buffp != '"') {
        return E_parse_string__invalid_string_1;
    }
    if (*(buffp + src_len - 1) != '"') {
        return E_parse_string__invalid_string_2;
    }

    size_t dst_size = src_size - 2;
    char* dst = malloc(dst_size);
    if (dst == NULL) {
        int err = errno;
        errno = 0;
        return err;
    }

    /* skip the opening quote. */
    const char* src_first = buffp + 1;
    /* skip the closing quote. */
    const char* src_last = buffp + src_len - 2;

    const char* src_cursor = src_first;
    char* dst_cursor = dst;

    while (src_cursor <= src_last) {
        /* this is possibly an escape sequence. */
        if (*src_cursor == '\\') {
            src_cursor++;

            /* the last char of the string is a backslash, which is invalid. */
            if (src_cursor > src_last) {
                free(dst);
                return E_parse_string__invalid_string_3;

            /* this is an escape sequence. */
            } else if (is_escapechar(*src_cursor)) {
                *dst_cursor = unescape_char(*src_cursor);
                src_cursor++;
                dst_cursor++;
                continue;

            /* this is an invalid escape sequence. */
            } else {
                free(dst);
                return E_parse_string__invalid_string_4;
            }

        /* this is just a regular char. */
        } else {
            *dst_cursor = *src_cursor;
            src_cursor++;
            dst_cursor++;
            continue;
        }
    }
    *dst_cursor = '\0';
    *spp = dst;
    return 0;
}


/* Read all of the forms in this Lisp list.
Returns 0 or error. */
static int read_list(FBuff* fbp, Form** formpp) {
    /* note: the leading '(' has already been consumed. */

    int err;
    int i = 0;
    int ws_count;
    char ch1;
    List* headp = g_emptylist;
    List* tailp = g_emptylist;

    while (true) {
        err = fbuff_discard_ws(fbp, &ws_count);
        if (err) {
            /* reaching EOF before ')' is an error. */
            if (err == EOF) {
                return E_read_list__premature_eof_1;
            } else {
                return err;
            }
        }

        err = fbuff_getch(fbp, &ch1);
        if (err) {
            /* reaching EOF before ')' is an error. */
            if (err == EOF) {
                return E_read_list__premature_eof_2;
            } else {
                return err;
            }

        /* we've reached the end of the list. */
        } else if (ch1 == ')') {
            *formpp = (Form*)headp;
            return 0;

        } else {
            fbuff_ungetch(fbp, ch1);

            /* no space between atoms is an error. */
            if (i > 0 && ws_count == 0 && is_ch_delim(ch1) == false) {
                return E_read_list__missing_ws;
            }

            /* read the next form in the list. */
            Form* formp;
            err = read_form(fbp, &formp);
            if (err) {
                /* reaching EOF before ')' is an error. */
                if (err == EOF) {
                    return E_read_list__premature_eof_3;
                } else {
                    return err;
                }

            /* append the form onto the list. */
            } else {
                List* newp;
                int err = new_list(&newp, formp);
                if (err) {
                    return err;
                }

                if (headp == g_emptylist) {
                    headp = newp;
                    tailp = newp;
                } else {
                    tailp->nextp = newp;
                    tailp = newp;
                }
            }
        }

        i++;
    }
}


/* Read one Lisp form from fp intp formpp.
Returns 0 or EOF or errno or error. */
int read_form(FBuff* fbp, Form** formpp) {
    int err;
    int buffsize = 100;
    char buff[buffsize];
    char* buffp = buff;
    char ch1;

    /* read a token. */
    err = fbuff_get_token(fbp, &buffp, buffsize);
    if (err) {
        return err;
    }
    ch1 = *buffp;

    /* we've reached the end of input. */
    if (ch1 == '\0') {
        return EOF;

    /* a list. */
    } else if (ch1 == '(') {
        return read_list(fbp, formpp);

    /* a list closer at this point is a syntax error. */
    } else if (ch1 == ')') {
        return E_read_form__unexpected_list_closer;

    /* string literal. */
    } else if (ch1 == '"') {
        assert(buffp != buff);
        char* sp;
        err = parse_string(buffp, &sp);
        if (err) {
            free(buffp);
            return err;
        }

        CString* csp;
        err = new_cstring(&csp, sp);
        if (err) {
            free(buffp);
            return err;
        } else {
            *formpp = (Form*)csp;
            return 0;
        }

    /* the form type can't be determined from ch1 alone. */
    } else {
        bool success;

        /* an integer literal. */
        long l;
        success = try_parse_long(buffp, &l);
        if (success) {
            CLong* clp;
            err = new_clong(&clp, l);
            if (err) {
                return err;
            } else {
                *formpp = (Form*)clp;
                return 0;
            }
        }

        /* a floating-point literal. */
        double d;
        success = try_parse_double(buffp, &d);
        if (success) {
            CDouble* cdp;
            err = new_cdouble(&cdp, d);
            if (err) {
                return err;
            } else {
                *formpp = (Form*)cdp;
                return 0;
            }
        }

        /* assume anything else is a symbol. */
        Symbol* symp;
        err = new_symbol(&symp, buffp);
        if (err) {
            return err;
        } else {
            *formpp = (Form*)symp;
            return 0;
        }
    }
}

## reader.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#ifndef _READ_H_
#define _READ_H_

#include "forms.h"
#include <stdio.h>


/* A line-oriented FILE* buffer. */
struct FBuff_ {
    FILE* fp;
    char* buffp;
    size_t size;
    size_t len;
    char* nextp;
};
typedef struct FBuff_ FBuff;

int new_fbuff(FBuff** fbpp, FILE* fp);
void free_fbuff(FBuff* fbp);
bool is_fbuff_eol(FBuff* fbp);
void fbuff_skip_buffered_ws(FBuff* fbp);


int read_form(FBuff* fbp, Form** formpp);


#endif

## repl.c
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#include "repl.h"
#include "reader.h"
#include "printer.h"
#include "eval.h"
#include <stdbool.h>
#include <sys/errno.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>


/* Is fp a tty? */
bool is_file_tty(FILE* fp) {
    int result = isatty(fileno(fp));
    if (result == 0) {
        errno = 0;
        return false;
    } else {
        return true;
    }
}


/* Should the REPL prompt be displayed? */
bool should_display_prompt(FBuff* fbp) {
    if (!is_file_tty(fbp->fp)) {
        return false;
    } else {
        return is_fbuff_eol(fbp);
    }
}


/* Starts a read-eval-print loop.
Loops until EOF or I/O error.
Returns 0 or error. */
int repl() {
    FBuff* fbp;
    int err = new_fbuff(&fbp, stdin);
    if (err) {
        return err;
    }

    while (true) {
        fbuff_skip_buffered_ws(fbp);

        if (should_display_prompt(fbp)) {
            /* display the prompt. */
            int err = fputs("> ", stdout);
            if (err == EOF) {
                free_fbuff(fbp);
                err = errno;
                errno = 0;
                return err;
            }
        }

        /* read the next Lisp form. */
        Form* formp;
        err = read_form(fbp, &formp);
        if (err) {
            if (err == EOF) {
                break;
            } else {
                int err2 = fprintf(stderr, "Error %d.\n", err);
                if (err2 < 0) {
                    free_fbuff(fbp);
                    return err2;
                }
                continue;
            }
        }

        /* evaluate the form. */
        Form* resultp;
        err = eval_form(formp, &resultp);
        if (err) {
            int err2 = fprintf(stderr, "Error %d.\n", err);
            if (err2 < 0) {
                free_fbuff(fbp);
                return err2;
            }
            continue;
        }

        /* print the result. */
        err = print_form(resultp, stdout);
        if (err) {
            free_fbuff(fbp);
            return err;
        }
        err = fputs("\n", stdout);
        if (err == EOF) {
            free_fbuff(fbp);
            err = errno;
            errno = 0;
            return err;
        }

        /* loop. */
        continue;
    }

    free_fbuff(fbp);
    return 0;
}

## repl.h
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */

#ifndef _REPL_H_
#define _REPL_H_

int repl();

#endif

## run_tests.sh
#!/bin/bash

set -e -o pipefail

for f in `ls test*.input`
do
    base=$(basename $f .input)
    echo $base
    cat ${base}.input | ./lisp > /tmp/${base}.out
    diff -urN ${base}.expected /tmp/${base}.out
done

echo "all tests passed"

## test1.expected
1
1
2
3
1.200000
foo
"foo"
"line one\nline two"
true
nil

## test1.input
1
1 2 3
1.2
foo
"foo"
"line one
line two"
true
nil

## test2.expected
()
()
(1)
(1 2)
(1 2)
(1 (2 ()))

## test2.input
()
(
)
(1)
(1 2)
(1
2)
(1(2()))

## test3.expected
;comment
;
comment
1;comment
1
;
comment
(1 (;comment 2;comment))

## test3.input
;comment
; comment
1;comment
1 ; comment
(1(;comment
2;comment
))
	/* This file is Copyright (C) 2019 Jason Pepas. */
	/* This file is released under the terms of the MIT License. */
	/* See https://opensource.org/licenses/MIT */

	#ifndef _ERRCODE_H_
	#define _ERRCODE_H_

	enum ErrCode_ {
	/* not an error. */
	E_success = 0,

	/* A catch-all "unknown" error. */
	/* Note: we start at 10,000 to skip past the errno range. */
	E_unknown = 10000,

	/* We ran out of buffer while reading a token. */
	E_file_get_token__buff_overflow = 10010,

	/* We tried to parse an invalid string. */
	E_parse_string__invalid_string_1 = 10020,
	E_parse_string__invalid_string_2 = 10021,
	E_parse_string__invalid_string_3 = 10022,
	E_parse_string__invalid_string_4 = 10023,

	E_read_list__premature_eof_1 = 10030,
	E_read_list__premature_eof_2 = 10031,
	E_read_list__premature_eof_3 = 10032,
	E_read_list__missing_ws = 10040,

	E_read_form__unexpected_list_closer = 10050,
	};
	typedef enum ErrCode_ ErrCode;

	#endif
	CC=gcc -g -std=c99 -Wall -Werror -D_POSIX_C_SOURCE=200809L

	lisp: main.o forms.o reader.o eval.o printer.o repl.o
	$(CC) -o lisp *.o

	main.o: main.c
	$(CC) -c main.c

	forms.o: forms.h forms.c
	$(CC) -c forms.c

	reader.o: reader.h reader.c
	$(CC) -c reader.c

	eval.o: eval.h eval.c
	$(CC) -c eval.c

	printer.o: printer.h printer.c
	$(CC) -c printer.c

	repl.o: repl.h repl.c
	$(CC) -c repl.c

	clean:
	rm -f *.o lisp

	test: lisp
	./run_tests.sh

	.PHONY: clean
	#!/bin/bash

	set -e -o pipefail

	for f in `ls test*.input`
	do
	base=$(basename $f .input)
	echo $base
	cat ${base}.input \| ./lisp > /tmp/${base}.out
	diff -urN ${base}.expected /tmp/${base}.out
	done

	echo "all tests passed"