Skip to content

Instantly share code, notes, and snippets.

@cellularmitosis
Last active January 13, 2020 22:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cellularmitosis/3d4ff1b214f93d5c6d1f30625e5f203f to your computer and use it in GitHub Desktop.
Save cellularmitosis/3d4ff1b214f93d5c6d1f30625e5f203f to your computer and use it in GitHub Desktop.
A Lisp interpreter in C, part 9: nil and bool

Blog 2020/1/12

<- previous | index | next ->

A Lisp interpreter in C, part 9: nil and bool

Let's learn how to write a Lisp interpreter in C!

In part 9, we'll implement two new types: Nil and CBool, and populate the environment with their (singleton) values.

Lisp forms

We add a struct to represent the Nil type. Nil represents the absence of a value.

forms.h:

struct Nil_ {
    FormType type;
};
typedef struct Nil_ Nil;

Nil differs from our other types thus far in that it is a singleton. This means that we only allow one instance of the Nil type:

forms.h:

extern Nil* g_nil;

Because nil is a singleton, implementing comparison against nil is a simple pointer comparison:

forms.h:

/* Is formp nil? */
bool is_nil(Form* formp) {
    return formp == (Form*)g_nil;
}

nil being a singleton also means that we need to initialize that singleton on app startup:

forms.c:

Nil* g_nil = NULL;

/* Initialize the global nil singleton.
Asserts on failure.
Call this function once from main(). */
void nil_main() {
    g_nil = malloc(sizeof(Nil));
    g_nil->type = TypeNil;
    assert(g_nil != NULL);
}

Note that we assert on failure: if our system is so resource constrained that we can't even malloc a singleton at app startup, this isn't a condition we can recover from.

We also add a struct to represent our boolean type, CBool:

forms.h:

struct CBool_ {
    FormType type;
    bool value;
};
typedef struct CBool_ CBool;

Similar to Nil, CBool is implemented as two singletons:

forms.h:

extern CBool* g_true;
extern CBool* g_false;

These singletons need to be initialized at app startup as well:

forms.c:

/* Initialize the global boolean singletons.
Asserts on failure.
Call this function once from main(). */
void cbool_main() {
    g_true = malloc(sizeof(CBool));
    assert(g_true != NULL);
    g_true->type = TypeCBool;
    g_true->value = true;

    g_false = malloc(sizeof(CBool));
    assert(g_false != NULL);
    g_false->type = TypeCBool;
    g_false->value = false;
}

We also update form_eq() to compute equality for Nil and CBool:

forms.c:

                 return false;
             }
         }
+    } else if (is_cbool(a)) {
+        /* CBool's are singletons, so we can just use pointer comparison. */
+        return a == b;
+    } else if (is_nil(a)) {
+        return true;
     } else {
         assert(false);
     }

Truthy and falsey

The introduction of booleans also means we need to introduce the concepts of truthy and falsey.

Most languages perform type coersion from some number of types to the boolean type, so that many kinds of values can be used as a predicate, as in statements like if (1) { ... or while (count) { ..., etc.

The specifics of which types can be coerced and how that happens are part of the semantics of that language.

For our Lisp, we'll borrow a page from Clojure's semantics:

  • nil and false are the only two falsey values
  • everything else is considered truthy

forms.c:

/* Is formp falsey? */
bool is_form_falsey(Form* formp) {
    return formp == (Form*)g_false || is_nil(formp);
}

/* Is formp truthy? */
bool is_form_truthy(Form* formp) {
    return !is_form_falsey(formp);
}

The printer

We update our printer with Nil and CBool support:

printer.c:

     } else if (is_list(formp)) {
         List* lp = (List*)formp;
         return print_list(lp, fp);
+    } else if (is_cbool(formp)) {
+        CBool* cbp = (CBool*)formp;
+        return print_cbool(cbp, fp);
+    } else if (is_nil(formp)) {
+        return print_nil(fp);
     } else {
         assert(false);
     }

printer.c (error handling elided):

/* Prints nil into fp.
Returns 0 or errno. */
static int print_nil(FILE* fp) {
    fputs("nil", fp);
    return 0;
}

/* Prints the CBool in cbp into fp.
Returns 0 or errno. */
static int print_cbool(CBool* cbp, FILE* fp) {
    if (cbp == g_true) {
        fputs("true", fp);
    } else if (cbp == g_false) {
        fputs("false", fp);
    } else {
        assert(false);
    }
    return 0;
}

main

We also updated main() to initialize our singletons and populate our environment with symbols for nil, true, and false:

main.c:

 /* The entrypoint of this program. */
 int main() {
     list_main();
+    cbool_main();
+    nil_main();
 

main.c:

     assert(env_bind_clong(envp, "the-answer", 42) == 0);
     assert(env_bind_cdouble(envp, "pi", 3.14159) == 0);
     assert(env_bind_cstring(envp, "greeting", "Hello, world!") == 0);
+    assert(env_bind_nil(envp, "nil") == 0);
+    assert(env_bind_cbool(envp, "true", g_true) == 0);
+    assert(env_bind_cbool(envp, "false", g_false) == 0);
     return envp;
 }
 

forms.c (error handling elided):

/* Convenience function to push nil into the env.
Returns 0 or error. */
int env_bind_nil(List* envp, char* key) {
    Symbol* keyp;
    new_symbol(&keyp, key);
    return env_bind(envp, keyp, (Form*)g_nil);
}

/* Convenience function to push a boolean into the env.
Returns 0 or error. */
int env_bind_cbool(List* envp, char* key, CBool* value) {
    Symbol* keyp;
    new_symbol(&keyp, key);
    return env_bind(envp, keyp, (Form*)value);
}

Try it out

$ ./lisp.py
> nil
nil
> true
true
>

Next time

In part 10 we'll give the user the ability to add new bindings to the environment at run-time!

/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _ERRCODE_H_
#define _ERRCODE_H_
enum ErrCode_ {
/* not an error. */
E_success = 0,
/* A catch-all "unknown" error. */
/* Note: we start at 10,000 to skip past the errno range. */
E_unknown = 10000,
/* We ran out of buffer while reading a token. */
E_file_get_token__buff_overflow = 10010,
/* We tried to parse an invalid string. */
E_parse_string__invalid_string_1 = 10020,
E_parse_string__invalid_string_2 = 10021,
E_parse_string__invalid_string_3 = 10022,
E_parse_string__invalid_string_4 = 10023,
E_read_list__premature_eof_1 = 10030,
E_read_list__premature_eof_2 = 10031,
E_read_list__premature_eof_3 = 10032,
E_read_list__missing_ws = 10040,
E_read_form__unexpected_list_closer = 10050,
E_read_form__comment = 10051,
E_eval_symbol__unbound = 10060,
};
typedef enum ErrCode_ ErrCode;
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "eval.h"
#include "errors.h"
#include "printer.h"
#include <assert.h>
/* Evaluates symp into resultpp.
Returns 0 or error. */
static int eval_symbol(Symbol* symp, Form** resultpp, List* envp) {
/* symbols evaluate to their corresponding value from the env. */
Form* valuep = env_lookup(envp, symp);
if (valuep == NULL) {
return E_eval_symbol__unbound;
} else {
*resultpp = valuep;
return 0;
}
}
/* Evaluates formp into resultpp.
Returns 0. */
int eval_form(Form* formp, Form** resultpp, List* envp) {
/* literals evaluate to themselves. */
if (is_clong(formp) || is_cdouble(formp) || is_cstring(formp)) {
*resultpp = formp;
return 0;
/* symbols. */
} else if (is_symbol(formp)) {
Symbol* symp = (Symbol*)formp;
return eval_symbol(symp, resultpp, envp);
/* for now, lists evaluate to themselves. */
} else if (is_list(formp)) {
*resultpp = formp;
return 0;
/* unsupported form. */
} else {
assert(false);
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _EVAL_H_
#define _EVAL_H_
#include "forms.h"
int eval_form(Form* formp, Form** resultpp, List* envp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
/* All of the Lisp forms. */
#include "forms.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <string.h>
/* Form */
/* Are a and b equal? */
bool form_eq(Form* a, Form* b) {
if (a->type != b->type) {
return false;
} else if (is_clong(a)) {
CLong* al = (CLong*)a;
CLong* bl = (CLong*)b;
return al->value == bl->value;
} else if (is_cdouble(a)) {
CDouble* ad = (CDouble*)a;
CDouble* bd = (CDouble*)b;
return ad->value == bd->value;
} else if (is_cstring(a)) {
CString* as = (CString*)a;
CString* bs = (CString*)b;
return (strcmp(as->valuep, bs->valuep) == 0);
} else if (is_symbol(a)) {
Symbol* as = (Symbol*)a;
Symbol* bs = (Symbol*)b;
return (strcmp(as->valuep, bs->valuep) == 0);
} else if (is_list(a)) {
List* al = (List*)a;
List* bl = (List*)b;
while (true) {
if (is_list_empty(al) && is_list_empty(bl)) {
return true;
} else if (!is_list_empty(al) && !is_list_empty(bl)) {
if (form_eq(al->datap, bl->datap)) {
al = al->nextp;
bl = bl->nextp;
continue;
} else {
return false;
}
} else {
return false;
}
}
} else if (is_cbool(a)) {
/* CBool's are singletons, so we can just use pointer comparison. */
return a == b;
} else if (is_nil(a)) {
return true;
} else {
assert(false);
}
}
/* Is formp truthy? */
bool is_form_truthy(Form* formp) {
return !is_form_falsey(formp);
}
/* Is formp falsey? */
bool is_form_falsey(Form* formp) {
return formp == (Form*)g_false || is_nil(formp);
}
/* Symbol */
/* Malloc's a Symbol into sympp and copies sp into it. */
int new_symbol(Symbol** sympp, const char* sp) {
Symbol* symp = malloc(sizeof(Symbol));
if (symp == NULL) {
int err = errno;
errno = 0;
return err;
}
symp->type = TypeSymbol;
size_t len = strlen(sp);
symp->valuep = malloc(len + 1);
if (symp->valuep == NULL) {
free(symp);
int err = errno;
errno = 0;
return err;
}
strcpy(symp->valuep, sp);
*sympp = symp;
return 0;
}
/* Is formp a Symbol? */
bool is_symbol(Form* formp) {
return formp->type == TypeSymbol;
}
/* CLong */
/* Malloc's a CLong into clpp, initializing it with l.
Returns 0 or errno. */
int new_clong(CLong** clpp, long l) {
CLong* clp = malloc(sizeof(CLong));
if (clp == NULL) {
int err = errno;
errno = 0;
return err;
}
clp->type = TypeCLong;
clp->value = l;
*clpp = clp;
return 0;
}
/* Is formp a CLong? */
bool is_clong(Form* formp) {
return formp->type == TypeCLong;
}
/* CDouble */
/* Malloc's a CDouble into cdpp, initializing it with d.
Returns 0 or errno. */
int new_cdouble(CDouble** cdpp, double d) {
CDouble* cdp = malloc(sizeof(CDouble));
if (cdp == NULL) {
int err = errno;
errno = 0;
return err;
}
cdp->type = TypeCDouble;
cdp->value = d;
*cdpp = cdp;
return 0;
}
/* Is formp a CDouble? */
bool is_cdouble(Form* formp) {
return formp->type == TypeCDouble;
}
/* CString */
/* Malloc's a CString into cspp and places sp into it.
Note: sp is not copied; cspp assumes ownership of it.
Returns 0 or errno. */
int new_cstring(CString** cspp, char* sp) {
CString* csp = malloc(sizeof(CString));
if (csp == NULL) {
int err = errno;
errno = 0;
return err;
}
csp->type = TypeCString;
csp->valuep = sp;
*cspp = csp;
return 0;
}
/* Is formp a CString? */
bool is_cstring(Form* formp) {
return formp->type == TypeCString;
}
/* CBool */
CBool* g_true = NULL;
CBool* g_false = NULL;
/* Initialize the global boolean singletons.
Asserts on failure.
Call this function once from main(). */
void cbool_main() {
g_true = malloc(sizeof(CBool));
assert(g_true != NULL);
g_true->type = TypeCBool;
g_true->value = true;
g_false = malloc(sizeof(CBool));
assert(g_false != NULL);
g_false->type = TypeCBool;
g_false->value = false;
}
/* Is formp a CBool? */
bool is_cbool(Form* formp) {
return formp->type == TypeCBool;
}
/* Nil */
Nil* g_nil = NULL;
/* Initialize the global nil singleton.
Asserts on failure.
Call this function once from main(). */
void nil_main() {
g_nil = malloc(sizeof(Nil));
g_nil->type = TypeNil;
assert(g_nil != NULL);
}
/* Is formp nil? */
bool is_nil(Form* formp) {
return formp == (Form*)g_nil;
}
/* List */
List* g_emptylist = NULL;
/* Initialize the global empty list singleton.
Asserts on failure.
Call this function once from main(). */
void list_main() {
assert(g_emptylist == NULL);
g_emptylist = malloc(sizeof(List));
assert(g_emptylist != NULL);
g_emptylist->type = TypeList;
g_emptylist->nextp = g_emptylist;
g_emptylist->datap = (Form*)g_emptylist;
}
/* Malloc's a List intp lpp, initializing it with datap.
Returns 0 or errno. */
int new_list(List** lpp, Form* datap) {
List* lp;
if (datap == NULL) {
lp = g_emptylist;
} else {
lp = malloc(sizeof(List));
if (lp == NULL) {
int err = errno;
errno = 0;
return err;
}
lp->type = TypeList;
lp->datap = datap;
lp->nextp = g_emptylist;
}
*lpp = lp;
return 0;
}
/* Is formp a List? */
bool is_list(Form* formp) {
return formp->type == TypeList;
}
/* Is listp the empty list? */
bool is_list_empty(List* lp) {
return lp == g_emptylist;
}
/* Push formp as the new head of listpp.
Returns 0 or errno. */
int list_push(List** listpp, Form* formp) {
List* list2p;
int err = new_list(&list2p, formp);
if (err) {
return err;
}
list2p->nextp = *listpp;
*listpp = list2p;
return 0;
}
/* Association List */
/* Creates a new key-value pair and pushes it onto the A-list.
Returns 0 or error. */
int alist_push(List** alistpp, Symbol* keyp, Form* valuep) {
List* alistp = *alistpp;
/* construct a key-value "pair" (list) */
List* vlp;
int err = new_list(&vlp, valuep);
if (err) {
return err;
}
List* kvlp;
err = new_list(&kvlp, (Form*)keyp);
if (err) {
return err;
}
kvlp->nextp = vlp;
List* newheadp;
err = new_list(&newheadp, (Form*)kvlp);
if (err) {
return err;
}
newheadp->nextp = alistp;
*alistpp = newheadp;
return 0;
}
/* Returns the value which corresponds to keyp.
Returns a Form or NULL. */
Form* alist_lookup(List* alistp, Symbol* keyp) {
while (true) {
if (is_list_empty(alistp)) {
return NULL;
} else {
List* kvlp = (List*)(alistp->datap);
if (form_eq(kvlp->datap, (Form*)keyp)) {
return kvlp->nextp->datap;
} else {
alistp = alistp->nextp;
continue;
}
}
}
}
/* Environment */
/* Pushes a new scope onto envpp.
Returns 0 or error. */
int env_push_scope(List** envpp) {
List* envp = *envpp;
List* alistp = g_emptylist;
List* newheadp;
int err = new_list(&newheadp, (Form*)alistp);
if (err) {
return err;
}
newheadp->nextp = envp;
*envpp = newheadp;
return 0;
}
/* Push a new key-value pair onto the environment.
Returns 0 or error. */
int env_bind(List* envp, Symbol* keyp, Form* valuep) {
assert(!is_list_empty(envp));
List* alistp = (List*)(envp->datap);
int err = alist_push(&alistp, keyp, valuep);
if (err) {
return err;
} else {
envp->datap = (Form*)alistp;
return 0;
}
}
/* Recursively looks up the value from the environment stack.
Returns a Form or NULL. */
Form* env_lookup(List* envp, Symbol* keyp) {
List* alistp;
while (!is_list_empty(envp)) {
alistp = (List*)(envp->datap);
Form* valuep = alist_lookup(alistp, keyp);
if (valuep) {
return valuep;
} else {
envp = envp->nextp;
continue;
}
}
return NULL;
}
/* Convenience function to push a long into the env.
Returns 0 or error. */
int env_bind_clong(List* envp, char* key, long value) {
Symbol* keyp;
int err = new_symbol(&keyp, key);
if (err) {
return err;
}
CLong* valuep;
err = new_clong(&valuep, value);
if (err) {
return err;
}
return env_bind(envp, keyp, (Form*)valuep);
}
/* Convenience function to push a double into the env.
Returns 0 or error. */
int env_bind_cdouble(List* envp, char* key, double value) {
Symbol* keyp;
int err = new_symbol(&keyp, key);
if (err) {
return err;
}
CDouble* valuep;
err = new_cdouble(&valuep, value);
if (err) {
return err;
}
return env_bind(envp, keyp, (Form*)valuep);
}
/* Convenience function to push a string into the env.
Returns 0 or error. */
int env_bind_cstring(List* envp, char* key, char* value) {
Symbol* keyp;
int err = new_symbol(&keyp, key);
if (err) {
return err;
}
CString* valuep;
err = new_cstring(&valuep, value);
if (err) {
return err;
}
return env_bind(envp, keyp, (Form*)valuep);
}
/* Convenience function to push nil into the env.
Returns 0 or error. */
int env_bind_nil(List* envp, char* key) {
Symbol* keyp;
int err = new_symbol(&keyp, key);
if (err) {
return err;
}
return env_bind(envp, keyp, (Form*)g_nil);
}
/* Convenience function to push a boolean into the env.
Returns 0 or error. */
int env_bind_cbool(List* envp, char* key, CBool* value) {
Symbol* keyp;
int err = new_symbol(&keyp, key);
if (err) {
return err;
}
return env_bind(envp, keyp, (Form*)value);
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
/* All of the Lisp forms. */
#ifndef _FORM_H_
#define _FORM_H_
#include <stdbool.h>
/* The list of Lisp form types. */
enum FormType_ {
_Type_UNINITIALIZED = 0,
TypeSymbol = 10,
TypeCLong = 20,
TypeCDouble = 30,
TypeCString = 40,
TypeCBool = 50,
TypeNil = 60,
TypeList = 100,
};
typedef enum FormType_ FormType;
/* A type-erased Lisp form. */
struct Form_ {
FormType type;
};
typedef struct Form_ Form;
bool form_eq(Form* a, Form* b);
bool is_form_truthy(Form* formp);
bool is_form_falsey(Form* formp);
/* Symbol */
struct Symbol_ {
FormType type;
char* valuep;
};
typedef struct Symbol_ Symbol;
int new_symbol(Symbol** sympp, const char* sp);
bool is_symbol(Form* formp);
/* CLong */
struct CLong_ {
FormType type;
long value;
};
typedef struct CLong_ CLong;
int new_clong(CLong** clpp, long l);
bool is_clong(Form* formp);
/* CDouble */
struct CDouble_ {
FormType type;
double value;
};
typedef struct CDouble_ CDouble;
int new_cdouble(CDouble** cdpp, double d);
bool is_cdouble(Form* formp);
/* CString */
struct CString_ {
FormType type;
char* valuep;
};
typedef struct CString_ CString;
int new_cstring(CString** cspp, char* sp);
bool is_cstring(Form* formp);
/* CBool */
struct CBool_ {
FormType type;
bool value;
};
typedef struct CBool_ CBool;
void cbool_main();
bool is_cbool(Form* formp);
extern CBool* g_true;
extern CBool* g_false;
/* Nil */
struct Nil_ {
FormType type;
};
typedef struct Nil_ Nil;
void nil_main();
bool is_nil(Form* formp);
extern Nil* g_nil;
/* List */
struct List_ {
FormType type;
Form* datap;
struct List_* nextp;
};
typedef struct List_ List;
extern List* g_emptylist;
int new_list(List** lpp, Form* datap);
bool is_list(Form* formp);
int list_push(List** listpp, Form* formp);
bool is_list_empty(List* listp);
void list_main();
/* Association List */
int alist_push(List** alistpp, Symbol* keyp, Form* valuep);
Form* alist_lookup(List* alistp, Symbol* keyp);
/* Environment */
int env_push_scope(List** envpp);
int env_bind(List* envp, Symbol* keyp, Form* valuep);
Form* env_lookup(List* envp, Symbol* keyp);
int env_bind_clong(List* envp, char* key, long value);
int env_bind_cdouble(List* envp, char* key, double value);
int env_bind_cstring(List* envp, char* key, char* value);
int env_bind_nil(List* envp, char* key);
int env_bind_cbool(List* envp, char* key, CBool* value);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "repl.h"
#include "forms.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
/* Creates a default environment.
Asserts on error. */
List* new_default_env() {
List* envp = g_emptylist;
assert(env_push_scope(&envp) == 0);
assert(env_bind_clong(envp, "the-answer", 42) == 0);
assert(env_bind_cdouble(envp, "pi", 3.14159) == 0);
assert(env_bind_cstring(envp, "greeting", "Hello, world!") == 0);
assert(env_bind_nil(envp, "nil") == 0);
assert(env_bind_cbool(envp, "true", g_true) == 0);
assert(env_bind_cbool(envp, "false", g_false) == 0);
return envp;
}
/* The entrypoint of this program. */
int main() {
list_main();
cbool_main();
nil_main();
List* envp = new_default_env();
int err = repl(envp);
if (err) {
fprintf(stderr, "Error %d.\n", err);
return err;
} else {
return EXIT_SUCCESS;
}
}
CC=gcc -g -std=c99 -Wall -Werror -D_POSIX_C_SOURCE=200809L
lisp: main.o forms.o reader.o eval.o printer.o repl.o
$(CC) -o lisp *.o
main.o: main.c
$(CC) -c main.c
forms.o: forms.h forms.c
$(CC) -c forms.c
reader.o: reader.h reader.c
$(CC) -c reader.c
eval.o: eval.h eval.c
$(CC) -c eval.c
printer.o: printer.h printer.c
$(CC) -c printer.c
repl.o: repl.h repl.c
$(CC) -c repl.c
clean:
rm -f *.o lisp
test: lisp
./run_tests.sh
.PHONY: clean
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "printer.h"
#include <assert.h>
#include <sys/errno.h>
#include <stdlib.h>
#include <string.h>
/* Prints the Symbol in symp into fp.
Returns 0 or errno. */
static int print_symbol(Symbol* symp, FILE* fp) {
int err = fprintf(fp, "%s", symp->valuep);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Prints the CLong in lp into fp.
Returns 0 or errno. */
static int print_clong(CLong* lp, FILE* fp) {
int err = fprintf(fp, "%li", lp->value);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Prints the CDouble in dp into fp.
Returns 0 or errno. */
static int print_cdouble(CDouble* dp, FILE* fp) {
int err = fprintf(fp, "%f", dp->value);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Is ch an unescaped char? */
bool is_unescaped(char ch) {
char* found = strchr("\a\b\e\f\n\r\t\v\\\"", (int)ch);
return found != NULL;
}
/* Returns the escaped version of unesc.
For example, if unesc is a newline, 'n' is returned.
Asserts false if unesc is not a valid escape char. */
static char escape_char(char unesc) {
if (unesc == '\a') {
return 'a';
} else if (unesc == '\b') {
return 'b';
} else if (unesc == '\e') {
return 'e';
} else if (unesc == '\f') {
return 'f';
} else if (unesc == '\n') {
return 'n';
} else if (unesc == '\r') {
return 'r';
} else if (unesc == '\t') {
return 't';
} else if (unesc == '\v') {
return 'v';
} else if (unesc == '\\') {
return '\\';
} else if (unesc == '"') {
return '"';
} else {
assert(false);
}
}
/* Escapes srcp into a malloc'ed dstpp.
Returns 0 or errno. */
static int escape_str(char* srcp, char** dstpp) {
size_t src_len = strlen(srcp);
size_t src_size = src_len + 1;
/* dst will be worst-case twice as large (every byte becomes two bytes),
so start there, then shrink to fit at the end. */
size_t dst_size = src_size * 2;
char* dstp = malloc(dst_size);
if (dstp == NULL) {
int err = errno;
errno = 0;
return err;
}
char* src_cursor = srcp;
char* src_last = srcp + src_len - 1;
char* dst_cursor = dstp;
size_t dst_len = 0;
while(src_cursor <= src_last) {
char ch = *src_cursor;
if (is_unescaped(ch)) {
*dst_cursor = '\\';
dst_cursor++;
*dst_cursor = escape_char(ch);
} else {
*dst_cursor = ch;
}
src_cursor++;
dst_cursor++;
dst_len++;
}
/* shrink-to-fit. */
size_t newdst_size = dst_len + 1;
char* newdstp = realloc(dstp, newdst_size);
if (newdstp == NULL) {
free(dstp);
int err = errno;
errno = 0;
return err;
} else {
dstp = newdstp;
}
*dstpp = dstp;
return 0;
}
/* Prints the CString in csp into fp.
Returns 0 or errno. */
static int print_cstring(CString* csp, FILE* fp) {
char* esc;
int err = escape_str(csp->valuep, &esc);
if (err) {
return err;
}
err = fprintf(fp, "\"%s\"", esc);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Prints the List in lp into fp.
Returns 0 or errno. */
static int print_list(List* lp, FILE* fp) {
int err = fputs("(", fp);
if (err == EOF) {
err = errno;
errno = 0;
return err;
}
List* i = lp;
while (!is_list_empty(i)) {
if (i != lp) {
err = fputs(" ", fp);
if (err == EOF) {
err = errno;
errno = 0;
return err;
}
}
err = print_form(i->datap, fp);
if (err) {
return err;
}
i = i->nextp;
}
err = fputs(")", fp);
if (err == EOF) {
err = errno;
errno = 0;
return err;
}
return 0;
}
/* Prints the CBool in cbp into fp.
Returns 0 or errno. */
static int print_cbool(CBool* cbp, FILE* fp) {
int err;
if (cbp == g_true) {
err = fputs("true", fp);
} else if (cbp == g_false) {
err = fputs("false", fp);
} else {
assert(false);
}
if (err == EOF) {
err = errno;
errno = 0;
return err;
}
return 0;
}
/* Prints nil into fp.
Returns 0 or errno. */
static int print_nil(FILE* fp) {
int err = fputs("nil", fp);
if (err == EOF) {
err = errno;
errno = 0;
return err;
}
return 0;
}
/* Prints the Form in formp into fp.
Returns 0 or errno. */
int print_form(Form* formp, FILE* fp) {
if (is_symbol(formp)) {
Symbol* symp = (Symbol*)formp;
return print_symbol(symp, fp);
} else if (is_clong(formp)) {
CLong* lp = (CLong*)formp;
return print_clong(lp, fp);
} else if (is_cdouble(formp)) {
CDouble* dp = (CDouble*)formp;
return print_cdouble(dp, fp);
} else if (is_cstring(formp)) {
CString* csp = (CString*)formp;
return print_cstring(csp, fp);
} else if (is_list(formp)) {
List* lp = (List*)formp;
return print_list(lp, fp);
} else if (is_cbool(formp)) {
CBool* cbp = (CBool*)formp;
return print_cbool(cbp, fp);
} else if (is_nil(formp)) {
return print_nil(fp);
} else {
assert(false);
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _PRINT_H_
#define _PRINT_H_
#include "forms.h"
#include <stdio.h>
int print_form(Form* formp, FILE* fp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "reader.h"
#include "errors.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
/* Creates a new FBuff.
Returns 0 or errno. */
int new_fbuff(FBuff** fbpp, FILE* fp) {
FBuff* fbp = malloc(sizeof(FBuff));
if (fbp == NULL) {
int err = errno;
errno = 0;
return err;
}
fbp->fp = fp;
fbp->buffp = NULL;
fbp->nextp = NULL;
fbp->size = 0;
fbp->len = 0;
*fbpp = fbp;
return 0;
}
/* Frees fbp. */
void free_fbuff(FBuff* fbp) {
free(fbp->buffp);
free(fbp);
}
/* Reads the next line into fbp.
Returns 0, EOF, or errno. */
static int fbuff_getline(FBuff* fbp) {
ssize_t result = getline(&(fbp->buffp), &(fbp->size), fbp->fp);
if (result == -1) {
if (feof(fbp->fp)) {
return EOF;
} else {
result = errno;
errno = 0;
return result;
}
} else {
fbp->len = result;
fbp->nextp = fbp->buffp;
return 0;
}
}
/* Is fbp at the end of the current line? */
bool is_fbuff_eol(FBuff* fbp) {
return fbp->len == 0 || fbp->nextp == fbp->buffp + fbp->len;
}
/* Reads and consumes the next character into chp from fbp.
Returns 0, EOF, or errno. */
static int fbuff_getch(FBuff* fbp, char* chp) {
if (is_fbuff_eol(fbp)) {
int err = fbuff_getline(fbp);
if (err) {
return err;
}
}
char ch = *(fbp->nextp);
(fbp->nextp)++;
*chp = ch;
return 0;
}
/* Pushes ch back into fbp.
Asserts if used incorrectly. */
static void fbuff_ungetch(FBuff* fbp, char ch) {
assert(fbp->nextp > fbp->buffp);
fbp->nextp--;
*(fbp->nextp) = ch;
}
/* Is ch considered whitespace?
Note: commas are considered whitespace. */
static bool is_ch_ws(char ch) {
return isspace(ch) || ch == ',';
}
/* Does ch indicate the end of a token? */
static bool is_ch_delim(char ch) {
return is_ch_ws(ch) || ch == '(' || ch == ')' || ch == ';';
}
/* Does ch constitute a token on its own? */
static bool is_ch_single_character_token(char ch) {
char* p = strchr("()", (int)ch);
return (p != NULL);
}
/* Advances fbp past any leading whitespace.
Note: commas are considered whitespace.
Returns 0, EOF, or errno. */
static int fbuff_discard_ws(FBuff* fbp, int* countp) {
int err;
char ch;
int count = 0;
while (true) {
err = fbuff_getch(fbp, &ch);
if (err) {
return err;
} else if (is_ch_ws(ch)) {
count++;
continue;
} else {
fbuff_ungetch(fbp, ch);
if (countp != NULL) {
*countp = count;
}
break;
}
}
return 0;
}
/* Advances fbp past any whitespace in the current line. */
void fbuff_skip_buffered_ws(FBuff* fbp) {
while (!is_fbuff_eol(fbp) && is_ch_ws(*(fbp->nextp))) {
fbp->nextp++;
}
}
/* Is u even? */
static bool is_even(unsigned int u) {
/* if the LSB isn't set, u is even. */
return !(u & 0x1);
}
/* Advances fbp far enough to read one token (which is a string literal).
Points buffpp to a malloc'ed buffer containing the string.
Returns 0, EOF, errno, or an error code. */
static int fbuff_get_token_str(FBuff* fbp, char** buffpp) {
int err;
char ch;
/* allocate the initial buffer. */
size_t buffsize = 1000;
size_t bufflen = buffsize - 1;
char* buffp = malloc(buffsize);
if (buffp == NULL) {
err = errno;
errno = 0;
return err;
}
char* cursor = buffp;
/* the first char must be the opening quote. */
err = fbuff_getch(fbp, &ch);
if (err) {
free(buffp);
return err;
} else {
assert(ch == '"');
*cursor = ch;
cursor++;
}
unsigned int backslash_count = 0;
while (true) {
size_t len = cursor - buffp;
/* time to grow the buffer. */
if (len == bufflen) {
buffsize *= 2;
bufflen = buffsize - 1;
char* newbuffp = realloc(buffp, buffsize);
if (newbuffp == NULL) {
free(buffp);
err = errno;
errno = 0;
return err;
} else {
buffp = newbuffp;
}
}
err = fbuff_getch(fbp, &ch);
if (err) {
free(buffp);
return err;
/* this is the end of the string. */
} else if (ch == '"' && is_even(backslash_count)) {
*cursor = ch;
cursor++;
*cursor = '\0';
/* shrink buffp to fit the size of the string. */
size_t finalbuffsize = buffp - cursor + 1;
if (finalbuffsize < buffsize) {
char* finalbuffp = realloc(buffp, finalbuffsize);
if (finalbuffp == NULL) {
free(buffp);
err = errno;
errno = 0;
return err;
} else {
buffp = finalbuffp;
}
}
break;
/* this is a regular char. */
} else {
/* track the number of consecutive backslashes so we can
disambiguate the closing quote. */
if (ch == '\\') {
backslash_count++;
} else {
backslash_count = 0;
}
*cursor = ch;
cursor++;
}
}
*buffpp = buffp;
return 0;
}
/* Advances fbp far enough to read one token of input.
Writes the token contents to *buffpp.
In the case of a string literal, points buffpp to a malloc'ed string buffer.
Returns 0, EOF, errno, or an error code. */
static int fbuff_get_token(FBuff* fbp, char** buffpp, size_t buffsize) {
int err;
char ch;
size_t bufflen = buffsize - 1;
char* cursor = *buffpp;
/* discard any leading whitespace. */
err = fbuff_discard_ws(fbp, NULL);
if (err) {
return err;
}
/* a token must be at least one char in length. */
err = fbuff_getch(fbp, &ch);
if (err) {
return err;
/* this is a string literal. */
} else if (ch == '"') {
fbuff_ungetch(fbp, ch);
return fbuff_get_token_str(fbp, buffpp);
} else {
*cursor = ch;
cursor++;
}
/* this is a single-character token. */
if (is_ch_single_character_token(ch)) {
*cursor = '\0';
/* this is a multi-character token. read the rest of the chars. */
} else {
bool is_comment = (ch == ';');
while (true) {
size_t len = cursor - *buffpp;
/* we have run out of room. */
if (len == bufflen) {
return E_file_get_token__buff_overflow;
}
err = fbuff_getch(fbp, &ch);
/* we've reached EOF. return what we have so far. */
if (err == EOF) {
*cursor = '\0';
break;
/* there was an error reading from fp. */
} else if (err != 0) {
return err;
/* we've reached the end of this regular token. */
} else if (!is_comment && is_ch_delim(ch)) {
fbuff_ungetch(fbp, ch);
*cursor = '\0';
break;
/* we've reached the end of this comment token. */
} else if (is_comment && ch == '\n') {
/* we leave the trailing '\n' to ensure the list args are
still whitespace-separated. */
fbuff_ungetch(fbp, ch);
*cursor = '\0';
break;
/* this char is part of the token. */
} else {
*cursor = ch;
cursor++;
continue;
}
}
}
return 0;
}
/* Tries to parse a long from buffp into lp.
Returns true or false. */
static bool try_parse_long(const char* buffp, long* lp) {
char* endptr;
long l = strtol(buffp, &endptr, 10);
if (errno != 0) {
errno = 0;
return false;
} else if (endptr == buffp || *endptr != '\0') {
return false;
} else {
*lp = l;
return true;
}
}
/* Tries to parse a double from buffp intp dp.
Returns true or false. */
static bool try_parse_double(const char* buffp, double* dp) {
char* endptr;
double d = strtod(buffp, &endptr);
if (errno != 0) {
errno = 0;
return false;
} else if (endptr == buffp || *endptr != '\0') {
return false;
} else {
*dp = d;
return true;
}
}
/* Is ch in the list of escapable chars? */
static bool is_escapechar(char ch) {
char* found = strchr("abefnrtv?'\"\\", (int)ch);
return found != NULL;
}
/* Returns the "unescaped" char corresponding to the given escape char.
E.g. if esc is 'n', a newline character is returned.
Asserts false if esc is not a valid escape char. */
static char unescape_char(char esc) {
if (esc == 'a') {
return '\a';
} else if (esc == 'b') {
return '\b';
} else if (esc == 'e') {
return '\e';
} else if (esc == 'f') {
return '\f';
} else if (esc == 'n') {
return '\n';
} else if (esc == 'r') {
return '\r';
} else if (esc == 't') {
return '\t';
} else if (esc == 'v') {
return '\v';
} else if (esc == '\\') {
return '\\';
} else if (esc == '\'') {
return '\'';
} else if (esc == '"') {
return '"';
} else if (esc == '?') {
return '?';
} else {
assert(false);
}
}
/* Parses a string from buffp.
*spp is malloc'ed with a copy of the parsed string.
Returns 0 or errno or error. */
static int parse_string(const char* buffp, char** spp) {
size_t src_len = strlen(buffp);
size_t src_size = src_len + 1;
/* minimum buffp is an opening and closing quote, so we know len >= 2. */
assert(src_len >= 2);
/* first and last char must be '"'. */
if (*buffp != '"') {
return E_parse_string__invalid_string_1;
}
if (*(buffp + src_len - 1) != '"') {
return E_parse_string__invalid_string_2;
}
size_t dst_size = src_size - 2;
char* dst = malloc(dst_size);
if (dst == NULL) {
int err = errno;
errno = 0;
return err;
}
/* skip the opening quote. */
const char* src_first = buffp + 1;
/* skip the closing quote. */
const char* src_last = buffp + src_len - 2;
const char* src_cursor = src_first;
char* dst_cursor = dst;
while (src_cursor <= src_last) {
/* this is possibly an escape sequence. */
if (*src_cursor == '\\') {
src_cursor++;
/* the last char of the string is a backslash, which is invalid. */
if (src_cursor > src_last) {
free(dst);
return E_parse_string__invalid_string_3;
/* this is an escape sequence. */
} else if (is_escapechar(*src_cursor)) {
*dst_cursor = unescape_char(*src_cursor);
src_cursor++;
dst_cursor++;
continue;
/* this is an invalid escape sequence. */
} else {
free(dst);
return E_parse_string__invalid_string_4;
}
/* this is just a regular char. */
} else {
*dst_cursor = *src_cursor;
src_cursor++;
dst_cursor++;
continue;
}
}
*dst_cursor = '\0';
*spp = dst;
return 0;
}
/* Read all of the forms in this Lisp list.
Returns 0 or error. */
static int read_list(FBuff* fbp, Form** formpp) {
/* note: the leading '(' has already been consumed. */
int err;
int i = 0;
int ws_count;
char ch1;
List* headp = g_emptylist;
List* tailp = g_emptylist;
while (true) {
err = fbuff_discard_ws(fbp, &ws_count);
if (err) {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_1;
} else {
return err;
}
}
err = fbuff_getch(fbp, &ch1);
if (err) {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_2;
} else {
return err;
}
/* we've reached the end of the list. */
} else if (ch1 == ')') {
*formpp = (Form*)headp;
return 0;
} else {
fbuff_ungetch(fbp, ch1);
/* no space between atoms is an error. */
if (i > 0 && ws_count == 0 && is_ch_delim(ch1) == false) {
return E_read_list__missing_ws;
}
/* read the next form in the list. */
Form* formp;
err = read_form(fbp, &formp);
if (err) {
if (err == E_read_form__comment) {
continue;
} else {
/* reaching EOF before ')' is an error. */
if (err == EOF) {
return E_read_list__premature_eof_3;
} else {
return err;
}
}
/* append the form onto the list. */
} else {
List* newp;
int err = new_list(&newp, formp);
if (err) {
return err;
}
if (headp == g_emptylist) {
headp = newp;
tailp = newp;
} else {
tailp->nextp = newp;
tailp = newp;
}
}
}
i++;
}
}
/* Read one Lisp form from fp intp formpp.
Returns 0 or E_read_form__comment or EOF or errno or error. */
int read_form(FBuff* fbp, Form** formpp) {
int err;
int buffsize = 100;
char buff[buffsize];
char* buffp = buff;
char ch1;
/* read a token. */
err = fbuff_get_token(fbp, &buffp, buffsize);
if (err) {
return err;
}
ch1 = *buffp;
/* we've reached the end of input. */
if (ch1 == '\0') {
return EOF;
/* this is a comment. */
} else if (ch1 == ';') {
return E_read_form__comment;
/* a list. */
} else if (ch1 == '(') {
return read_list(fbp, formpp);
/* a list closer at this point is a syntax error. */
} else if (ch1 == ')') {
return E_read_form__unexpected_list_closer;
/* string literal. */
} else if (ch1 == '"') {
assert(buffp != buff);
char* sp;
err = parse_string(buffp, &sp);
if (err) {
free(buffp);
return err;
}
CString* csp;
err = new_cstring(&csp, sp);
if (err) {
free(buffp);
return err;
} else {
*formpp = (Form*)csp;
return 0;
}
/* the form type can't be determined from ch1 alone. */
} else {
bool success;
/* an integer literal. */
long l;
success = try_parse_long(buffp, &l);
if (success) {
CLong* clp;
err = new_clong(&clp, l);
if (err) {
return err;
} else {
*formpp = (Form*)clp;
return 0;
}
}
/* a floating-point literal. */
double d;
success = try_parse_double(buffp, &d);
if (success) {
CDouble* cdp;
err = new_cdouble(&cdp, d);
if (err) {
return err;
} else {
*formpp = (Form*)cdp;
return 0;
}
}
/* assume anything else is a symbol. */
Symbol* symp;
err = new_symbol(&symp, buffp);
if (err) {
return err;
} else {
*formpp = (Form*)symp;
return 0;
}
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _READ_H_
#define _READ_H_
#include "forms.h"
#include <stdio.h>
/* A line-oriented FILE* buffer. */
struct FBuff_ {
FILE* fp;
char* buffp;
size_t size;
size_t len;
char* nextp;
};
typedef struct FBuff_ FBuff;
int new_fbuff(FBuff** fbpp, FILE* fp);
void free_fbuff(FBuff* fbp);
bool is_fbuff_eol(FBuff* fbp);
void fbuff_skip_buffered_ws(FBuff* fbp);
int read_form(FBuff* fbp, Form** formpp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "repl.h"
#include "errors.h"
#include "reader.h"
#include "printer.h"
#include "eval.h"
#include <stdbool.h>
#include <sys/errno.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
/* Is fp a tty? */
bool is_file_tty(FILE* fp) {
int result = isatty(fileno(fp));
if (result == 0) {
errno = 0;
return false;
} else {
return true;
}
}
/* Should the REPL prompt be displayed? */
bool should_display_prompt(FBuff* fbp) {
if (!is_file_tty(fbp->fp)) {
return false;
} else {
return is_fbuff_eol(fbp);
}
}
/* Starts a read-eval-print loop.
Loops until EOF or I/O error.
Returns 0 or error. */
int repl(List* envp) {
FBuff* fbp;
int err = new_fbuff(&fbp, stdin);
if (err) {
return err;
}
while (true) {
fbuff_skip_buffered_ws(fbp);
if (should_display_prompt(fbp)) {
/* display the prompt. */
int err = fputs("> ", stdout);
if (err == EOF) {
free_fbuff(fbp);
err = errno;
errno = 0;
return err;
}
}
/* read the next Lisp form. */
Form* formp;
err = read_form(fbp, &formp);
if (err) {
if (err == EOF) {
break;
} else if (err == E_read_form__comment) {
continue;
} else {
int err2 = fprintf(stderr, "Error %d.\n", err);
if (err2 < 0) {
free_fbuff(fbp);
return err2;
}
continue;
}
}
/* evaluate the form. */
Form* resultp;
err = eval_form(formp, &resultp, envp);
if (err) {
int err2 = fprintf(stderr, "Error %d.\n", err);
if (err2 < 0) {
free_fbuff(fbp);
return err2;
}
continue;
}
/* print the result. */
err = print_form(resultp, stdout);
if (err) {
free_fbuff(fbp);
return err;
}
err = fputs("\n", stdout);
if (err == EOF) {
free_fbuff(fbp);
err = errno;
errno = 0;
return err;
}
/* loop. */
continue;
}
free_fbuff(fbp);
return 0;
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _REPL_H_
#define _REPL_H_
#include "forms.h"
int repl(List* envp);
#endif
#!/bin/bash
set -e -o pipefail
for f in `ls test*.input`
do
base=$(basename $f .input)
echo $base
cat ${base}.input | ./lisp > /tmp/${base}.out
diff -urN ${base}.expected /tmp/${base}.out
done
echo "all tests passed"
1
1
2
3
1.200000
"foo"
"line one\nline two"
1
1 2 3
1.2
"foo"
"line one
line two"
()
()
(1)
(1 2)
(1 2)
(1 (2 ()))
()
(
)
(1)
(1 2)
(1
2)
(1(2()))
;comment
; comment
1;comment
1 ; comment
(1(;comment
2;comment
))
42
3.141590
"Hello, world!"
nil
true
false
the-answer
pi
greeting
nil
true
false
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment