Skip to content

Instantly share code, notes, and snippets.

@cellularmitosis
Last active January 11, 2020 05:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cellularmitosis/5484e89eb7b4181bc22df58051674550 to your computer and use it in GitHub Desktop.
Save cellularmitosis/5484e89eb7b4181bc22df58051674550 to your computer and use it in GitHub Desktop.
A Lisp interpreter in C, part 2: integers

Blog 2020/1/9

<- previous | index | next ->

A Lisp interpreter in C, part 2: integers

Let's learn how to write a Lisp interpreter in C!

In part 2, we implement support for reading, evaluating, and printing C long integers.

This post demonstrates how to extend our Lisp with additional types.

Lisp forms

We add a new struct to represent C long integers:

forms.h:

struct CLong_ {
    FormType type;
    long value;
};
typedef struct CLong_ CLong;

int new_clong(CLong** clpp, long l);
bool is_clong(Form* formp);

and add an entry to our FormType enum:

forms.h:

     _Type_UNINITIALIZED = 0,
     TypeSymbol = 10,
+    TypeCLong = 20,
 };
 typedef enum FormType_ FormType;
 

The reader

We add a function which attempts to parse a token as a long integer:

reader.c:

/* Tries to parse a long from buffp into lp.
Returns true or false. */
static bool try_parse_long(const char* buffp, long* lp) {
    char* endptr;
    long l = strtol(buffp, &endptr, 10);
    if (errno != 0) {
        errno = 0;
        return false;
    } else if (endptr == buffp || *endptr != '\0') {
        return false;
    } else {
        *lp = l;
        return true;
    }
}

and stitch it into read_form():

reader.c (error handling elided):

     if (ch1 == '\0') {
         return EOF;
     } else {
-        /* assume every token is a symbol. */
+        bool success;
+
+        /* an integer literal. */
+        long l;
+        success = try_parse_long(buffp, &l);
+        if (success) {
+            CLong* clp;
+            new_clong(&clp, l);
+            *formpp = (Form*)clp;
+            return 0;
+        }
+
+        /* assume anything else is a symbol. */
         Symbol* symp;
         err = new_symbol(&symp, buffp);
         if (err) {

The evaluator

eval.c:

 int eval_form(Form* formp, Form** resultpp) {
     /* for now, all forms evaluate to themselves. */
-    if (is_symbol(formp)) {
+    if (is_symbol(formp) || is_clong(formp)) {
         *resultpp = formp;
         return 0;

The printer

We implement support for printing CLong objects:

printer.c (error handling elided):

/* Prints the CLong in lp into fp.
Returns 0 or errno. */
static int print_clong(CLong* lp, FILE* fp) {
    fprintf(fp, "CLong: %li", lp->value);
    return 0;
}

and stitch it into print_form():

printer.c:

 int print_form(Form* formp, FILE* fp) {
     if (is_symbol(formp)) {
         Symbol* symp = (Symbol*)formp;
         return print_symbol(symp, fp);
+    } else if (is_clong(formp)) {
+        CLong* lp = (CLong*)formp;
+        return print_clong(lp, fp);
     } else {
         assert(false);
     }

Try it out

We can see that our evaluator understands integers now!

$ echo foo 1 42 | ./lisp 
Symbol: foo
CLong: 1
CLong: 42

However, because our tokenizer doesn't understand lists, we aren't quite 100% there yet:

$ echo "(+ 1 2)" | ./lisp 
Symbol: (+
CLong: 1
Symbol: 2)

Next time

In part 3 we will add support for C double floating-point numbers!

/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _ERRCODE_H_
#define _ERRCODE_H_
enum ErrCode_ {
/* not an error. */
E_success = 0,
/* A catch-all "unknown" error. */
/* Note: we start at 10,000 to skip past the errno range. */
E_unknown = 10000,
/* We ran out of buffer while reading a token. */
E_file_get_token__buff_overflow = 10010,
};
typedef enum ErrCode_ ErrCode;
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "eval.h"
#include "printer.h"
#include <assert.h>
/* Evaluates formp into resultpp.
Returns 0. */
int eval_form(Form* formp, Form** resultpp) {
/* for now, all forms evaluate to themselves. */
if (is_symbol(formp) || is_clong(formp)) {
*resultpp = formp;
return 0;
/* unsupported form. */
} else {
assert(false);
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _EVAL_H_
#define _EVAL_H_
#include "forms.h"
int eval_form(Form* formp, Form** resultpp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
/* All of the Lisp forms. */
#include "forms.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <string.h>
/* Symbol */
/* Malloc's a Symbol into sympp and copies sp into it. */
int new_symbol(Symbol** sympp, const char* sp) {
Symbol* symp = malloc(sizeof(Symbol));
if (symp == NULL) {
int err = errno;
errno = 0;
return err;
}
symp->type = TypeSymbol;
size_t len = strlen(sp);
symp->valuep = malloc(len + 1);
if (symp->valuep == NULL) {
free(symp);
int err = errno;
errno = 0;
return err;
}
strcpy(symp->valuep, sp);
*sympp = symp;
return 0;
}
/* Is formp a Symbol? */
bool is_symbol(Form* formp) {
return formp->type == TypeSymbol;
}
/* CLong */
/* Malloc's a CLong into clpp, initializing it with l.
Returns 0 or errno. */
int new_clong(CLong** clpp, long l) {
CLong* clp = malloc(sizeof(CLong));
if (clp == NULL) {
int err = errno;
errno = 0;
return err;
}
clp->type = TypeCLong;
clp->value = l;
*clpp = clp;
return 0;
}
/* Is formp a CLong? */
bool is_clong(Form* formp) {
return formp->type == TypeCLong;
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
/* All of the Lisp forms. */
#ifndef _FORM_H_
#define _FORM_H_
#include <stdbool.h>
/* The list of Lisp form types. */
enum FormType_ {
_Type_UNINITIALIZED = 0,
TypeSymbol = 10,
TypeCLong = 20,
};
typedef enum FormType_ FormType;
/* A type-erased Lisp form. */
struct Form_ {
FormType type;
};
typedef struct Form_ Form;
/* Symbol */
struct Symbol_ {
FormType type;
char* valuep;
};
typedef struct Symbol_ Symbol;
int new_symbol(Symbol** sympp, const char* sp);
bool is_symbol(Form* formp);
/* CLong */
struct CLong_ {
FormType type;
long value;
};
typedef struct CLong_ CLong;
int new_clong(CLong** clpp, long l);
bool is_clong(Form* formp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "repl.h"
#include "forms.h"
#include <stdlib.h>
#include <stdio.h>
int main() {
int err = repl();
if (err) {
fprintf(stderr, "Error %d.\n", err);
return err;
} else {
return EXIT_SUCCESS;
}
}
CC=gcc -g -std=c99 -Wall -Werror -D_POSIX_C_SOURCE=200809L
lisp: main.o forms.o reader.o eval.o printer.o repl.o
$(CC) -o lisp *.o
main.o: main.c
$(CC) -c main.c
forms.o: forms.h forms.c
$(CC) -c forms.c
reader.o: reader.h reader.c
$(CC) -c reader.c
eval.o: eval.h eval.c
$(CC) -c eval.c
printer.o: printer.h printer.c
$(CC) -c printer.c
repl.o: repl.h repl.c
$(CC) -c repl.c
clean:
rm -f *.o lisp
test: lisp
./run_tests.sh
.PHONY: clean
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "printer.h"
#include <assert.h>
/* Prints the Symbol in symp into fp.
Returns 0 or errno. */
static int print_symbol(Symbol* symp, FILE* fp) {
int err = fprintf(fp, "Symbol: %s", symp->valuep);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Prints the CLong in lp into fp.
Returns 0 or errno. */
static int print_clong(CLong* lp, FILE* fp) {
int err = fprintf(fp, "CLong: %li", lp->value);
if (err < 0) {
return err;
} else {
return 0;
}
}
/* Prints the Form in formp into fp.
Returns 0 or errno. */
int print_form(Form* formp, FILE* fp) {
if (is_symbol(formp)) {
Symbol* symp = (Symbol*)formp;
return print_symbol(symp, fp);
} else if (is_clong(formp)) {
CLong* lp = (CLong*)formp;
return print_clong(lp, fp);
} else {
assert(false);
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _PRINT_H_
#define _PRINT_H_
#include "forms.h"
#include <stdio.h>
int print_form(Form* formp, FILE* fp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "reader.h"
#include "errors.h"
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
/* Creates a new FBuff.
Returns 0 or errno. */
int new_fbuff(FBuff** fbpp, FILE* fp) {
FBuff* fbp = malloc(sizeof(FBuff));
if (fbp == NULL) {
int err = errno;
errno = 0;
return err;
}
fbp->fp = fp;
fbp->buffp = NULL;
fbp->nextp = NULL;
fbp->size = 0;
fbp->len = 0;
*fbpp = fbp;
return 0;
}
/* Frees fbp. */
void free_fbuff(FBuff* fbp) {
free(fbp->buffp);
free(fbp);
}
/* Reads the next line into fbp.
Returns 0, EOF, or errno. */
static int fbuff_getline(FBuff* fbp) {
ssize_t result = getline(&(fbp->buffp), &(fbp->size), fbp->fp);
if (result == -1) {
if (feof(fbp->fp)) {
return EOF;
} else {
result = errno;
errno = 0;
return result;
}
} else {
fbp->len = result;
fbp->nextp = fbp->buffp;
return 0;
}
}
/* Is fbp at the end of the current line? */
bool is_fbuff_eol(FBuff* fbp) {
return fbp->len == 0 || fbp->nextp == fbp->buffp + fbp->len;
}
/* Reads and consumes the next character into chp from fbp.
Returns 0, EOF, or errno. */
static int fbuff_getch(FBuff* fbp, char* chp) {
if (is_fbuff_eol(fbp)) {
int err = fbuff_getline(fbp);
if (err) {
return err;
}
}
char ch = *(fbp->nextp);
(fbp->nextp)++;
*chp = ch;
return 0;
}
/* Pushes ch back into fbp.
Asserts if used incorrectly. */
static void fbuff_ungetch(FBuff* fbp, char ch) {
assert(fbp->nextp > fbp->buffp);
fbp->nextp--;
*(fbp->nextp) = ch;
}
/* Is ch considered whitespace?
Note: commas are considered whitespace. */
static bool is_ch_ws(char ch) {
return isspace(ch) || ch == ',';
}
/* Advances fbp past any leading whitespace.
Note: commas are considered whitespace.
Returns 0, EOF, or errno. */
static int fbuff_discard_ws(FBuff* fbp) {
int err;
char ch;
while (true) {
err = fbuff_getch(fbp, &ch);
if (err) {
return err;
} else if (is_ch_ws(ch)) {
continue;
} else {
fbuff_ungetch(fbp, ch);
break;
}
}
return 0;
}
/* Advances fbp past any whitespace in the current line. */
void fbuff_skip_buffered_ws(FBuff* fbp) {
while (!is_fbuff_eol(fbp) && is_ch_ws(*(fbp->nextp))) {
fbp->nextp++;
}
}
/* Does ch indicate the end of a token? */
static bool is_ch_delim(char ch) {
return is_ch_ws(ch);
}
/* Advances fbp far enough to read one token of input.
Writes the token contents to *buffpp.
Returns 0, EOF, errno, or an error code. */
static int fbuff_get_token(FBuff* fbp, char** buffpp, size_t buffsize) {
int err;
char ch;
size_t bufflen = buffsize - 1;
char* cursor = *buffpp;
/* discard any leading whitespace. */
err = fbuff_discard_ws(fbp);
if (err) {
return err;
}
/* a token must be at least one char in length. */
err = fbuff_getch(fbp, &ch);
if (err) {
return err;
}
*cursor = ch;
cursor++;
/* the rest of the chars. */
while (true) {
size_t len = cursor - *buffpp;
/* we have run out of room. */
if (len == bufflen) {
return E_file_get_token__buff_overflow;
}
err = fbuff_getch(fbp, &ch);
/* we've reached EOF. return what we have so far. */
if (err == EOF) {
*cursor = '\0';
break;
/* there was an error reading from fp. */
} else if (err != 0) {
return err;
/* we've reached the end of this token. */
} else if (is_ch_delim(ch)) {
fbuff_ungetch(fbp, ch);
*cursor = '\0';
break;
/* this char is part of the token. */
} else {
*cursor = ch;
cursor++;
continue;
}
}
return 0;
}
/* Tries to parse a long from buffp into lp.
Returns true or false. */
static bool try_parse_long(const char* buffp, long* lp) {
char* endptr;
long l = strtol(buffp, &endptr, 10);
if (errno != 0) {
errno = 0;
return false;
} else if (endptr == buffp || *endptr != '\0') {
return false;
} else {
*lp = l;
return true;
}
}
/* Read one Lisp form from fp intp formpp.
Returns 0 or EOF or errno or error code. */
int read_form(FBuff* fbp, Form** formpp) {
int err;
int buffsize = 100;
char buff[buffsize];
char* buffp = buff;
char ch1;
/* read a token. */
err = fbuff_get_token(fbp, &buffp, buffsize);
if (err) {
return err;
}
ch1 = *buffp;
/* we've reached the end of input. */
if (ch1 == '\0') {
return EOF;
} else {
bool success;
/* an integer literal. */
long l;
success = try_parse_long(buffp, &l);
if (success) {
CLong* clp;
err = new_clong(&clp, l);
if (err) {
return err;
} else {
*formpp = (Form*)clp;
return 0;
}
}
/* assume anything else is a symbol. */
Symbol* symp;
err = new_symbol(&symp, buffp);
if (err) {
return err;
} else {
*formpp = (Form*)symp;
return 0;
}
}
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _READ_H_
#define _READ_H_
#include "forms.h"
#include <stdio.h>
/* A line-oriented FILE* buffer. */
struct FBuff_ {
FILE* fp;
char* buffp;
size_t size;
size_t len;
char* nextp;
};
typedef struct FBuff_ FBuff;
int new_fbuff(FBuff** fbpp, FILE* fp);
void free_fbuff(FBuff* fbp);
bool is_fbuff_eol(FBuff* fbp);
void fbuff_skip_buffered_ws(FBuff* fbp);
int read_form(FBuff* fbp, Form** formpp);
#endif
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#include "repl.h"
#include "reader.h"
#include "printer.h"
#include "eval.h"
#include <stdbool.h>
#include <sys/errno.h>
#include <stdio.h>
#include <unistd.h>
/* Is fp a tty? */
bool is_file_tty(FILE* fp) {
int result = isatty(fileno(fp));
if (result == 0) {
errno = 0;
return false;
} else {
return true;
}
}
/* Should the REPL prompt be displayed? */
bool should_display_prompt(FBuff* fbp) {
if (!is_file_tty(fbp->fp)) {
return false;
} else {
return is_fbuff_eol(fbp);
}
}
/* Starts a read-eval-print loop.
Loops until EOF or I/O error.
Returns 0 or error. */
int repl() {
FBuff* fbp;
int err = new_fbuff(&fbp, stdin);
if (err) {
return err;
}
while (true) {
fbuff_skip_buffered_ws(fbp);
if (should_display_prompt(fbp)) {
/* display the prompt. */
int err = fputs("> ", stdout);
if (err == EOF) {
free_fbuff(fbp);
err = errno;
errno = 0;
return err;
}
}
/* read the next Lisp form. */
Form* formp;
err = read_form(fbp, &formp);
if (err) {
if (err == EOF) {
break;
} else {
int err2 = fprintf(stderr, "Error %d.\n", err);
if (err2 < 0) {
free_fbuff(fbp);
return err2;
}
continue;
}
}
/* evaluate the form. */
Form* resultp;
err = eval_form(formp, &resultp);
if (err) {
int err2 = fprintf(stderr, "Error %d.\n", err);
if (err2 < 0) {
free_fbuff(fbp);
return err2;
}
continue;
}
/* print the result. */
err = print_form(resultp, stdout);
if (err) {
free_fbuff(fbp);
return err;
}
err = fputs("\n", stdout);
if (err == EOF) {
free_fbuff(fbp);
err = errno;
errno = 0;
return err;
}
/* loop. */
continue;
}
free_fbuff(fbp);
return 0;
}
/* This file is Copyright (C) 2019 Jason Pepas. */
/* This file is released under the terms of the MIT License. */
/* See https://opensource.org/licenses/MIT */
#ifndef _REPL_H_
#define _REPL_H_
int repl();
#endif
#!/bin/bash
set -e -o pipefail
for f in `ls test*.input`
do
base=$(basename $f .input)
echo $base
cat ${base}.input | ./lisp > /tmp/${base}.out
diff -urN ${base}.expected /tmp/${base}.out
done
echo "all tests passed"
CLong: 1
CLong: 1
CLong: 2
CLong: 3
Symbol: 1.2
Symbol: foo
Symbol: "foo"
Symbol: "line
Symbol: one
Symbol: line
Symbol: two"
Symbol: true
Symbol: nil
1
1 2 3
1.2
foo
"foo"
"line one
line two"
true
nil
Symbol: ()
Symbol: (
Symbol: )
Symbol: (1)
Symbol: (1
Symbol: 2)
Symbol: (1
Symbol: 2)
Symbol: (1(2()))
()
(
)
(1)
(1 2)
(1
2)
(1(2()))
Symbol: ;comment
Symbol: ;
Symbol: comment
Symbol: 1;comment
CLong: 1
Symbol: ;
Symbol: comment
Symbol: (1(;comment
Symbol: 2;comment
Symbol: ))
;comment
; comment
1;comment
1 ; comment
(1(;comment
2;comment
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment