Skip to content

Instantly share code, notes, and snippets.

@joeyadams
Created April 23, 2011 13:13
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save joeyadams/938595 to your computer and use it in GitHub Desktop.
CI: A simple self-interpreter that's actually a compiler
/*
* Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
This program requires libgc, a garbage collector for C.
To compile with GCC:
gcc -Wall -W -O3 -lgc ci.c -o ci
To run:
1. Supply the name of the script on the command line. The script will then take its input on stdin.
2. Pass the script through stdin. If the program is terminated with a ), the interpreter will read stdin starting after that.
*/
#include <assert.h>
#include <gc/gc.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define fatal(fmt, ...) do { \
fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
} while (0)
#define alloc(type) ((type *) GC_MALLOC(sizeof(type)))
#define alloc_array(type, count) ((type *) GC_MALLOC(sizeof(type) * (count)))
#define alloc_size(size) (GC_MALLOC(size))
/*
* Dynamic array macros yanked from my darray module:
*
* http://ccan.ozlabs.org/info/darray.html
*/
#define darray(type) struct {type *item; size_t size; size_t alloc;}
#define darray_new() {0,0,0}
#define darray_init(arr) do {(arr).item=0; (arr).size=0; (arr).alloc=0;} while(0)
#define darray_free(arr) do {GC_FREE((arr).item);} while(0)
#define darray_append(arr, ...) do { \
darray_resize(arr, (arr).size+1); \
(arr).item[(arr).size-1] = (__VA_ARGS__); \
} while(0)
#define darray_push(arr, ...) darray_append(arr, __VA_ARGS__)
#define darray_pop(arr) ((arr).item[--(arr).size])
#define darray_append_items(arr, items, count) do { \
size_t __count = (count), __oldSize = (arr).size; \
darray_resize(arr, __oldSize + __count); \
memcpy((arr).item + __oldSize, items, __count * sizeof(*(arr).item)); \
} while(0)
#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize))
#define darray_realloc(arr, newAlloc) do { \
(arr).item = GC_REALLOC((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \
} while(0)
#define darray_growalloc(arr, need) do { \
size_t __need = (need); \
if (__need > (arr).alloc) \
darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \
} while(0)
static inline size_t darray_next_alloc(size_t alloc, size_t need)
{
if (alloc == 0)
alloc = 1;
while (alloc < need)
alloc *= 2;
return alloc;
}
#define darray_foreach(i, arr) \
for ((i) = &(arr).item[0]; (i) < &(arr).item[(arr).size]; (i)++)
/* Division and modulus that round toward negative infinity. */
static int div_(int n, int d)
{
int div = n / d;
int mod = n % d;
if ((mod < 0 && d > 0) || (mod > 0 && d < 0))
div--;
return div;
}
static int mod_(int n, int d)
{
int mod = n % d;
if ((mod < 0 && d > 0) || (mod > 0 && d < 0))
mod += d;
return mod;
}
typedef struct Value Value;
typedef struct Block Block;
typedef struct Inst Inst;
typedef darray(Value) Stack;
typedef darray(Inst) darray_Inst;
/*
* InstFunc
* A function that performs a single operation, updating the stack
* and optionally returning a block that is to be executed afterward.
*/
typedef Block *InstFunc(Stack *);
typedef enum {V_INT, V_BLOCK} ValueTag;
typedef enum {B_VALUE, B_CODE, B_JOIN} BlockTag;
struct Value
{
ValueTag tag;
union {
/* V_INT */
int n;
/* V_BLOCK */
Block *block;
};
};
struct Block {
BlockTag tag;
union {
/* B_VALUE */
Value value;
/* B_CODE */
struct {
Inst *insts;
size_t count;
};
/* B_JOIN */
struct {
Block *a;
Block *b;
};
};
};
struct Inst {
InstFunc *func;
Value push_me_instead; // if func is NULL
};
static Block *compile(FILE *f);
static void runBlock(Block *block, Stack *stack);
static void flatten(Block *block, darray_Inst *target);
static InstFunc f_call, f_lift, f_join;
static InstFunc f_copy, f_pluck, f_drop;
static InstFunc f_eq, f_lt, f_gt, f_range;
static InstFunc f_putc, f_getc, f_ungetc;
static InstFunc f_add, f_sub, f_mul, f_div, f_mod;
static const char *valueTag_show(ValueTag tag)
{
switch (tag) {
case V_INT: return "an int";
case V_BLOCK: return "a block";
default: return "an <<invalid type>>";
}
}
static Block *compile(FILE *in)
{
darray_Inst insts = darray_new();
for (;;) {
int c = fgetc(in);
Inst inst;
if (c == EOF || c == ')')
break;
if (c == '#') {
for (;;) {
c = fgetc(in);
if (c == EOF || c == '\n')
break;
}
if (c == EOF)
break;
else
continue;
}
memset(&inst, 0, sizeof(inst));
switch (c) {
case '(':
inst.push_me_instead.tag = V_BLOCK;
inst.push_me_instead.block = compile(in);
break;
case '$':
inst.func = f_call;
break;
case '^':
inst.func = f_lift;
break;
case '&':
inst.func = f_join;
break;
case 'c':
inst.func = f_copy;
break;
case 'p':
inst.func = f_pluck;
break;
case 'd':
inst.func = f_drop;
break;
case '=':
inst.func = f_eq;
break;
case '<':
inst.func = f_lt;
break;
case '>':
inst.func = f_gt;
break;
case '~':
inst.func = f_range;
break;
case '.':
inst.func = f_putc;
break;
case ',':
inst.func = f_getc;
break;
case '!':
inst.func = f_ungetc;
break;
case '\'':
inst.push_me_instead.tag = V_INT;
inst.push_me_instead.n = fgetc(in);
break;
case '+':
inst.func = f_add;
break;
case '-':
inst.func = f_sub;
break;
case '*':
inst.func = f_mul;
break;
case '/':
inst.func = f_div;
break;
case '%':
inst.func = f_mod;
break;
default:
if (c >= '0' && c <= '9') {
int n = c - '0';
for (;;) {
c = fgetc(in);
if (c >= '0' && c <= '9') {
n *= 10;
n += c - '0';
} else {
ungetc(c, in);
break;
}
}
inst.push_me_instead.tag = V_INT;
inst.push_me_instead.n = n;
} else {
continue; /* Don't append an instruction. */
}
}
darray_append(insts, inst);
}
Block *block = alloc(Block);
block->tag = B_CODE;
block->insts = insts.item;
block->count = insts.size;
return block;
}
#define item(n) (stack->item[stack->size - (n) - 1])
#define push(x) darray_push(*stack, x)
#define pop() darray_pop(*stack)
static void runBlock(Block *block, Stack *stack)
{
Inst *inst;
size_t count;
tail_call:
if (block->tag != B_CODE) {
darray_Inst target = darray_new();
flatten(block, &target);
block->tag = B_CODE;
block->insts = target.item;
block->count = target.size;
}
inst = block->insts;
count = block->count;
for (; count-- > 0; inst++) {
if (inst->func != NULL) {
block = inst->func(stack);
if (block != NULL) {
if (count > 0)
runBlock(block, stack);
else
goto tail_call;
}
} else {
push(inst->push_me_instead);
}
}
}
static void flatten(Block *block, darray_Inst *target)
{
switch (block->tag) {
case B_VALUE:
darray_append(*target, (Inst) {NULL, block->value});
break;
case B_CODE:
darray_append_items(*target, block->insts, block->count);
break;
case B_JOIN:
flatten(block->a, target);
flatten(block->b, target);
break;
default:
fatal("flatten: BUG: Block tag (%d) is invalid", block->tag);
}
}
static void need1(Stack *stack)
{
assert(stack->size >= 1);
}
static void need1int(Stack *stack)
{
assert(stack->size >= 1 && item(0).tag == V_INT);
}
static void need1block(Stack *stack)
{
assert(stack->size >= 1 && item(0).tag == V_BLOCK);
}
static void need2(Stack *stack)
{
assert(stack->size >= 2);
}
static void need2int(Stack *stack)
{
assert(stack->size >= 2 &&
item(0).tag == V_INT &&
item(1).tag == V_INT);
}
static void need2block(Stack *stack)
{
assert(stack->size >= 2 &&
item(0).tag == V_BLOCK &&
item(1).tag == V_BLOCK);
}
static void need3int(Stack *stack)
{
assert(stack->size >= 3 &&
item(0).tag == V_INT &&
item(1).tag == V_INT &&
item(2).tag == V_INT);
}
#define needN(stack, n) assert((stack)->size >= n)
static Block *f_call(Stack *stack)
{
need1block(stack);
return item(0).block;
}
static Block *f_join(Stack *stack)
{
need2block(stack);
Block *block = alloc(Block);
block->tag = B_JOIN;
block->b = pop().block;
block->a = pop().block;
Value v;
v.tag = V_BLOCK;
v.block = block;
push(v);
return NULL;
}
static Block *f_lift(Stack *stack)
{
need1(stack);
Block *b = alloc(Block);
b->tag = B_VALUE;
b->value = item(0);
item(0).tag = V_BLOCK;
item(0).block = b;
return NULL;
}
static Block *f_copy(Stack *stack)
{
need1int(stack);
int idx = pop().n;
assert(idx >= 0);
needN(stack, (size_t) idx + 1);
Value x = item(idx);
push(x);
return NULL;
}
static Block *f_pluck(Stack *stack)
{
need1int(stack);
int idx = pop().n;
assert(idx >= 0);
needN(stack, (size_t) idx + 1);
Value v = item(idx);
/* Move 0..idx-1 to 1..idx */
memmove(&item(idx), &item(idx - 1), idx * sizeof(*stack->item));
item(0) = v;
return NULL;
}
static Block *f_drop(Stack *stack)
{
need1int(stack);
int n = pop().n;
assert(n >= 0);
needN(stack, (size_t) n);
stack->size -= n;
return NULL;
}
static Block *f_eq(Stack *stack)
{
need2block(stack);
Block *on_false = pop().block;
Block *on_true = pop().block;
need2(stack);
Value b = pop();
Value a = item(0);
if (a.tag == V_INT && b.tag == V_INT)
return a.n == b.n ? on_true : on_false;
/*
* If one value is the integer 0 and the other value is anything else,
* return false. In this case, 0 acts like NULL.
*
* Assuming a memory layout where sizeof(void*) == sizeof(int),
* this type of equality test would still be possible
* if type information were erased.
*/
if ((a.tag == V_INT && a.n == 0) ||
(b.tag == V_INT && b.n == 0))
return on_false;
fatal("Cannot use `=' on %s and %s",
valueTag_show(a.tag), valueTag_show(b.tag));
}
static Block *f_lt(Stack *stack)
{
need2block(stack);
Block *on_false = pop().block;
Block *on_true = pop().block;
need2int(stack);
int b = pop().n;
int a = item(0).n;
return a < b ? on_true : on_false;
}
static Block *f_gt(Stack *stack)
{
need2block(stack);
Block *on_false = pop().block;
Block *on_true = pop().block;
need2int(stack);
int b = pop().n;
int a = item(0).n;
return a > b ? on_true : on_false;
}
static Block *f_range(Stack *stack)
{
need2block(stack);
Block *on_false = pop().block;
Block *on_true = pop().block;
need3int(stack);
int hi = pop().n;
int lo = pop().n;
int a = item(0).n;
return a >= lo && a <= hi ? on_true : on_false;
}
static Block *f_putc(Stack *stack)
{
need1int(stack);
int c = pop().n;
putchar(c);
return NULL;
}
static Block *f_getc(Stack *stack)
{
Value v;
v.tag = V_INT;
v.n = getchar();
push(v);
return NULL;
}
static Block *f_ungetc(Stack *stack)
{
need1int(stack);
int c = pop().n;
ungetc(c, stdin);
return NULL;
}
#define intop(expr) do { \
need2int(stack); \
int b = pop().n; \
int a = item(0).n; \
item(0).n = (expr); \
} while (0)
static Block *f_add(Stack *stack)
{
intop(a + b);
return NULL;
}
static Block *f_sub(Stack *stack)
{
intop(a - b);
return NULL;
}
static Block *f_mul(Stack *stack)
{
intop(a * b);
return NULL;
}
static Block *f_div(Stack *stack)
{
intop(div_(a, b));
return NULL;
}
static Block *f_mod(Stack *stack)
{
intop(mod_(a, b));
return NULL;
}
#undef intop
int main(int argc, char *argv[])
{
Block *code;
if (argc == 1) {
code = compile(stdin);
} else if (argc == 2) {
FILE *f = fopen(argv[1], "rb");
if (f == NULL) {
perror(argv[1]);
return EXIT_FAILURE;
}
code = compile(f);
if (fclose(f))
perror(argv[1]);
} else {
fprintf(stderr, "Usage: %s [script.ci]", argv[0]);
return EXIT_FAILURE;
}
Stack stack = darray_new();
runBlock(code, &stack);
return EXIT_SUCCESS;
}
# Sample program: Fibonacci sequence generator
# printNumber(n)
(
1p0(0 1p-'-.)()< 0(2d'0.10.)((1p0(2d)(0c10/2p$10%'0+.)=)$10.1d)=
)
^ # lift printNumber so we can bind it to fib
# fib(a, b, count) use (printNumber)
(
# stack (oldest entries first): a b count fib printNumber
2p0(
# a b fib printNumber count
4c
# a b fib printNumber count a
2p
# a b fib count a printNumber
$
# a b fib count
2p 0c
# a fib count b b
4p+
# fib count b (b + a)
2p1-
# fib b (b + a) (count - 1)
3p$
)(5d)>
)
& # bind printNumber to fib
# fib(0, 1, 48);
0 1 48 3p$
,
(1p
0(2d())(
41(2d())(
'#((1p
0()(10()(1d,1p$)=)<
)$2d,1p$)(
40(,2c$^)(
'$(($))(
'^((^))(
'&((&))(
'c((c))(
'p((p))(
'd((d))(
'=((=))(
'<((<))(
'>((>))(
'~((~))(
'.((.))(
',((,))(
'!((!))(
''(,^)(
'0'9(0c'0-(
,'0'9('0-2p10*+1p$)(!1d)~
)$^)(
'+((+))(
'-((-))(
'*((*))(
'/((/))(
'%((%))(
() # default
)=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=
1p1d,2p$&
)=)=)<
)
$$
// Pseudocode for interpreter.ci
compile(c)
{
if (c < 0 || c == ')') {
return ();
} else if (c == '#') {
loop(c) {
if (c < 0 || c == '\n')
return;
else
return loop(getchar());
}
loop(c);
return compile(getchar());
} else {
switch (c) {
// Blocks
case '(':
term = lift(compile(getchar()));
break;
case '$':
term = ($);
break;
case '^':
term = (^);
break;
case '&':
term = (&);
break;
// Stack manipulation
case 'c':
term = (c);
break;
case 'p':
term = (p);
break;
case 'd':
term = (d);
break;
// Relational operators
case '=':
term = (=);
break;
case '<':
term = (<);
break;
case '>':
term = (>);
break;
case '~':
term = (~);
break;
// I/O
case '.':
term = (.);
break;
case ',':
term = (,);
break;
case '!':
term = (!);
break;
case '\'':
term = lift(getchar());
break;
case '0'..'9':
parseInt(n) {
c = getchar();
if ('0' <= c <= '9') {
return parseInt(n*10 + (c-'0'));
} else {
ungetchar(c);
return n;
}
}
term = lift(parseInt(c - '0'));
break;
case '+':
term = (+);
break;
case '-':
term = (-);
break;
case '*':
term = (*);
break;
case '/':
term = (/);
break;
case '%':
term = (%);
break;
default:
term = ();
}
return term & compile(getchar());
}
}
compiled = compile(getchar());
compiled();
## Blocks
( ... )
Create a "block", which is effectively a list of instructions
with no context. Internally, it could even be machine code.
block $
Call a block. The callee is handed the global stack, which
includes the block being called.
value ^
Lift a value. Namely, turn it into a block that pushes that value.
Example:
1 ^
== (1)
&
Join two blocks into one.
Example:
(1) (2) &
== (1 2)
## Stack manipulation
n c
Copy the nth value of the stack.
Example:
5 4 3 2 1 0 3p
== 5 4 3 2 1 0 3
n p
Pluck the nth value of the stack (remove it, and bring it to front).
Example:
5 4 3 2 1 0 3p
== 5 4 2 1 0 3
n d
Drop n values from the stack. 0d is a no-op.
Example:
5 4 3 2 1 0 3d
== 5 4 3
## Relational operators
a b (on_true) (on_false) =
Test if a equals b. Consume all but the first argument, and call
on_true or on_false. If one argument is zero and the other is any
other type, the result will be false. Otherwise, a and b must be integers.
Example:
3 3 ('0+ .) (1d) =
== 3 '0+ .
a b (on_true) (on_false) <
Test if a is less than b. a and b must be integers.
Example:
3 5 (1d 5) () <
== 3 1d 5
== 5
a b (on_true) (on_false) >
Test if a is greater than b. a and b must be integers.
Example:
3 5 (1d 5) () >
== 3
a lo hi (on_true) (on_false) ~
Test if lo <= a <= hi. a, lo, and hi must be integers.
Example:
3 0 10 ('0+ .) (1d) ~
== 3 '0+ .
## I/O
c .
Put the character c (consuming it from the stack).
,
Get a character and push it to the stack. If the end of file
has been reached, -1 is pushed.
c !
Unget a character. Just like ungetc in C, only one pushback is allowed.
## Integer literals
'c
Push the character c.
/[0-9]+/
Push a decimal integer.
## Arithmetic
a b +
a b -
a b *
Add/subtract/multiply two numbers.
Example:
3 5 + 7 3 + *
== 80
a b /
a b %
Division and modulus. Unlike in C, these round toward negative infinity.
## Miscellaneous
code # comment
The # character comments out everything to the end of the line.
)
Used to end blocks. Can be used to end the whole program, too.
All other characters are ignored.
# This demonstrates that there is no interpreter overhead after the first generation.
#
# The copies of the interpreter below are slightly modified
# to prove that they're all actually being used. Each one redefines
# the . operator to print a number before each newline, hence "signing" it.
#
# Also, note that the ) is needed after each interpreter to indicate the end of
# one and the beginning of the rest.
,(1p0(2d())(41(2d())('#((1p0()(10()(1d,1p$)=)<)$2d,1p$)(40(,2c$^)('$(($))('^((^)
)('&((&))('c((c))('p((p))('d((d))('=((=))('<((<))('>((>))('~((~))('.((10(32.'(.
'1.').)()=.))(',((,)
)('!((!))(''(,^)('0'9(0c'0-(,'0'9('0-2p10*+1p$)(!1d)~)$^)('+((+))('-((-))('*((*)
)('/((/))('%((%))(())=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=1p1d,2p$&)=)=)<)$$
)
,(1p0(2d())(41(2d())('#((1p0()(10()(1d,1p$)=)<)$2d,1p$)(40(,2c$^)('$(($))('^((^)
)('&((&))('c((c))('p((p))('d((d))('=((=))('<((<))('>((>))('~((~))('.((10(32.'(.
'2.').)()=.))(',((,)
)('!((!))(''(,^)('0'9(0c'0-(,'0'9('0-2p10*+1p$)(!1d)~)$^)('+((+))('-((-))('*((*)
)('/((/))('%((%))(())=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=1p1d,2p$&)=)=)<)$$
)
,(1p0(2d())(41(2d())('#((1p0()(10()(1d,1p$)=)<)$2d,1p$)(40(,2c$^)('$(($))('^((^)
)('&((&))('c((c))('p((p))('d((d))('=((=))('<((<))('>((>))('~((~))('.((10(32.'(.
'3.').)()=.))(',((,)
)('!((!))(''(,^)('0'9(0c'0-(,'0'9('0-2p10*+1p$)(!1d)~)$^)('+((+))('-((-))('*((*)
)('/((/))('%((%))(())=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=1p1d,2p$&)=)=)<)$$
)
,(1p0(2d())(41(2d())('#((1p0()(10()(1d,1p$)=)<)$2d,1p$)(40(,2c$^)('$(($))('^((^)
)('&((&))('c((c))('p((p))('d((d))('=((=))('<((<))('>((>))('~((~))('.((10(32.'(.
'4.').)()=.))(',((,)
)('!((!))(''(,^)('0'9(0c'0-(,'0'9('0-2p10*+1p$)(!1d)~)$^)('+((+))('-((-))('*((*)
)('/((/))('%((%))(())=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=1p1d,2p$&)=)=)<)$$
)
,(1p0(2d())(41(2d())('#((1p0()(10()(1d,1p$)=)<)$2d,1p$)(40(,2c$^)('$(($))('^((^)
)('&((&))('c((c))('p((p))('d((d))('=((=))('<((<))('>((>))('~((~))('.((10(32.'(.
'5.').)()=.))(',((,)
)('!((!))(''(,^)('0'9(0c'0-(,'0'9('0-2p10*+1p$)(!1d)~)$^)('+((+))('-((-))('*((*)
)('/((/))('%((%))(())=)=)=)=)=)~)=)=)=)=)=)=)=)=)=)=)=)=)=)=)=1p1d,2p$&)=)=)<)$$
)
(1p0(0 1p-'-.)()< 0(2d'0.10.)((1p0(2d)(0c10/2p$10%'0+.)=)$10.1d)=)^
(2p0(4c2p$2p0c4p+2p1-3p$)(5d)>)&
0 1 48 3p$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment