Skip to content

Instantly share code, notes, and snippets.

@MrSmith33
Created November 3, 2014 13:56
Show Gist options
  • Save MrSmith33/7483b8decaa8bafed7a5 to your computer and use it in GitHub Desktop.
Save MrSmith33/7483b8decaa8bafed7a5 to your computer and use it in GitHub Desktop.
Tiny C
/* file: "tinyc.d" */
/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
// D port by Mr.Smith (C) 2014
import std.stdio;
import std.algorithm : equal;
/*
* This is a compiler for the Tiny-C language. Tiny-C is a
* considerably stripped down version of C and it is meant as a
* pedagogical tool for learning about compilers. The integer global
* variables "a" to "z" are predefined and initialized to zero, and it
* is not possible to declare new variables. The compiler reads the
* program from standard input and prints out the value of the
* variables that are not zero. The grammar of Tiny-C in EBNF is:
*
* <program> ::= <statement>
* <statement> ::= "if" <paren_expr> <statement> |
* "if" <paren_expr> <statement> "else" <statement> |
* "while" <paren_expr> <statement> |
* "do" <statement> "while" <paren_expr> ";" |
* "{" { <statement> } "}" |
* <expr> ";" |
* ";"
* <paren_expr> ::= "(" <expr> ")"
* <expr> ::= <test> | <id> "=" <expr>
* <test> ::= <sum> | <sum> "<" <sum>
* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
* <term> ::= <id> | <int> | <paren_expr>
* <id> ::= "a" | "b" | "c" | "d" | ... | "z"
* <int> ::= <an_unsigned_decimal_integer>
*
* Here are a few invocations of the compiler:
*
* % echo "a=b=c=2<3;" | ./a.out
* a = 1
* b = 1
* c = 1
* % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
* i = 128
* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
* i = 25
* j = 25
* % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
* i = 51
* % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
* i = 51
* % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" | ./a.out
* i = 7
* y = 2
*
* The compiler does a minimal amount of error checking to help
* highlight the structure of the compiler.
*/
/*---------------------------------------------------------------------------*/
/* Lexer. */
enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
string[] words = [ "do", "else", "if", "while", null ];
char ch = ' ';
int sym;
int int_val;
char[100] id_name;
void syntax_error() { assert(false, "syntax error"); }
void next_ch() { ch = cast(char)getchar(); }
void next_sym()
{
while(ch == ' ' || ch == '\n') next_ch();
switch (ch)
{
case 255: sym = EOI; break; //EOF
case '{': next_ch(); sym = LBRA; break;
case '}': next_ch(); sym = RBRA; break;
case '(': next_ch(); sym = LPAR; break;
case ')': next_ch(); sym = RPAR; break;
case '+': next_ch(); sym = PLUS; break;
case '-': next_ch(); sym = MINUS; break;
case '<': next_ch(); sym = LESS; break;
case ';': next_ch(); sym = SEMI; break;
case '=': next_ch(); sym = EQUAL; break;
default:
{
if (ch >= '0' && ch <= '9')
{
int_val = 0; /* missing overflow check */
while (ch >= '0' && ch <= '9')
{
int_val = int_val*10 + (ch - '0');
next_ch();
}
sym = INT;
}
else if (ch >= 'a' && ch <= 'z')
{
size_t i = 0; /* missing overflow check */
while ((ch >= 'a' && ch <= 'z') || ch == '_')
{
id_name[i++] = ch;
next_ch();
}
id_name[i] = '\0';
sym = 0;
while (words[sym] !is null && !equal(words[sym], id_name[0..i]))
{
sym = sym + 1;
}
if (words[sym] is null)
{
if (id_name[1] == '\0')
sym = ID;
else
syntax_error();
}
}
else
syntax_error();
}
}
}
/*---------------------------------------------------------------------------*/
/* Parser. */
enum { VAR, CST, ADD, SUB, LT, SET,
IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }
struct Node
{
int kind;
Node* o1, o2, o3;
int val;
}
Node* new_Node(int k)
{
Node* n = new Node;
n.kind = k;
return n;
}
Node* term() /* <term> ::= <id> | <int> | <paren_expr> */
{
Node* n;
if (sym == ID) { n=new_Node(VAR); n.val=id_name[0]-'a'; next_sym(); }
else if (sym == INT) { n=new_Node(CST); n.val=int_val; next_sym(); }
else n = paren_expr();
return n;
}
Node* sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
{
Node* t, n = term();
while (sym == PLUS || sym == MINUS)
{
t=n; n=new_Node(sym==PLUS?ADD:SUB); next_sym(); n.o1=t; n.o2=term();
}
return n;
}
Node* test() /* <test> ::= <sum> | <sum> "<" <sum> */
{
Node* t, n = sum();
if (sym == LESS)
{
t=n; n=new_Node(LT); next_sym(); n.o1=t; n.o2=sum();
}
return n;
}
Node* expr() /* <expr> ::= <test> | <id> "=" <expr> */
{
Node* t, n;
if (sym != ID) return test();
n = test();
if (n.kind == VAR && sym == EQUAL)
{
t=n; n=new_Node(SET); next_sym(); n.o1=t; n.o2=expr();
}
return n;
}
Node* paren_expr() /* <paren_expr> ::= "(" <expr> ")" */
{
Node* n;
if (sym == LPAR) next_sym(); else syntax_error();
n = expr();
if (sym == RPAR) next_sym(); else syntax_error();
return n;
}
Node* statement()
{
Node* n;
if (sym == IF_SYM) /* "if" <paren_expr> <statement> */
{
n = new_Node(IF1);
next_sym();
n.o1 = paren_expr();
n.o2 = statement();
if (sym == ELSE_SYM) /* ... "else" <statement> */
{
n.kind = IF2;
next_sym();
n.o3 = statement();
}
}
else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */
{
n = new_Node(WHILE);
next_sym();
n.o1 = paren_expr();
n.o2 = statement();
}
else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */
{
n = new_Node(DO);
next_sym();
n.o1 = statement();
if (sym == WHILE_SYM)
next_sym();
else
syntax_error();
n.o2 = paren_expr();
if (sym == SEMI)
next_sym();
else
syntax_error();
}
else if (sym == SEMI) /* ";" */
{
n = new_Node(EMPTY);
next_sym();
}
else if (sym == LBRA) /* "{" { <statement> } "}" */
{
n = new_Node(EMPTY);
next_sym();
while (sym != RBRA)
{
Node* temp = n;
n = new_Node(SEQ);
n.o1 = temp;
n.o2 = statement();
}
next_sym();
}
else /* <expr> ";" */
{
n = new_Node(EXPR);
n.o1 = expr();
if (sym == SEMI)
next_sym();
else
syntax_error();
}
return n;
}
Node* program() /* <program> ::= <statement> */
{
Node* n = new_Node(PROG);
next_sym();
n.o1 = statement();
if (sym != EOI)
syntax_error();
return n;
}
/*---------------------------------------------------------------------------*/
/* Code generator. */
enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };
alias code = byte;
code[1000] _object; // executable
code* pc;
static this()
{
pc = &_object[0];
}
void put(code c) { *pc++ = c; } /* missing overflow check */
code* hole() { return pc++; }
void fix(code* src, code* dst) { *src = cast(code)(dst - src); } /* missing overflow check */
void compile(Node* n)
{
code* p1, p2;
switch (n.kind)
{
case VAR : put(IFETCH); put(cast(code)n.val); break;
case CST : put(IPUSH); put(cast(code)n.val); break;
case ADD : compile(n.o1); compile(n.o2); put(IADD); break;
case SUB : compile(n.o1); compile(n.o2); put(ISUB); break;
case LT : compile(n.o1); compile(n.o2); put(ILT); break;
case SET : compile(n.o2); put(ISTORE); put(cast(code)n.o1.val); break;
case IF1 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break;
case IF2 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole();
fix(p1,pc); compile(n.o3); fix(p2,pc); break;
case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2);
put(JMP); fix(hole(),p1); fix(p2,pc); break;
case DO : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break;
case EMPTY: break;
case SEQ : compile(n.o1); compile(n.o2); break;
case EXPR : compile(n.o1); put(IPOP); break;
case PROG : compile(n.o1); put(HALT); break;
default: break;
}
}
/*---------------------------------------------------------------------------*/
/* Virtual machine. */
struct VM
{
int[26] globals;
void run()
{
int[1000] stack;
int* sp = stack.ptr;
code* pc = _object.ptr;
loop: while(true)
{
final switch (*pc++)
{
case IFETCH: *sp++ = globals[*pc++]; break;
case ISTORE: globals[*pc++] = sp[-1]; break;
case IPUSH : *sp++ = *pc++; break;
case IPOP : --sp; break;
case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; break;
case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; break;
case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; break;
case JMP : pc += *pc; break;
case JZ : if (*--sp == 0) pc += *pc; else pc++; break;
case JNZ : if (*--sp != 0) pc += *pc; else pc++; break;
case HALT: break loop;
}
}
}
}
/*---------------------------------------------------------------------------*/
/* Main program. */
int main()
{
compile(program());
VM vm;
vm.run();
foreach(i, v; vm.globals)
if (v != 0)
writefln("%s = %s", cast(char)('a'+i), v);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment