Created
December 17, 2017 21:06
-
-
Save seanjensengrey/3962c6b5d90f1dfb0d6776662faabb9c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2006 Edmund GRIMLEY EVANS <edmundo@rano.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation; either version 2 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
*/ | |
/* | |
* A self-compiling compiler for a small subset of C. | |
*/ | |
/* Our library functions. */ | |
void exit(int); | |
int getchar(void); | |
void *malloc(int); | |
int putchar(int); | |
/* The first thing defined must be main(). */ | |
int main1(); | |
int main() | |
{ | |
return main1(); | |
} | |
char *my_realloc(char *old, int oldlen, int newlen) | |
{ | |
char *new = malloc(newlen); | |
int i = 0; | |
while (i <= oldlen - 1) { | |
new[i] = old[i]; | |
i = i + 1; | |
} | |
return new; | |
} | |
int nextc; | |
char *token; | |
int token_size; | |
void error() | |
{ | |
exit(1); | |
} | |
int i; | |
void takechar() | |
{ | |
if (token_size <= i + 1) { | |
int x = (i + 10) << 1; | |
token = my_realloc(token, token_size, x); | |
token_size = x; | |
} | |
token[i] = nextc; | |
i = i + 1; | |
nextc = getchar(); | |
} | |
void get_token() | |
{ | |
int w = 1; | |
while (w) { | |
w = 0; | |
while ((nextc == ' ') | (nextc == 9) | (nextc == 10)) | |
nextc = getchar(); | |
i = 0; | |
while ((('a' <= nextc) & (nextc <= 'z')) | | |
(('0' <= nextc) & (nextc <= '9')) | (nextc == '_')) | |
takechar(); | |
if (i == 0) | |
while ((nextc == '<') | (nextc == '=') | (nextc == '>') | | |
(nextc == '|') | (nextc == '&') | (nextc == '!')) | |
takechar(); | |
if (i == 0) { | |
if (nextc == 39) { | |
takechar(); | |
while (nextc != 39) | |
takechar(); | |
takechar(); | |
} | |
else if (nextc == '"') { | |
takechar(); | |
while (nextc != '"') | |
takechar(); | |
takechar(); | |
} | |
else if (nextc == '/') { | |
takechar(); | |
if (nextc == '*') { | |
nextc = getchar(); | |
while (nextc != '/') { | |
while (nextc != '*') | |
nextc = getchar(); | |
nextc = getchar(); | |
} | |
nextc = getchar(); | |
w = 1; | |
} | |
} | |
else if (nextc != 0-1) | |
takechar(); | |
} | |
token[i] = 0; | |
} | |
} | |
int peek(char *s) | |
{ | |
int i = 0; | |
while ((s[i] == token[i]) & (s[i] != 0)) | |
i = i + 1; | |
return s[i] == token[i]; | |
} | |
int accept(char *s) | |
{ | |
if (peek(s)) { | |
get_token(); | |
return 1; | |
} | |
else | |
return 0; | |
} | |
void expect(char *s) | |
{ | |
if (accept(s) == 0) | |
error(); | |
} | |
char *code; | |
int code_size; | |
int codepos; | |
int code_offset; | |
void save_int(char *p, int n) | |
{ | |
p[0] = n; | |
p[1] = n >> 8; | |
p[2] = n >> 16; | |
p[3] = n >> 24; | |
} | |
int load_int(char *p) | |
{ | |
return ((p[0] & 255) + ((p[1] & 255) << 8) + | |
((p[2] & 255) << 16) + ((p[3] & 255) << 24)); | |
} | |
void emit(int n, char *s) | |
{ | |
i = 0; | |
if (code_size <= codepos + n) { | |
int x = (codepos + n) << 1; | |
code = my_realloc(code, code_size, x); | |
code_size = x; | |
} | |
while (i <= n - 1) { | |
code[codepos] = s[i]; | |
codepos = codepos + 1; | |
i = i + 1; | |
} | |
} | |
void be_push() | |
{ | |
emit(1, "\x50"); /* push %eax */ | |
} | |
void be_pop(int n) | |
{ | |
emit(6, "\x81\xc4...."); /* add $(n * 4),%esp */ | |
save_int(code + codepos - 4, n << 2); | |
} | |
char *table; | |
int table_size; | |
int table_pos; | |
int stack_pos; | |
int sym_lookup(char *s) | |
{ | |
int t = 0; | |
int current_symbol = 0; | |
while (t <= table_pos - 1) { | |
i = 0; | |
while ((s[i] == table[t]) & (s[i] != 0)) { | |
i = i + 1; | |
t = t + 1; | |
} | |
if (s[i] == table[t]) | |
current_symbol = t; | |
while (table[t] != 0) | |
t = t + 1; | |
t = t + 6; | |
} | |
return current_symbol; | |
} | |
void sym_declare(char *s, int type, int value) | |
{ | |
int t = table_pos; | |
i = 0; | |
while (s[i] != 0) { | |
if (table_size <= t + 10) { | |
int x = (t + 10) << 1; | |
table = my_realloc(table, table_size, x); | |
table_size = x; | |
} | |
table[t] = s[i]; | |
i = i + 1; | |
t = t + 1; | |
} | |
table[t] = 0; | |
table[t + 1] = type; | |
save_int(table + t + 2, value); | |
table_pos = t + 6; | |
} | |
int sym_declare_global(char *s) | |
{ | |
int current_symbol = sym_lookup(s); | |
if (current_symbol == 0) { | |
sym_declare(s, 'U', code_offset); | |
current_symbol = table_pos - 6; | |
} | |
return current_symbol; | |
} | |
void sym_define_global(int current_symbol) | |
{ | |
int i; | |
int j; | |
int t = current_symbol; | |
int v = codepos + code_offset; | |
if (table[t + 1] != 'U') | |
error(); /* symbol redefined */ | |
i = load_int(table + t + 2) - code_offset; | |
while (i) { | |
j = load_int(code + i) - code_offset; | |
save_int(code + i, v); | |
i = j; | |
} | |
table[t + 1] = 'D'; | |
save_int(table + t + 2, v); | |
} | |
int number_of_args; | |
void sym_get_value(char *s) | |
{ | |
int t; | |
if ((t = sym_lookup(s)) == 0) | |
error(); | |
emit(5, "\xb8...."); /* mov $n,%eax */ | |
save_int(code + codepos - 4, load_int(table + t + 2)); | |
if (table[t + 1] == 'D') { /* defined global */ | |
} | |
else if (table[t + 1] == 'U') /* undefined global */ | |
save_int(table + t + 2, codepos + code_offset - 4); | |
else if (table[t + 1] == 'L') { /* local variable */ | |
int k = (stack_pos - table[t + 2] - 1) << 2; | |
emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */ | |
save_int(code + codepos - 4, k); | |
} | |
else if (table[t + 1] == 'A') { /* argument */ | |
int k = (stack_pos + number_of_args - table[t + 2] + 1) << 2; | |
emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */ | |
save_int(code + codepos - 4, k); | |
} | |
else | |
error(); | |
} | |
void be_start() | |
{ | |
emit(16, "\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"); | |
emit(16, "\x02\x00\x03\x00\x01\x00\x00\x00\x54\x80\x04\x08\x34\x00\x00\x00"); | |
emit(16, "\x00\x00\x00\x00\x00\x00\x00\x00\x34\x00\x20\x00\x01\x00\x00\x00"); | |
emit(16, "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x80\x04\x08"); | |
emit(16, "\x00\x80\x04\x08\x10\x4b\x00\x00\x10\x4b\x00\x00\x07\x00\x00\x00"); | |
emit(16, "\x00\x10\x00\x00\xe8\x00\x00\x00\x00\x89\xc3\x31\xc0\x40\xcd\x80"); | |
sym_define_global(sym_declare_global("exit")); | |
/* pop %ebx ; pop %ebx ; xor %eax,%eax ; inc %eax ; int $0x80 */ | |
emit(7, "\x5b\x5b\x31\xc0\x40\xcd\x80"); | |
sym_define_global(sym_declare_global("getchar")); | |
/* mov $3,%eax ; xor %ebx,%ebx ; push %ebx ; mov %esp,%ecx */ | |
emit(10, "\xb8\x03\x00\x00\x00\x31\xdb\x53\x89\xe1"); | |
/* xor %edx,%edx ; inc %edx ; int $0x80 */ | |
/* test %eax,%eax ; pop %eax ; jne . + 7 */ | |
emit(10, "\x31\xd2\x42\xcd\x80\x85\xc0\x58\x75\x05"); | |
/* mov $-1,%eax ; ret */ | |
emit(6, "\xb8\xff\xff\xff\xff\xc3"); | |
sym_define_global(sym_declare_global("malloc")); | |
/* mov 4(%esp),%eax */ | |
emit(4, "\x8b\x44\x24\x04"); | |
/* push %eax ; xor %ebx,%ebx ; mov $45,%eax ; int $0x80 */ | |
emit(10, "\x50\x31\xdb\xb8\x2d\x00\x00\x00\xcd\x80"); | |
/* pop %ebx ; add %eax,%ebx ; push %eax ; push %ebx ; mov $45,%eax */ | |
emit(10, "\x5b\x01\xc3\x50\x53\xb8\x2d\x00\x00\x00"); | |
/* int $0x80 ; pop %ebx ; cmp %eax,%ebx ; pop %eax ; je . + 7 */ | |
emit(8, "\xcd\x80\x5b\x39\xc3\x58\x74\x05"); | |
/* mov $-1,%eax ; ret */ | |
emit(6, "\xb8\xff\xff\xff\xff\xc3"); | |
sym_define_global(sym_declare_global("putchar")); | |
/* mov $4,%eax ; xor %ebx,%ebx ; inc %ebx */ | |
emit(8, "\xb8\x04\x00\x00\x00\x31\xdb\x43"); | |
/* lea 4(%esp),%ecx ; mov %ebx,%edx ; int $0x80 ; ret */ | |
emit(9, "\x8d\x4c\x24\x04\x89\xda\xcd\x80\xc3"); | |
save_int(code + 85, codepos - 89); /* entry set to first thing in file */ | |
} | |
void be_finish() | |
{ | |
save_int(code + 68, codepos); | |
save_int(code + 72, codepos); | |
i = 0; | |
while (i <= codepos - 1) { | |
putchar(code[i]); | |
i = i + 1; | |
} | |
} | |
void promote(int type) | |
{ | |
/* 1 = char lval, 2 = int lval, 3 = other */ | |
if (type == 1) | |
emit(3, "\x0f\xbe\x00"); /* movsbl (%eax),%eax */ | |
else if (type == 2) | |
emit(2, "\x8b\x00"); /* mov (%eax),%eax */ | |
} | |
int expression(); | |
/* | |
* primary-expr: | |
* identifier | |
* constant | |
* ( expression ) | |
*/ | |
int primary_expr() | |
{ | |
int type; | |
if (('0' <= token[0]) & (token[0] <= '9')) { | |
int n = 0; | |
i = 0; | |
while (token[i]) { | |
n = (n << 1) + (n << 3) + token[i] - '0'; | |
i = i + 1; | |
} | |
emit(5, "\xb8...."); /* mov $x,%eax */ | |
save_int(code + codepos - 4, n); | |
type = 3; | |
} | |
else if (('a' <= token[0]) & (token[0] <= 'z')) { | |
sym_get_value(token); | |
type = 2; | |
} | |
else if (accept("(")) { | |
type = expression(); | |
if (peek(")") == 0) | |
error(); | |
} | |
else if ((token[0] == 39) & (token[1] != 0) & | |
(token[2] == 39) & (token[3] == 0)) { | |
emit(5, "\xb8...."); /* mov $x,%eax */ | |
save_int(code + codepos - 4, token[1]); | |
type = 3; | |
} | |
else if (token[0] == '"') { | |
int i = 0; | |
int j = 1; | |
int k; | |
while (token[j] != '"') { | |
if ((token[j] == 92) & (token[j + 1] == 'x')) { | |
if (token[j + 2] <= '9') | |
k = token[j + 2] - '0'; | |
else | |
k = token[j + 2] - 'a' + 10; | |
k = k << 4; | |
if (token[j + 3] <= '9') | |
k = k + token[j + 3] - '0'; | |
else | |
k = k + token[j + 3] - 'a' + 10; | |
token[i] = k; | |
j = j + 4; | |
} | |
else { | |
token[i] = token[j]; | |
j = j + 1; | |
} | |
i = i + 1; | |
} | |
token[i] = 0; | |
/* call ... ; the string ; pop %eax */ | |
emit(5, "\xe8...."); | |
save_int(code + codepos - 4, i + 1); | |
emit(i + 1, token); | |
emit(1, "\x58"); | |
type = 3; | |
} | |
else | |
error(); | |
get_token(); | |
return type; | |
} | |
void binary1(int type) | |
{ | |
promote(type); | |
be_push(); | |
stack_pos = stack_pos + 1; | |
} | |
int binary2(int type, int n, char *s) | |
{ | |
promote(type); | |
emit(n, s); | |
stack_pos = stack_pos - 1; | |
return 3; | |
} | |
/* | |
* postfix-expr: | |
* primary-expr | |
* postfix-expr [ expression ] | |
* postfix-expr ( expression-list-opt ) | |
*/ | |
int postfix_expr() | |
{ | |
int type = primary_expr(); | |
if (accept("[")) { | |
binary1(type); /* pop %ebx ; add %ebx,%eax */ | |
binary2(expression(), 3, "\x5b\x01\xd8"); | |
expect("]"); | |
type = 1; | |
} | |
else if (accept("(")) { | |
int s = stack_pos; | |
be_push(); | |
stack_pos = stack_pos + 1; | |
if (accept(")") == 0) { | |
promote(expression()); | |
be_push(); | |
stack_pos = stack_pos + 1; | |
while (accept(",")) { | |
promote(expression()); | |
be_push(); | |
stack_pos = stack_pos + 1; | |
} | |
expect(")"); | |
} | |
emit(7, "\x8b\x84\x24...."); /* mov (n * 4)(%esp),%eax */ | |
save_int(code + codepos - 4, (stack_pos - s - 1) << 2); | |
emit(2, "\xff\xd0"); /* call *%eax */ | |
be_pop(stack_pos - s); | |
stack_pos = s; | |
type = 3; | |
} | |
return type; | |
} | |
/* | |
* additive-expr: | |
* postfix-expr | |
* additive-expr + postfix-expr | |
* additive-expr - postfix-expr | |
*/ | |
int additive_expr() | |
{ | |
int type = postfix_expr(); | |
while (1) { | |
if (accept("+")) { | |
binary1(type); /* pop %ebx ; add %ebx,%eax */ | |
type = binary2(postfix_expr(), 3, "\x5b\x01\xd8"); | |
} | |
else if (accept("-")) { | |
binary1(type); /* pop %ebx ; sub %eax,%ebx ; mov %ebx,%eax */ | |
type = binary2(postfix_expr(), 5, "\x5b\x29\xc3\x89\xd8"); | |
} | |
else | |
return type; | |
} | |
} | |
/* | |
* shift-expr: | |
* additive-expr | |
* shift-expr << additive-expr | |
* shift-expr >> additive-expr | |
*/ | |
int shift_expr() | |
{ | |
int type = additive_expr(); | |
while (1) { | |
if (accept("<<")) { | |
binary1(type); /* mov %eax,%ecx ; pop %eax ; shl %cl,%eax */ | |
type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xe0"); | |
} | |
else if (accept(">>")) { | |
binary1(type); /* mov %eax,%ecx ; pop %eax ; sar %cl,%eax */ | |
type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xf8"); | |
} | |
else | |
return type; | |
} | |
} | |
/* | |
* relational-expr: | |
* shift-expr | |
* relational-expr <= shift-expr | |
*/ | |
int relational_expr() | |
{ | |
int type = shift_expr(); | |
while (accept("<=")) { | |
binary1(type); | |
/* pop %ebx ; cmp %eax,%ebx ; setle %al ; movzbl %al,%eax */ | |
type = binary2(shift_expr(), | |
9, "\x5b\x39\xc3\x0f\x9e\xc0\x0f\xb6\xc0"); | |
} | |
return type; | |
} | |
/* | |
* equality-expr: | |
* relational-expr | |
* equality-expr == relational-expr | |
* equality-expr != relational-expr | |
*/ | |
int equality_expr() | |
{ | |
int type = relational_expr(); | |
while (1) { | |
if (accept("==")) { | |
binary1(type); | |
/* pop %ebx ; cmp %eax,%ebx ; sete %al ; movzbl %al,%eax */ | |
type = binary2(relational_expr(), | |
9, "\x5b\x39\xc3\x0f\x94\xc0\x0f\xb6\xc0"); | |
} | |
else if (accept("!=")) { | |
binary1(type); | |
/* pop %ebx ; cmp %eax,%ebx ; setne %al ; movzbl %al,%eax */ | |
type = binary2(relational_expr(), | |
9, "\x5b\x39\xc3\x0f\x95\xc0\x0f\xb6\xc0"); | |
} | |
else | |
return type; | |
} | |
} | |
/* | |
* bitwise-and-expr: | |
* equality-expr | |
* bitwise-and-expr & equality-expr | |
*/ | |
int bitwise_and_expr() | |
{ | |
int type = equality_expr(); | |
while (accept("&")) { | |
binary1(type); /* pop %ebx ; and %ebx,%eax */ | |
type = binary2(equality_expr(), 3, "\x5b\x21\xd8"); | |
} | |
return type; | |
} | |
/* | |
* bitwise-or-expr: | |
* bitwise-and-expr | |
* bitwise-and-expr | bitwise-or-expr | |
*/ | |
int bitwise_or_expr() | |
{ | |
int type = bitwise_and_expr(); | |
while (accept("|")) { | |
binary1(type); /* pop %ebx ; or %ebx,%eax */ | |
type = binary2(bitwise_and_expr(), 3, "\x5b\x09\xd8"); | |
} | |
return type; | |
} | |
/* | |
* expression: | |
* bitwise-or-expr | |
* bitwise-or-expr = expression | |
*/ | |
int expression() | |
{ | |
int type = bitwise_or_expr(); | |
if (accept("=")) { | |
be_push(); | |
stack_pos = stack_pos + 1; | |
promote(expression()); | |
if (type == 2) | |
emit(3, "\x5b\x89\x03"); /* pop %ebx ; mov %eax,(%ebx) */ | |
else | |
emit(3, "\x5b\x88\x03"); /* pop %ebx ; mov %al,(%ebx) */ | |
stack_pos = stack_pos - 1; | |
type = 3; | |
} | |
return type; | |
} | |
/* | |
* type-name: | |
* char * | |
* int | |
*/ | |
void type_name() | |
{ | |
get_token(); | |
while (accept("*")) { | |
} | |
} | |
/* | |
* statement: | |
* { statement-list-opt } | |
* type-name identifier ; | |
* type-name identifier = expression; | |
* if ( expression ) statement | |
* if ( expression ) statement else statement | |
* while ( expression ) statement | |
* return ; | |
* expr ; | |
*/ | |
void statement() | |
{ | |
int p1; | |
int p2; | |
if (accept("{")) { | |
int n = table_pos; | |
int s = stack_pos; | |
while (accept("}") == 0) | |
statement(); | |
table_pos = n; | |
be_pop(stack_pos - s); | |
stack_pos = s; | |
} | |
else if (peek("char") | peek("int")) { | |
type_name(); | |
sym_declare(token, 'L', stack_pos); | |
get_token(); | |
if (accept("=")) | |
promote(expression()); | |
expect(";"); | |
be_push(); | |
stack_pos = stack_pos + 1; | |
} | |
else if (accept("if")) { | |
expect("("); | |
promote(expression()); | |
emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */ | |
p1 = codepos; | |
expect(")"); | |
statement(); | |
emit(5, "\xe9...."); /* jmp ... */ | |
p2 = codepos; | |
save_int(code + p1 - 4, codepos - p1); | |
if (accept("else")) | |
statement(); | |
save_int(code + p2 - 4, codepos - p2); | |
} | |
else if (accept("while")) { | |
expect("("); | |
p1 = codepos; | |
promote(expression()); | |
emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */ | |
p2 = codepos; | |
expect(")"); | |
statement(); | |
emit(5, "\xe9...."); /* jmp ... */ | |
save_int(code + codepos - 4, p1 - codepos); | |
save_int(code + p2 - 4, codepos - p2); | |
} | |
else if (accept("return")) { | |
if (peek(";") == 0) | |
promote(expression()); | |
expect(";"); | |
be_pop(stack_pos); | |
emit(1, "\xc3"); /* ret */ | |
} | |
else { | |
expression(); | |
expect(";"); | |
} | |
} | |
/* | |
* program: | |
* declaration | |
* declaration program | |
* | |
* declaration: | |
* type-name identifier ; | |
* type-name identifier ( parameter-list ) ; | |
* type-name identifier ( parameter-list ) statement | |
* | |
* parameter-list: | |
* parameter-declaration | |
* parameter-list, parameter-declaration | |
* | |
* parameter-declaration: | |
* type-name identifier-opt | |
*/ | |
void program() | |
{ | |
int current_symbol; | |
while (token[0]) { | |
type_name(); | |
current_symbol = sym_declare_global(token); | |
get_token(); | |
if (accept(";")) { | |
sym_define_global(current_symbol); | |
emit(4, "\x00\x00\x00\x00"); | |
} | |
else if (accept("(")) { | |
int n = table_pos; | |
number_of_args = 0; | |
while (accept(")") == 0) { | |
number_of_args = number_of_args + 1; | |
type_name(); | |
if (peek(")") == 0) { | |
sym_declare(token, 'A', number_of_args); | |
get_token(); | |
} | |
accept(","); /* ignore trailing comma */ | |
} | |
if (accept(";") == 0) { | |
sym_define_global(current_symbol); | |
statement(); | |
emit(1, "\xc3"); /* ret */ | |
} | |
table_pos = n; | |
} | |
else | |
error(); | |
} | |
} | |
int main1() | |
{ | |
code_offset = 134512640; /* 0x08048000 */ | |
be_start(); | |
nextc = getchar(); | |
get_token(); | |
program(); | |
be_finish(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment