Created
January 15, 2015 14:30
-
-
Save thewestwind/1c956fe7573b672378bd to your computer and use it in GitHub Desktop.
Just some Brainfuck compiler (LLVM based)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "headers.h" | |
using namespace llvm; | |
using namespace std; | |
struct codegen { | |
enum insn_name { | |
ptr_increment, // > | |
ptr_decrement, // < | |
value_increment, // + | |
value_decrement, // - | |
value_write, // . | |
value_read, // , | |
loop_begin, // [ | |
loop_end, // ] | |
}; | |
typedef stack<Value*> stack_Value_type; | |
typedef stack<BasicBlock*> stack_BB_type; | |
codegen(const string& function = "brainfuck", unsigned scratch_length = 65536); | |
~codegen(); | |
void finish(); | |
void add_instruction(insn_name i); | |
private: | |
Module* M; | |
Function* F; | |
BasicBlock* BB; | |
Value* cursor_addr; | |
Value* scratch_base_addr; | |
Value* F_putchar; | |
Value* F_getchar; | |
stack_BB_type st_BB; | |
stack_Value_type st_cursor_addr; | |
stack_Value_type st_scratch_base_addr; | |
BasicBlock* loop_head; | |
stack_BB_type st_loop_head; | |
void adjust_pointer(int delta); | |
Value* load_cursor(); | |
void adjust_value(int delta); | |
Value* load_value(); | |
void store_value(Value*); | |
GetElementPtrInst* get_value_address(); | |
void write_value(); | |
void read_value(); | |
void do_loop_begin(); | |
void do_loop_end(); | |
// copying is unsupported ATM: | |
codegen(const codegen&); | |
codegen& operator = (const codegen&); | |
}; | |
extern int main() { | |
bool r = true; | |
codegen cg; | |
static const char program[] = ".+[.+]"; | |
const char* p = program; | |
bool more = true; | |
while(more) { | |
int ch = *p++; | |
codegen::insn_name *_insn = NULL; | |
codegen::insn_name insn; | |
switch(ch) { | |
case '<': | |
insn = codegen::ptr_decrement; | |
_insn = &insn; | |
break; | |
case '>': | |
insn = codegen::ptr_increment; | |
_insn = &insn; | |
break; | |
case '-': | |
insn = codegen::value_decrement; | |
_insn = &insn; | |
break; | |
case '+': | |
insn = codegen::value_increment; | |
_insn = &insn; | |
break; | |
case '[': | |
insn = codegen::loop_begin; | |
_insn = &insn; | |
break; | |
case ']': | |
insn = codegen::loop_end; | |
_insn = &insn; | |
break; | |
case '.': | |
insn = codegen::value_write; | |
_insn = &insn; | |
break; | |
case ',': | |
insn = codegen::value_read; | |
_insn = &insn; | |
break; | |
case 0: | |
more = false; | |
break; | |
default: | |
fprintf(stderr, "Bogus instruction '%c' (%d)\n", (char) ch, ch); | |
break; | |
} | |
if(more) { | |
assert(_insn != NULL); | |
cg.add_instruction(*_insn); | |
} | |
} | |
cg.finish(); | |
return r ? EXIT_SUCCESS : EXIT_FAILURE; | |
} | |
////////////////////////////////////////////////////////////////////// | |
codegen::codegen(const string& fname, unsigned len): M(NULL), F(NULL), cursor_addr(NULL), BB(NULL), F_putchar(NULL), F_getchar(NULL), loop_head(NULL) { | |
assert(len > 0); | |
M = new Module("brainfuck", getGlobalContext()); | |
Type* voidty = Type::getVoidTy(M->getContext()); | |
FunctionType* ft = FunctionType::get(voidty, false); | |
F = Function::Create(ft, GlobalValue::ExternalLinkage, fname, M); | |
BasicBlock* entry = BasicBlock::Create(M->getContext(), "entry", F); | |
unsigned sw = 32; | |
cursor_addr = new AllocaInst(IntegerType::get(M->getContext(), sw), NULL, "cursor.addr", entry); | |
new StoreInst(ConstantInt::get(M->getContext(), APInt(sw, 0)), cursor_addr, entry); | |
Value* scratch_len = ConstantInt::get(M->getContext(), APInt(32, len)); | |
scratch_base_addr = new AllocaInst(IntegerType::get(M->getContext(), 8), scratch_len, "scratch", entry); | |
// declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>, i32 <len>, i32 <align>, i1 <isvolatile>) | |
IntegerType* i8 = IntegerType::get(M->getContext(), 8); | |
PointerType* p0i8 = PointerType::get(i8, 0); | |
IntegerType* i32 = IntegerType::get(M->getContext(), 32); | |
IntegerType* i1 = IntegerType::get(M->getContext(), 1); | |
Type* argty[] = { | |
p0i8, // dest | |
i8, // val | |
i32, // len | |
i32, // align | |
i1 // volatile | |
}; | |
FunctionType* T_memset = FunctionType::get(voidty, ArrayRef<Type*>(argty, 5), false); | |
Value* _memset = M->getOrInsertFunction("llvm.memset.p0i8.i32", T_memset); | |
Value* args[] = { | |
scratch_base_addr, // dest | |
ConstantInt::get(M->getContext(), APInt(8, 0)), // val | |
scratch_len, // len | |
ConstantInt::get(M->getContext(), APInt(32, 1)), // align | |
ConstantInt::get(M->getContext(), APInt(1, 0)), // volatile | |
}; | |
CallInst::Create(_memset, ArrayRef<Value*>(args, 5), "", entry); | |
BB = entry; | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::finish() { | |
assert(M != NULL); | |
assert(BB != NULL); | |
assert(st_BB.empty()); | |
assert(st_cursor_addr.empty()); | |
assert(st_scratch_base_addr.empty()); | |
assert(st_loop_head.empty()); | |
assert(loop_head == NULL); | |
ReturnInst::Create(M->getContext(), BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::add_instruction(insn_name i) { | |
switch(i) { | |
case ptr_increment: | |
adjust_pointer(1); | |
break; | |
case ptr_decrement: | |
adjust_pointer(-1); | |
break; | |
case value_increment: | |
adjust_value(1); | |
break; | |
case value_decrement: | |
adjust_value(-1); | |
break; | |
case value_write: | |
write_value(); | |
break; | |
case value_read: | |
read_value(); | |
break; | |
case loop_begin: | |
do_loop_begin(); | |
break; | |
case loop_end: | |
do_loop_end(); | |
break; | |
default: | |
__builtin_trap(); | |
} | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::do_loop_begin() { | |
assert(M != NULL); | |
assert(cursor_addr != NULL); | |
assert(scratch_base_addr != NULL); | |
Type* argty[] = { | |
cursor_addr->getType(), | |
scratch_base_addr->getType(), | |
}; | |
FunctionType* T = FunctionType::get(Type::getVoidTy(M->getContext()), ArrayRef<Type*>(argty, 2), false); | |
Function* LF = Function::Create(T, GlobalValue::PrivateLinkage, F->getName() + ".loop", M); | |
Value* args[] = { cursor_addr, scratch_base_addr }; | |
CallInst::Create(LF, ArrayRef<Value*>(args, 2), "", BB); | |
st_cursor_addr.push(cursor_addr); | |
Function::arg_iterator arg = LF->arg_begin(); | |
arg->setName("cursor.addr"); | |
cursor_addr = arg; | |
arg++; | |
st_scratch_base_addr.push(scratch_base_addr); | |
arg->setName("scratch"); | |
scratch_base_addr = arg; | |
arg++; | |
assert(arg == LF->arg_end()); | |
st_BB.push(BB); | |
BB = BasicBlock::Create(M->getContext(), "head", LF); | |
BasicBlock* entry = BasicBlock::Create(M->getContext(), "entry", LF, BB); | |
BranchInst::Create(BB, entry); | |
Value* V = load_value(); | |
assert(V != NULL); | |
Value* flag = new ICmpInst(*BB, CmpInst::ICMP_EQ, V, ConstantInt::get(M->getContext(), APInt(8, 0)), "condition"); | |
BasicBlock* BB_body = BasicBlock::Create(M->getContext(), "body", LF); | |
BasicBlock* BB_exit = BasicBlock::Create(M->getContext(), "exit", LF); | |
ReturnInst::Create(M->getContext(), BB_exit); | |
st_loop_head.push(loop_head); | |
loop_head = BB; | |
BB = BB_body; | |
BranchInst::Create(BB_exit, BB_body, flag, loop_head); | |
} | |
////////////////////////////////////////////////////////////////////// | |
template <class StackType, class ElementType> | |
void top_and_pop(ElementType& t, StackType& st) { | |
assert(!st.empty()); | |
t = st.top(); | |
st.pop(); | |
} | |
void codegen::do_loop_end() { | |
BranchInst::Create(loop_head, BB); | |
top_and_pop(cursor_addr, st_cursor_addr); | |
top_and_pop(scratch_base_addr, st_scratch_base_addr); | |
top_and_pop(loop_head, st_loop_head); | |
top_and_pop(BB, st_BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::read_value() { | |
if(F_getchar == NULL) { | |
assert(M != NULL); | |
FunctionType* T = FunctionType::get(IntegerType::get(M->getContext(), 32), false); | |
F_getchar = M->getOrInsertFunction("getchar", T); | |
} | |
assert(F_getchar != NULL); | |
Value* V = CallInst::Create(F_getchar, "input", BB); | |
V = CastInst::Create(CastInst::Trunc, V, IntegerType::get(M->getContext(), 8), V->getName() + ".trunc", BB); | |
store_value(V); | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::write_value() { | |
Value* V = load_value(); | |
assert(V != NULL); | |
if(F_putchar == NULL) { | |
assert(M != NULL); | |
FunctionType* T = FunctionType::get(Type::getVoidTy(M->getContext()), IntegerType::get(M->getContext(), 32), false); | |
F_putchar = M->getOrInsertFunction("putchar", T); | |
} | |
assert(F_putchar != NULL); | |
V = new ZExtInst(V, IntegerType::get(M->getContext(), 32), V->getName() + ".ext", BB); | |
CallInst::Create(F_putchar, V, "", BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
Value* codegen::load_cursor() { | |
assert(cursor_addr != NULL); | |
assert(BB != NULL); | |
Value* V = new LoadInst(cursor_addr, "cursor", BB); | |
return V; | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::adjust_value(int delta) { | |
assert(delta == -1 || delta == 1); | |
const char* name = NULL; | |
switch(delta) { | |
case -1: name = ".m1"; break; | |
case 1: name = ".p1"; break; | |
} | |
assert(name != NULL); | |
Value* old_value = load_value(); | |
assert(old_value != NULL); | |
Value* new_value = BinaryOperator::Create(BinaryOperator::Add, old_value, ConstantInt::get(cast<IntegerType>(old_value->getType()), delta), old_value->getName() + name, BB); | |
store_value(new_value); | |
} | |
////////////////////////////////////////////////////////////////////// | |
GetElementPtrInst* codegen::get_value_address() { | |
assert(BB != NULL); | |
assert(scratch_base_addr != NULL); | |
Value* V = load_cursor(); | |
assert(V != NULL); | |
GetElementPtrInst* GEP = GetElementPtrInst::Create(scratch_base_addr, V, "value.addr", BB); | |
return GEP; | |
} | |
////////////////////////////////////////////////////////////////////// | |
Value* codegen::load_value() { | |
assert(BB != NULL); | |
return new LoadInst(get_value_address(), "value", BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::store_value(Value* new_value) { | |
assert(new_value != NULL); | |
assert(BB != NULL); | |
new StoreInst(new_value, get_value_address(), BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
void codegen::adjust_pointer(int delta) { | |
assert(delta == -1 || delta == 1); | |
const char* name = NULL; | |
switch(delta) { | |
case -1: name = "cursor.m1"; break; | |
case 1: name = "cursor.p1"; break; | |
} | |
Value* old_cursor = load_cursor(); | |
Value* new_cursor = BinaryOperator::Create(BinaryOperator::Add, old_cursor, ConstantInt::get(cast<IntegerType>(old_cursor->getType()), 1), name, BB); | |
new StoreInst(new_cursor, cursor_addr, BB); | |
} | |
////////////////////////////////////////////////////////////////////// | |
codegen::~codegen() { | |
M->print(outs(), NULL); | |
delete M; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* -*- c++ -*- */ | |
#ifndef __david__3DD77622_4F18_11E4_A52A_000001ABCDEF__ | |
#define __david__3DD77622_4F18_11E4_A52A_000001ABCDEF__ | |
#include "llvm/LinkAllIR.h" | |
#include "llvm/Support/raw_ostream.h" | |
#include <string> | |
#include <cstdlib> | |
#include <stack> | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment