Last active
June 11, 2019 15:23
-
-
Save mbillingr/0005f7845dfe3f25f24e9eb59c9a0e02 to your computer and use it in GitHub Desktop.
Brainfuck JIT compiler written in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ctypes import CFUNCTYPE, c_double, c_int32, c_voidp, c_char | |
import llvmlite.binding as llvm | |
from llvmlite import ir | |
# Looping does not yet work... looks like I'll need to grok the phi instruction.... | |
hello_world_source = """ | |
++++++++ Set Cell #0 to 8 | |
[ | |
>++++ Add 4 to Cell #1; this will always set Cell #1 to 4 | |
[ as the cell will be cleared by the loop | |
>++ Add 2 to Cell #2 | |
>+++ Add 3 to Cell #3 | |
>+++ Add 3 to Cell #4 | |
>+ Add 1 to Cell #5 | |
<<<<- Decrement the loop counter in Cell #1 | |
] Loop till Cell #1 is zero; number of iterations is 4 | |
>+ Add 1 to Cell #2 | |
>+ Add 1 to Cell #3 | |
>- Subtract 1 from Cell #4 | |
>>+ Add 1 to Cell #6 | |
[<] Move back to the first zero cell you find; this will | |
be Cell #1 which was cleared by the previous loop | |
<- Decrement the loop Counter in Cell #0 | |
] Loop till Cell #0 is zero; number of iterations is 8 | |
The result of this is: | |
Cell No : 0 1 2 3 4 5 6 | |
Contents: 0 0 72 104 88 32 8 | |
Pointer : ^ | |
>>. Cell #2 has value 72 which is 'H' | |
>---. Subtract 3 from Cell #3 to get 101 which is 'e' | |
+++++++..+++. Likewise for 'llo' from Cell #3 | |
>>. Cell #5 is 32 for the space | |
<-. Subtract 1 from Cell #4 for 87 to give a 'W' | |
<. Cell #3 was set to 'o' from the end of 'Hello' | |
+++.------.--------. Cell #3 for 'rl' and 'd' | |
>>+. Add 1 to Cell #5 gives us an exclamation point | |
>++. And finally a newline from Cell #6 | |
""" | |
# All these initializations are required for code generation! | |
llvm.initialize() | |
llvm.initialize_native_target() | |
llvm.initialize_native_asmprinter() # yes, even this one | |
def create_execution_engine(): | |
""" | |
Create an ExecutionEngine suitable for JIT code generation on | |
the host CPU. The engine is reusable for an arbitrary number of | |
modules. | |
""" | |
# Create a target machine representing the host | |
target = llvm.Target.from_default_triple() | |
target_machine = target.create_target_machine() | |
# And an execution engine with an empty backing module | |
backing_mod = llvm.parse_assembly("") | |
engine = llvm.create_mcjit_compiler(backing_mod, target_machine) | |
return engine | |
def compile_ir(engine, llvm_ir): | |
""" | |
Compile the LLVM IR string with the given engine. | |
The compiled module object is returned. | |
""" | |
# Create a LLVM module object from the IR | |
if isinstance(llvm_ir, ir.module.Module): | |
mod = llvm_ir | |
else: | |
mod = llvm.parse_assembly(llvm_ir) | |
mod.verify() | |
# Now add the module and make sure it is ready for execution | |
engine.add_module(mod) | |
engine.finalize_object() | |
engine.run_static_constructors() | |
return mod | |
def compile_to_ir(name, source): | |
code = [] | |
loopstack = [] | |
cell_t = ir.IntType(32) | |
cell_pt = ir.IntType(64) | |
cell_p = ir.PointerType(cell_t) | |
fnty = ir.FunctionType(cell_t, (cell_p, cell_p)) | |
ZERO = ir.Constant(cell_t, 0) | |
ONE = ir.Constant(cell_t, 1) | |
STEP = ir.Constant(cell_pt, 4) | |
module = ir.Module(name=name) | |
func = ir.Function(module, fnty, name="main") | |
tape, output = func.args | |
block = func.append_basic_block(name="entry") | |
cond_block, next_block = None, None | |
builder = ir.IRBuilder(block) | |
cursor = builder.ptrtoint(tape, cell_pt, name="cursor") | |
output = builder.ptrtoint(output, cell_pt, name="output") | |
for cmd in source: | |
if cmd == ">": | |
cursor = builder.add(cursor, STEP, name="cursor") | |
if cmd == "<": | |
cursor = builder.sub(cursor, STEP, name="cursor") | |
elif cmd == "+": | |
p = builder.inttoptr(cursor, cell_p) | |
cell = builder.load(p, name="cell") | |
cell = builder.add(cell, ONE, name="cell") | |
builder.store(cell, p) | |
elif cmd == "-": | |
p = builder.inttoptr(cursor, cell_p) | |
cell = builder.load(p, name="cell") | |
cell = builder.sub(cell, ONE, name="cell") | |
builder.store(cell, p) | |
elif cmd == ".": | |
p = builder.inttoptr(cursor, cell_p) | |
cell = builder.load(p, name="cell") | |
p = builder.inttoptr(output, cell_p) | |
builder.store(cell, p) | |
output = builder.add(output, STEP, name="output") | |
elif cmd == "[": | |
loopstack.append((cond_block, next_block)) | |
cond_block = func.append_basic_block() | |
loop_block = func.append_basic_block() | |
next_block = func.append_basic_block() | |
builder.branch(cond_block) | |
builder.position_at_end(cond_block) | |
p = builder.inttoptr(cursor, cell_p) | |
cell = builder.load(p, name="cell") | |
cond = builder.icmp_unsigned('==', cell, ZERO) | |
builder.cbranch(cond, next_block, loop_block) | |
builder.position_at_end(loop_block) | |
elif cmd == "]": | |
builder.branch(cond_block) | |
builder.position_at_end(next_block) | |
cond_block, next_block = loopstack.pop() | |
pass | |
builder.ret(ZERO) | |
return module | |
engine = create_execution_engine() | |
#bfir = compile_to_ir("test", hello_world_source) | |
bfir = compile_to_ir("test", """[>]>""") | |
print(bfir) | |
mod = compile_ir(engine, str(bfir)) | |
print("Before optimization:") | |
print(mod) | |
# perform some optimizations | |
mpm = llvm.passmanagers.create_module_pass_manager() | |
mpm.add_global_optimizer_pass() | |
mpm.add_gvn_pass() | |
mpm.add_instruction_combining_pass() | |
mpm.run(mod) | |
print("After optimization:") | |
print(mod) | |
# Look up the function pointer (a Python int) | |
func_ptr = engine.get_function_address("main") | |
tape = bytearray(1024 * 4) | |
out = bytearray(1024 * 4) | |
#cfunc = CFUNCTYPE(c_int32, c_voidp, c_voidp)(func_ptr) | |
#res = cfunc((c_char*len(tape)).from_buffer(tape), | |
# (c_char*len(out)).from_buffer(out)) | |
print(tape[:10]) | |
print(out[:40:4]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment