Skip to content

Instantly share code, notes, and snippets.

@mbillingr
Last active June 11, 2019 15:23
Show Gist options
  • Save mbillingr/0005f7845dfe3f25f24e9eb59c9a0e02 to your computer and use it in GitHub Desktop.
Save mbillingr/0005f7845dfe3f25f24e9eb59c9a0e02 to your computer and use it in GitHub Desktop.
Brainfuck JIT compiler written in Python
from ctypes import CFUNCTYPE, c_double, c_int32, c_voidp, c_char
import llvmlite.binding as llvm
from llvmlite import ir
# Looping does not yet work... looks like I'll need to grok the phi instruction....
hello_world_source = """
++++++++ Set Cell #0 to 8
[
>++++ Add 4 to Cell #1; this will always set Cell #1 to 4
[ as the cell will be cleared by the loop
>++ Add 2 to Cell #2
>+++ Add 3 to Cell #3
>+++ Add 3 to Cell #4
>+ Add 1 to Cell #5
<<<<- Decrement the loop counter in Cell #1
] Loop till Cell #1 is zero; number of iterations is 4
>+ Add 1 to Cell #2
>+ Add 1 to Cell #3
>- Subtract 1 from Cell #4
>>+ Add 1 to Cell #6
[<] Move back to the first zero cell you find; this will
be Cell #1 which was cleared by the previous loop
<- Decrement the loop Counter in Cell #0
] Loop till Cell #0 is zero; number of iterations is 8
The result of this is:
Cell No : 0 1 2 3 4 5 6
Contents: 0 0 72 104 88 32 8
Pointer : ^
>>. Cell #2 has value 72 which is 'H'
>---. Subtract 3 from Cell #3 to get 101 which is 'e'
+++++++..+++. Likewise for 'llo' from Cell #3
>>. Cell #5 is 32 for the space
<-. Subtract 1 from Cell #4 for 87 to give a 'W'
<. Cell #3 was set to 'o' from the end of 'Hello'
+++.------.--------. Cell #3 for 'rl' and 'd'
>>+. Add 1 to Cell #5 gives us an exclamation point
>++. And finally a newline from Cell #6
"""
# All these initializations are required for code generation!
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter() # yes, even this one
def create_execution_engine():
"""
Create an ExecutionEngine suitable for JIT code generation on
the host CPU. The engine is reusable for an arbitrary number of
modules.
"""
# Create a target machine representing the host
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
# And an execution engine with an empty backing module
backing_mod = llvm.parse_assembly("")
engine = llvm.create_mcjit_compiler(backing_mod, target_machine)
return engine
def compile_ir(engine, llvm_ir):
"""
Compile the LLVM IR string with the given engine.
The compiled module object is returned.
"""
# Create a LLVM module object from the IR
if isinstance(llvm_ir, ir.module.Module):
mod = llvm_ir
else:
mod = llvm.parse_assembly(llvm_ir)
mod.verify()
# Now add the module and make sure it is ready for execution
engine.add_module(mod)
engine.finalize_object()
engine.run_static_constructors()
return mod
def compile_to_ir(name, source):
code = []
loopstack = []
cell_t = ir.IntType(32)
cell_pt = ir.IntType(64)
cell_p = ir.PointerType(cell_t)
fnty = ir.FunctionType(cell_t, (cell_p, cell_p))
ZERO = ir.Constant(cell_t, 0)
ONE = ir.Constant(cell_t, 1)
STEP = ir.Constant(cell_pt, 4)
module = ir.Module(name=name)
func = ir.Function(module, fnty, name="main")
tape, output = func.args
block = func.append_basic_block(name="entry")
cond_block, next_block = None, None
builder = ir.IRBuilder(block)
cursor = builder.ptrtoint(tape, cell_pt, name="cursor")
output = builder.ptrtoint(output, cell_pt, name="output")
for cmd in source:
if cmd == ">":
cursor = builder.add(cursor, STEP, name="cursor")
if cmd == "<":
cursor = builder.sub(cursor, STEP, name="cursor")
elif cmd == "+":
p = builder.inttoptr(cursor, cell_p)
cell = builder.load(p, name="cell")
cell = builder.add(cell, ONE, name="cell")
builder.store(cell, p)
elif cmd == "-":
p = builder.inttoptr(cursor, cell_p)
cell = builder.load(p, name="cell")
cell = builder.sub(cell, ONE, name="cell")
builder.store(cell, p)
elif cmd == ".":
p = builder.inttoptr(cursor, cell_p)
cell = builder.load(p, name="cell")
p = builder.inttoptr(output, cell_p)
builder.store(cell, p)
output = builder.add(output, STEP, name="output")
elif cmd == "[":
loopstack.append((cond_block, next_block))
cond_block = func.append_basic_block()
loop_block = func.append_basic_block()
next_block = func.append_basic_block()
builder.branch(cond_block)
builder.position_at_end(cond_block)
p = builder.inttoptr(cursor, cell_p)
cell = builder.load(p, name="cell")
cond = builder.icmp_unsigned('==', cell, ZERO)
builder.cbranch(cond, next_block, loop_block)
builder.position_at_end(loop_block)
elif cmd == "]":
builder.branch(cond_block)
builder.position_at_end(next_block)
cond_block, next_block = loopstack.pop()
pass
builder.ret(ZERO)
return module
engine = create_execution_engine()
#bfir = compile_to_ir("test", hello_world_source)
bfir = compile_to_ir("test", """[>]>""")
print(bfir)
mod = compile_ir(engine, str(bfir))
print("Before optimization:")
print(mod)
# perform some optimizations
mpm = llvm.passmanagers.create_module_pass_manager()
mpm.add_global_optimizer_pass()
mpm.add_gvn_pass()
mpm.add_instruction_combining_pass()
mpm.run(mod)
print("After optimization:")
print(mod)
# Look up the function pointer (a Python int)
func_ptr = engine.get_function_address("main")
tape = bytearray(1024 * 4)
out = bytearray(1024 * 4)
#cfunc = CFUNCTYPE(c_int32, c_voidp, c_voidp)(func_ptr)
#res = cfunc((c_char*len(tape)).from_buffer(tape),
# (c_char*len(out)).from_buffer(out))
print(tape[:10])
print(out[:40:4])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment