Last active
March 9, 2018 17:24
-
-
Save PurpleMyst/f6b77b9cc1d037e415c7a47215af1c0e to your computer and use it in GitHub Desktop.
Solve AoC 2016 Day 12.1, 12.2, 25.1 using LLVM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from llvmlite import ir, binding as llvm | |
def compile_assembunny(instructions): | |
instructions = tuple(instructions) | |
program_size = len(instructions) | |
char = ir.IntType(8) | |
int32 = ir.IntType(32) | |
int64 = ir.IntType(64) | |
module = ir.Module(name="assembunny") | |
main_type = ir.FunctionType(int32, ()) | |
main_func = ir.Function(module, main_type, name="main") | |
entry = main_func.append_basic_block(name="entry") | |
printf_type = ir.FunctionType(int32, (char.as_pointer(),), var_arg=True) | |
printf = ir.Function(module, printf_type, name="printf") | |
builder = ir.IRBuilder(entry) | |
fmt_contents = bytearray(b"%ld\n") | |
fmt_type = ir.ArrayType(char, len(fmt_contents)) | |
fmt_const = fmt_type(fmt_contents) | |
fmt_var = ir.GlobalVariable(module, fmt_type, name="fmt") | |
fmt_var.initializer = fmt_const | |
registers = {} | |
for name in "abcd": | |
registers[name] = builder.alloca(int64, name=name) | |
builder.store(int64(0), registers[name]) | |
def get_value(operand): | |
# Most instructions that take in a "source" operand can either take a | |
# constant value or a register. We don't really care which it takes, so | |
# we just abstract over it with this function. The LLVM optimizer | |
# handles removing dead code, so we don't even have to care about some | |
# constants always being zero or not zero. | |
if operand in registers: | |
register_value = builder.load(registers[operand], | |
name=operand + "_value") | |
return register_value | |
else: | |
return int64(int(operand)) | |
# A list of blocks that don't have a terminator yet paired with the | |
# information needed to construct that terminator. This is needed because | |
# we don't have all the information needed to construct a jump when we | |
# encounter it, due to having to know which block to jump to. This isn't an | |
# issue for backwards jumps, but it is for forward jumps. | |
jump_from = [] | |
# A dictionary that maps instruction indices to a block that starts with | |
# that instructions. This is for building jumps, seeing as we can only jump | |
# to the start of a block and from the end of a block. | |
jump_to = {} | |
for index, instruction in enumerate(instructions): | |
mnemonic, *args = instruction | |
# For efficiency's sake, we create a block only at locations which are | |
# jumped at by one or more jump instructions. | |
if mnemonic == "jnz": | |
(_, delta) = args | |
jump_to[index + int(delta)] = None | |
for index, instruction in enumerate(instructions): | |
mnemonic, *args = instruction | |
# If this is an instruction that something jumps to, we need to create | |
# a new block that starts with this instruction. | |
if index in jump_to: | |
assert jump_to[index] is None | |
new_block = builder.append_basic_block(name="instr_%s" % index) | |
jump_to[index] = new_block | |
builder.branch(new_block) | |
builder.position_at_start(new_block) | |
if mnemonic == "cpy": | |
(source, destination) = args | |
value = get_value(source) | |
builder.store(value, registers[destination]) | |
elif mnemonic == "inc" or mnemonic == "dec": | |
(register,) = args | |
register_value = builder.load(registers[register], | |
name=source + "_value") | |
if mnemonic == "inc": | |
register_value = builder.add(register_value, int64(1)) | |
elif mnemonic == "dec": | |
register_value = builder.sub(register_value, int64(1)) | |
else: | |
raise "your dongers" | |
builder.store(register_value, registers[register]) | |
elif mnemonic == "jnz": | |
(source, delta) = args | |
delta = int(delta) | |
destination = index + delta | |
# XXX: Instead of creating a new block, we should just do some | |
# trickery with `jump_to`, and make an index >= program_size map to | |
# the end of the program. | |
if destination >= program_size: | |
destination_block = None | |
else: | |
destination_block = jump_to[destination] | |
next_block = builder.append_basic_block(name="nonzero") | |
# If we already know where to jump (i.e. if we're jumping | |
# backwards) , we can just insert a conditional branch. | |
# If we don't, we add it to `jump_from` so we know we | |
# need to build a jump there. | |
if destination_block is not None: | |
value = get_value(source) | |
is_zero = builder.icmp_signed("==", value, int64(0)) | |
builder.cbranch(is_zero, next_block, destination_block) | |
else: | |
jump_from.append((builder.block, | |
source, destination, next_block)) | |
builder.position_at_start(next_block) | |
else: | |
raise ValueError("Unknown memonic %r" % mnemonic) | |
# We're at the end of the bitcode, so we just return from main and print | |
# our 'a' register. | |
end_block = builder.append_basic_block(name="finish") | |
builder.branch(end_block) | |
builder.position_at_start(end_block) | |
fmt_ptr = builder.bitcast(fmt_var, char.as_pointer()) | |
builder.call(printf, (fmt_ptr, get_value("a"))) | |
builder.ret(int32(0)) | |
for (block, source, destination, next_block) in jump_from: | |
with builder.goto_block(block): | |
value = get_value(source) | |
is_zero = builder.icmp_signed("==", value, int64(0)) | |
if destination >= program_size: | |
destination_block = end_block | |
else: | |
destination_block = jump_to[destination] | |
builder.cbranch(is_zero, next_block, destination_block) | |
return module | |
def main(): | |
with open("input.txt") as input_file: | |
instructions = map(str.split, input_file.read().splitlines()) | |
ir_module = compile_assembunny(instructions) | |
print(ir_module) | |
binding_module = llvm.parse_assembly(str(ir_module)) | |
with open("output.bc", "wb") as output_file: | |
output_file.write(binding_module.as_bitcode()) | |
if __name__ == "__main__": | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# This just compiles the assembunny to a bitcode file. To actually solve the challenge, | |
# you'll need to just give a few different values (as a command line argument) to the | |
# script (e.g. `./output 189`) and check if it looks correct. | |
# You can very easily do this with a simple Python, Perl or Bash script. | |
from llvmlite import ir, binding as llvm | |
OUT_LIMIT = 10 | |
def compile_assembunny(instructions): | |
instructions = tuple(instructions) | |
program_size = len(instructions) | |
char = ir.IntType(8) | |
int32 = ir.IntType(32) | |
int64 = ir.IntType(64) | |
module = ir.Module(name="assembunny") | |
main_type = ir.FunctionType(int32, (int32, char.as_pointer().as_pointer())) | |
main_func = ir.Function(module, main_type, name="main") | |
entry = main_func.append_basic_block(name="entry") | |
printf_type = ir.FunctionType(int32, (char.as_pointer(),), var_arg=True) | |
printf = ir.Function(module, printf_type, name="printf") | |
atol_type = ir.FunctionType(int64, (char.as_pointer(),)) | |
atol = ir.Function(module, atol_type, name="atol") | |
builder = ir.IRBuilder(entry) | |
fmt_contents = bytearray(b"%ld\n") | |
fmt_type = ir.ArrayType(char, len(fmt_contents)) | |
fmt_const = fmt_type(fmt_contents) | |
fmt_var = ir.GlobalVariable(module, fmt_type, name="fmt") | |
fmt_var.initializer = fmt_const | |
registers = {} | |
for name in "abcd": | |
registers[name] = builder.alloca(int64, name=name) | |
builder.store(int64(0), registers[name]) | |
outs = builder.alloca(int64, name="outs") | |
builder.store(int64(0), outs) | |
def get_value(operand): | |
# Most instructions that take in a "source" operand can either take a | |
# constant value or a register. We don't really care which it takes, so | |
# we just abstract over it with this function. The LLVM optimizer | |
# handles removing dead code, so we don't even have to care about some | |
# constants always being zero or not zero. | |
if operand in registers: | |
register_value = builder.load(registers[operand], | |
name=operand + "_value") | |
return register_value | |
else: | |
return int64(int(operand)) | |
# A list of blocks that don't have a terminator yet paired with the | |
# information needed to construct that terminator. This is needed because | |
# we don't have all the information needed to construct a jump when we | |
# encounter it, due to having to know which block to jump to. This isn't an | |
# issue for backwards jumps, but it is for forward jumps. | |
jump_from = [] | |
# A dictionary that maps instruction indices to a block that starts with | |
# that instructions. This is for building jumps, seeing as we can only jump | |
# to the start of a block and from the end of a block. | |
jump_to = {} | |
argc, argv = main_func.args | |
arg_given = builder.icmp_unsigned(">=", argc, int32(2)) | |
with builder.if_else(arg_given) as (then, otherwise): | |
with then: | |
arg_ptr = builder.gep(argv, (int32(1),)) | |
arg_value = builder.load(arg_ptr) | |
arg_value = builder.call(atol, (arg_value,)) | |
builder.store(arg_value, registers["a"]) | |
with otherwise: | |
builder.ret(int32(1)) | |
for index, instruction in enumerate(instructions): | |
mnemonic, *args = instruction | |
# For efficiency's sake, we create a block only at locations which are | |
# jumped at by one or more jump instructions. | |
if mnemonic == "jnz": | |
(_, delta) = args | |
jump_to[index + int(delta)] = None | |
for index, instruction in enumerate(instructions): | |
mnemonic, *args = instruction | |
# If this is an instruction that something jumps to, we need to create | |
# a new block that starts with this instruction. | |
if index in jump_to: | |
assert jump_to[index] is None | |
new_block = builder.append_basic_block(name="instr_%s" % index) | |
jump_to[index] = new_block | |
builder.branch(new_block) | |
builder.position_at_start(new_block) | |
if mnemonic == "cpy": | |
(source, destination) = args | |
value = get_value(source) | |
builder.store(value, registers[destination]) | |
elif mnemonic == "inc" or mnemonic == "dec": | |
(register,) = args | |
register_value = builder.load(registers[register], | |
name=source + "_value") | |
if mnemonic == "inc": | |
register_value = builder.add(register_value, int64(1)) | |
elif mnemonic == "dec": | |
register_value = builder.sub(register_value, int64(1)) | |
else: | |
raise "your dongers" | |
builder.store(register_value, registers[register]) | |
elif mnemonic == "jnz": | |
(source, delta) = args | |
delta = int(delta) | |
destination = index + delta | |
# XXX: Instead of creating a new block, we should just do some | |
# trickery with `jump_to`, and make an index >= program_size map to | |
# the end of the program. | |
if destination >= program_size: | |
destination_block = None | |
else: | |
destination_block = jump_to[destination] | |
next_block = builder.append_basic_block(name="nonzero") | |
# If we already know where to jump (i.e. if we're jumping | |
# backwards) , we can just insert a conditional branch. | |
# If we don't, we add it to `jump_from` so we know we | |
# need to build a jump there. | |
if destination_block is not None: | |
value = get_value(source) | |
is_zero = builder.icmp_signed("==", value, int64(0)) | |
builder.cbranch(is_zero, next_block, destination_block) | |
else: | |
jump_from.append((builder.block, | |
source, destination, next_block)) | |
builder.position_at_start(next_block) | |
elif mnemonic == "out": | |
(source,) = args | |
outs_value = builder.load(outs, name="outs_value") | |
over_limit = builder.icmp_unsigned(">=", outs_value, int64(OUT_LIMIT)) | |
with builder.if_else(over_limit) as (then, otherwise): | |
with then: | |
builder.ret(int32(0)) | |
with otherwise: | |
fmt_ptr = builder.bitcast(fmt_var, char.as_pointer()) | |
builder.call(printf, (fmt_ptr, get_value(source))) | |
outs_value = builder.add(outs_value, int64(1)) | |
builder.store(outs_value, outs) | |
else: | |
raise ValueError("Unknown memonic %r" % mnemonic) | |
# We're at the end of the bitcode, so we just return from main and print | |
# our 'a' register. | |
end_block = builder.append_basic_block(name="finish") | |
builder.branch(end_block) | |
builder.position_at_start(end_block) | |
builder.ret(int32(0)) | |
for (block, source, destination, next_block) in jump_from: | |
with builder.goto_block(block): | |
value = get_value(source) | |
is_zero = builder.icmp_signed("==", value, int64(0)) | |
if destination >= program_size: | |
destination_block = end_block | |
else: | |
destination_block = jump_to[destination] | |
builder.cbranch(is_zero, next_block, destination_block) | |
return module | |
def main(): | |
with open("input.txt") as input_file: | |
instructions = map(str.split, input_file.read().splitlines()) | |
ir_module = compile_assembunny(instructions) | |
print(ir_module) | |
binding_module = llvm.parse_assembly(str(ir_module)) | |
with open("output.bc", "wb") as output_file: | |
output_file.write(binding_module.as_bitcode()) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment