Last active
March 3, 2019 16:46
-
-
Save mikroskeem/a98242be32a1c91163b4cdf1c1ec57ea to your computer and use it in GitHub Desktop.
Quick, dirty and sort of safe brainfuck compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import pprint | |
# C scaffold for the compiled program. Two placeholders are substituted by the
# driver code at the bottom of this file:
#   %MEM_SIZE%        - number of cells in the tape, taken from analyze_bf()
#   %GENERATED_CODE%  - the C statements produced by write_bf()
# The %GENERATED_CODE% line is deliberately not indented: write_bf() indents
# its own output, starting at one level.
GENERATED_CODE_BASE = """/*
 * Code generated using compile_bf.py
 */
#include <stdlib.h>
#include <stdio.h>

static char memory[%MEM_SIZE%] = { 0 };

int main(void) {
    char *ptr = memory;

    // Generated code start
%GENERATED_CODE%
    // Generated code end

    return 0;
}
"""
# Loop delimiters; merge_bf() never folds these into counted runs.
LOOP_INSNS = {"[", "]"}
# The eight brainfuck instructions; parse_bf() drops every other character.
VALID_INSNS = {"<", ">", "+", "-", ".", ",", "[", "]"}
def parse_bf(bf_file):
    """Read a brainfuck source file and return its instructions in order.

    Every character that is not in ``VALID_INSNS`` is treated as a comment
    and dropped.

    Args:
        bf_file: Path of the brainfuck source file to read.

    Returns:
        A list of single-character strings, one per instruction.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # errors="ignore": bytes that cannot be decoded can never be one of the
    # eight ASCII instructions, so they are comments like any other character
    # and must not abort parsing with UnicodeDecodeError.
    with open(bf_file, "r", errors="ignore") as f:
        return [char for line in f for char in line if char in VALID_INSNS]
def merge_bf(bf_data):
    """Collapse runs of identical instructions into counted entries.

    Loop instructions (members of ``LOOP_INSNS``) are never merged; each one
    always gets its own entry with an amount of 1.

    Args:
        bf_data: Iterable of single-character instruction strings.

    Returns:
        A list of dicts of the form ``{"insn": x, "amount": n}``, where ``n``
        is the number of consecutive occurrences of instruction ``x``.
    """
    merged = []
    for insn in bf_data:
        previous = merged[-1] if merged else None
        # Compare by value (==), not identity: `is` on strings only works by
        # accident of interpreter string interning.
        if (
            previous is not None
            and insn not in LOOP_INSNS
            and previous["insn"] == insn
        ):
            previous["amount"] += 1
        else:
            merged.append({"insn": insn, "amount": 1})
    return merged
def analyze_bf(bf_data):
    """Estimate how many memory cells the program needs.

    The merged instruction list is walked once, in source order, tracking
    where the data pointer would be after each ``<`` / ``>`` entry. Loop
    instructions are not interpreted: every entry is counted exactly once, so
    a loop that moves the pointer on each iteration can exceed the size
    computed here.

    The result is also reported on stderr.

    Args:
        bf_data: List of ``{"insn": x, "amount": n}`` dicts.

    Returns:
        A dict ``{"mem_size": n}`` where ``n`` is the highest pointer index
        reached plus one (at least 1).

    Raises:
        ValueError: If the pointer would move below cell 0.
    """
    ptr_index = 0
    # The pointer starts on cell 0, so one cell is always required. This also
    # keeps an empty program from producing a zero-length C array.
    ptr_max = 1

    for insn in bf_data:
        op = insn["insn"]
        if op == "<":
            ptr_index -= insn["amount"]
        elif op == ">":
            ptr_index += insn["amount"]

        if ptr_index < 0:
            # Must be a real exception instance; raising a bare string is
            # itself a TypeError in Python 3.
            raise ValueError("ptr_index goes negative, invalid program!")

        ptr_max = max(ptr_max, ptr_index + 1)

    info = {"mem_size": ptr_max}

    # Dump info
    print("Max mem needed: {} bytes".format(info["mem_size"]), file=sys.stderr)
    return info
def write_bf(bf_data):
    """Translate merged brainfuck instructions into indented C statements.

    Args:
        bf_data: List of ``{"insn": x, "amount": n}`` dicts.

    Returns:
        The C statements joined with newlines. Each statement is indented by
        four spaces per nesting level, starting at level 1 so that the output
        lines up inside the ``main`` body of the C template.

    Raises:
        ValueError: If an entry holds an instruction that is not one of the
            eight brainfuck instructions.
    """
    code = []
    depth = 1  # 1 because code is indented with 4 spaces in the beginning

    for insn in bf_data:
        op = insn["insn"]
        count = insn["amount"]

        if op == ">":
            statements = [f"ptr += {count};"]
        elif op == "<":
            statements = [f"ptr -= {count};"]
        elif op == "+":
            statements = [f"*ptr += {count};"]
        elif op == "-":
            statements = [f"*ptr -= {count};"]
        elif op == ".":
            # I/O cannot be folded into a single statement: a merged run of
            # n outputs must still print n times.
            statements = ["putchar(*ptr);"] * count
        elif op == ",":
            statements = ["*ptr = getchar();"] * count
        elif op == "[":
            statements = ["while(*ptr) {"]
        elif op == "]":
            # The closing brace belongs to the enclosing level, so dedent
            # before it is written.
            depth -= 1
            statements = ["}"]
        else:
            raise ValueError(f"unknown instruction: {op!r}")

        # A non-positive depth (unbalanced ']') yields an empty indent.
        indent = "    " * depth
        code.extend(indent + statement for statement in statements)

        if op == "[":
            # The loop header is written at the outer level; only the
            # statements after it are nested.
            depth += 1

    return "\n".join(code)
def main(argv=None):
    """Compile the brainfuck file named on the command line to C on stdout.

    Args:
        argv: Argument vector to use instead of ``sys.argv``; element 1 is
            the path of the brainfuck source file. Extra elements are ignored.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit("usage: compile_bf.py <program.bf>")

    parsed = parse_bf(args[1])
    merged = merge_bf(parsed)
    analyzed = analyze_bf(merged)
    generated = write_bf(merged)

    print(
        GENERATED_CODE_BASE
        .replace("%GENERATED_CODE%", generated)
        .replace("%MEM_SIZE%", str(analyzed["mem_size"]))
    )


# Guarded so that importing this module does not run the compiler.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment