Skip to content

Instantly share code, notes, and snippets.

@Sasszem
Created September 10, 2021 20:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sasszem/308614f96149e5741aeeace72c169936 to your computer and use it in GitHub Desktop.
Save Sasszem/308614f96149e5741aeeace72c169936 to your computer and use it in GitHub Desktop.
Optimizing bf2c compiler in python. Possibly not 100% working. Gave up on fixing cuz' I got bored on it.
from dataclasses import dataclass, field, replace
import typing
from collections.abc import Iterator
from enum import Enum
from collections import defaultdict
#######################################
# Base classes for storing operations #
#######################################
NODE = typing.Union[
"Sequence",
"CharIn",
"CharOut",
"CharOutFixed",
"PointerMove",
"AddSub",
"EndLoopToken",
"LinearLoop",
"SetOperation",
]
@dataclass
class Sequence:
"""A sequence of operations. Might contain sub-sequences, thus implementing a tree"""
elements: list[NODE]
def add(self, element: NODE) -> None:
""""add an element to the sequence"""
self.elements.append(element)
def __init__(self):
"""init the sequence as empty"""
self.elements = list()
def __getitem__(self, key: int) -> NODE:
return self.elements[key]
def __iter__(self):
return iter(self.elements)
def last(self) -> typing.Optional[NODE]:
"""return last element or None if empty"""
if self.elements:
return self.elements[-1]
return None
@dataclass
class CharIn:
"""a character input operation into a cell"""
offset: int = 0
"""offset of the target cell relative to the current pointer"""
@dataclass
class CharOut:
"""character output operation from a cell"""
offset: int = 0
"""relative offset from the pointer"""
@dataclass
class CharOutFixed:
"""a fixed known char out operation"""
val: int
"""value to print"""
@dataclass
class PointerMove:
"""move the pointer"""
val: int
"""(signed) amout to move"""
@dataclass
class AddSub:
"""add or sub from a cell"""
val: int
"""amount to add or sub"""
offset: int = 0
"""relative offset"""
@dataclass
class LinearLoop:
"""a linear loop"""
keys: dict[int, int] = field(default_factory=dict)
offset: int = 0
@dataclass
class SetOperation:
"""set a cell to a known fixed value"""
offset: int
val: int
@dataclass
class EndLoopToken:
"""End of loop sentinel"""
##########
# PARSER #
##########
class BFParser:
"""parse BF code into own representation"""
def parse_sequence(self, token_iter: Iterator[str]) -> Sequence:
"""parse tokens into a sequence"""
seq = Sequence()
for token in token_iter:
node = self.parse_token(token, token_iter)
if node:
if isinstance(node, EndLoopToken):
break
seq.add(node)
else:
raise RuntimeError("Unbalanced loops!")
return seq
def parse_token(self, token: str, rest: Iterator[str]) -> typing.Optional[NODE]:
"""parse a block
a single token or call parse_sequence if found a loop"""
if token == ".":
return CharOut()
if token == ",":
return CharIn()
if token == ">":
return PointerMove(1)
if token == "<":
return PointerMove(-1)
if token == "+":
return AddSub(1)
if token == "-":
return AddSub(-1)
if token == "[":
return self.parse_sequence(rest)
if token == "]":
return EndLoopToken()
return None
# any other char should be ignored
def parse_source(self, source: str) -> Sequence:
"""parse a full source"""
# all we need is to wrap in [] and convert to iter
tree = self.parse_token("[", iter(source+"]"))
assert isinstance(tree, Sequence), "Toplevel is not a sequence!"
return tree
def parse_file(self, filename: str) -> Sequence:
"""parse a file
aka open and then parse source"""
with open(filename) as file:
return self.parse_source(file.read())
class OperationsCombiner:
"""Opt: combine multiple consequent operations into one"""
def __call__(self, tree: Sequence) -> Sequence:
"""run"""
ret_seq = Sequence()
for node in tree:
if isinstance(node, AddSub):
last = ret_seq.last()
if isinstance(last, AddSub) and last.offset == node.offset:
last.val += node.val
else:
ret_seq.add(node)
elif isinstance(node, SetOperation):
last = ret_seq.last()
if isinstance(last, typing.Set) and last.offset == node.offset:
last.val = node.val
else:
ret_seq.add(node)
elif isinstance(node, PointerMove):
last = ret_seq.last()
if isinstance(last, PointerMove):
last.val += node.val
else:
ret_seq.add(node)
elif isinstance(node, Sequence):
ret_seq.add(self(node))
elif isinstance(node, (CharOut, CharIn, CharOutFixed, Sequence, LinearLoop)):
ret_seq.add(node)
else:
raise RuntimeError(f"Unexpected node in graph: {node}")
return ret_seq
class CompboundGenerator:
"""Opt: detect and parse linear loops"""
def test_ops(self, tree: Sequence) -> bool:
return all(isinstance(x, (AddSub, PointerMove)) for x in tree)
# todo: update this when adding a new class!
def __call__(self, tree: Sequence) -> Sequence | LinearLoop | SetOperation:
"""run"""
if self.test_ops(tree):
# gather values
ptr = 0
muls: dict[int, int] = {}
for node in tree:
if isinstance(node, (PointerMove)):
ptr += node.val
elif isinstance(node, AddSub):
muls[ptr] = muls.get(ptr, 0) + node.val
if muls.get(0, 0) == -1:
if len(muls) == 1:
return SetOperation(0, 0)
else:
return LinearLoop(muls)
ret_seq = Sequence()
for node in tree:
if isinstance(node, Sequence):
ret_seq.add(self(node))
elif isinstance(node, (CharOut, CharIn, CharOutFixed, AddSub, PointerMove, LinearLoop, SetOperation)):
ret_seq.add(node)
else:
raise RuntimeError(f"Unexpected node in graph: {node}")
return ret_seq
class OffsetGenerator:
def __call__(self, tree: Sequence) -> Sequence:
ret_seq = Sequence()
new_node: NODE
offset = 0
for node in tree:
if isinstance(node, PointerMove):
offset += node.val
elif isinstance(node, (AddSub, CharIn, CharOut, SetOperation)):
new_node = replace(node)
new_node.offset += offset
ret_seq.add(new_node)
elif isinstance(node, LinearLoop):
new_node = LinearLoop(
{k+offset: v for k, v in node.keys.items()}, offset=node.offset + offset)
ret_seq.add(new_node)
elif isinstance(node, Sequence):
ret_seq.add(PointerMove(offset))
offset = 0
ret_seq.add(self(node))
elif isinstance(node, (CharOutFixed)):
ret_seq.add(node)
else:
raise RuntimeError(f"Unexpected node: {node}")
if offset:
ret_seq.add(PointerMove(offset))
return ret_seq
class SimpleNopPruner:
def __call__(self, tree: Sequence) -> Sequence:
ret_seq = Sequence()
for node in tree:
if isinstance(node, (PointerMove, AddSub)):
if node.val:
ret_seq.add(node)
elif isinstance(node, (Sequence, CharOut, CharOutFixed, CharIn, LinearLoop, SetOperation)):
ret_seq.add(node)
else:
raise RuntimeError(f"Unexpected node: {node}")
return ret_seq
@dataclass
class CWriter:
"""generate C source"""
indent_level: int = 0
indent_spaces: int = 4
src: str = ""
mem_size = 8192
mask = "0x1fff" # todo refactor me
def _write(self, text, end="\n"):
"""indent and newline"""
self.src += f"{self.indent_level*self.indent_spaces*' '}{text}{end}"
def _indent(self, amount=1):
"""add or sub indent level"""
self.indent_level += amount
def write_header(self):
"""write C boilerplate header"""
self._write("#include <stdio.h>")
self._write("")
self._write(f"char M[{self.mem_size}];")
self._write("int main() {")
self._indent()
self._write("int PT = 0;")
self._write("")
def write_footer(self):
"""write C boilerplate footer"""
self._write("")
self._write("")
self._write("return 0;")
self._indent(-1)
self._write("}")
@classmethod
def _plus(cls, num: int) -> str:
"""generate +C if C!=0"""
if num:
return f" {cls._sign(num)} {abs(num)}"
return ""
@classmethod
def _sign(cls, num: int) -> str:
"""get sign as a str"""
return '+' if num > 0 else '-'
def write_code(self, tree: Sequence):
"""write a sequence"""
# todo: refactor this into the classes maybe?
for node in tree:
if isinstance(node, AddSub):
self._write(
f"M[PT{self._plus(node.offset)}] {self._sign(node.val)}= {abs(node.val)};")
elif isinstance(node, PointerMove):
self._write(
f"PT = (PT {self._sign(node.val)} {abs(node.val)}) & {self.mask};")
elif isinstance(node, Sequence):
self._write("while (M[PT]) {")
self._indent()
self.write_code(node)
self._indent(-1)
self._write("}")
elif isinstance(node, CharIn):
self._write(f"M[PT{self._plus(node.offset)}] = getchar();")
elif isinstance(node, CharOut):
self._write(f"putchar(M[PT{self._plus(node.offset)}]);")
elif isinstance(node, LinearLoop):
lin = "".join(
f"M[PT{k:+}] {self._sign(v)}= {abs(v)}*M[PT{self._plus(node.offset)}]; "
for k, v in node.keys.items() if k-node.offset
) + f"M[PT{self._plus(node.offset)}] = 0;"
self._write(lin)
elif isinstance(node, SetOperation):
self._write(f"M[PT{self._plus(node.offset)}] = {node.val};")
elif isinstance(node, CharOutFixed):
self._write(f"putchar({node.val});")
else:
raise RuntimeError(f"Unexpected token: {node}")
def __call__(self, tree: Sequence) -> str:
"""run"""
self.write_header()
self.write_code(tree)
self.write_footer()
return self.src
class SimpleConstantAnalyzer:
def __init__(self):
self.toplevel = True
def __call__(self, tree: Sequence) -> Sequence:
known_values: dict[int, int] = dict()
if self.toplevel:
self.toplevel = False
known_values = defaultdict(int)
ret_seq = Sequence()
to_remove = set()
offset = 0
for node in tree:
# prune if we don't know
if isinstance(node, (CharIn)):
known_values.pop(offset + node.offset, 0)
ret_seq.add(node)
elif isinstance(node, (Sequence)):
known_values = {}
offset = 0
ret_seq.add(self(node))
elif isinstance(node, (CharOut)):
if node.offset + offset in known_values:
ret_seq.add(CharOutFixed(
known_values[node.offset + offset]))
else:
ret_seq.add(node)
elif isinstance(node, (AddSub)):
if node.offset + offset in known_values:
of = node.offset + offset
val = known_values[of]
ret_seq.add(SetOperation(of, val + node.val))
known_values[of] = val + node.val
else:
ret_seq.add(node)
elif isinstance(node, PointerMove):
offset += node.val
ret_seq.add(node)
elif isinstance(node, LinearLoop):
if node.offset + offset in known_values:
val = known_values[node.offset + offset]
for of, mul in node.keys.items():
ret_seq.add(SetOperation(of+offset, val*mul))
known_values[of+offset] = val*mul
known_values[offset + node.offset] = 0
ret_seq.add(SetOperation(node.offset + offset, 0))
else:
ret_seq.add(node)
for k in node.keys.keys():
known_values.pop(offset + k, 0)
known_values[offset + node.offset] = 0
elif isinstance(node, CharOutFixed):
ret_seq.add(node)
elif isinstance(node, SetOperation):
known_values[offset + node.offset] = node.val
ret_seq.add(node)
else:
raise RuntimeError(f"Unexpected node: {node}")
return ret_seq
class RemoveUselessOps:
"""For each set/add operation, find next thing that mutates the cell and check if it's used in between. If not, pop/combine with the next"""
def getNextMutate(self, tree: Sequence, index: int) -> int:
"""get the next instruction mutating the cell with offset from point"""
# offset = tree[index].offset
return 0
###########
# STARTUP #
###########
OPT = [
OperationsCombiner(),
CompboundGenerator(),
OffsetGenerator(),
SimpleNopPruner(),
SimpleConstantAnalyzer(),
]
def save_tree(code: Sequence, fname: str):
"""save tree into a file"""
with open(fname, "w") as file:
file.write(str(code))
def run_optimizations(code: Sequence, times=5, need_debug: bool = False) -> Sequence:
"""run all optimizations and might save last step diff"""
for j in range(times):
for i, optimizer in enumerate(OPT):
if need_debug and i == len(OPT)-1:
save_tree(code, "before")
code = optimizer(code)
if need_debug:
save_tree(code, "after")
return code
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="input brainfuck source file")
parser.add_argument("out", nargs="?", default="out.c",
help="output file name (default: out.c)")
parser.add_argument(
"-d", "--diagnose", help="diagnose last optimization step", action="store_true")
args = parser.parse_args()
mainTree = BFParser().parse_file(args.filename)
mainTree = run_optimizations(mainTree, need_debug=args.diagnose)
out = CWriter()(mainTree)
with open(args.out, "w") as outf:
outf.write(out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment