Skip to content

Instantly share code, notes, and snippets.

@aciceri
Last active Aug 27, 2019
Embed
What would you like to do?
Brainfuck to Python bytecode compiler
#!/usr/bin/env python3
from sys import exit, stdin
from argparse import ArgumentParser, RawDescriptionHelpFormatter, FileType
from types import CodeType
from dis import dis, opmap
import marshal
from importlib.util import MAGIC_NUMBER
from textwrap import dedent
# Command-line interface.  The banner is a raw string so the backslashes of
# the ASCII art are taken literally instead of being parsed as (invalid)
# escape sequences; the resulting text is unchanged.
cliParser = ArgumentParser(prog='bf2pyc',
                           formatter_class=RawDescriptionHelpFormatter,
                           description=r'''
____ __ ____ ____
| __ ) / _|___ \| _ \ _ _ ___
| _ \| |_ __) | |_) | | | |/ __|
| |_) | _|/ __/| __/| |_| | (__
|____/|_| |_____|_| \__, |\___|
|___/
Simple Brainfuck to Python Bytecode compiler.
''')
cliParser.add_argument('--version', action='version', version='%(prog)s 0.1')
cliParser.add_argument('input',
                       default=stdin,  # if no file is supplied
                       type=FileType('r'),
                       nargs='?',
                       help='brainfuck source file, if omitted standard input is read')
cliParser.add_argument('--arraysize',
                       action='store',
                       dest='arraysize',
                       metavar='N',
                       type=int,
                       default=30000,
                       help='size of the brainfuck array')
cliParser.add_argument('-o', '--output',
                       default='out.pyc',
                       dest='outputfile',
                       type=str,
                       action='store',
                       help='output .pyc file path, if omitted is "out.pyc"')
cliParser.add_argument('-s', '--show',
                       dest='show',
                       action='store_true',
                       help='show the compiled bytecode without creating the output .pyc file')

args = cliParser.parse_args()
# The compiled program allocates [0] * arraysize as its tape; zero or a
# negative size would give an empty list that fails on the first cell access.
if args.arraysize < 1:
    cliParser.error('--arraysize must be a positive integer')

source = args.input.read()  # whole brainfuck program as text
arraySize = args.arraysize
def parse(src):
    """Parse brainfuck source text into a nested-list syntax tree.

    Consecutive ``+``/``-`` are folded into a single int (the net increment
    modulo 256); ``>``, ``<``, ``.`` and ``,`` are kept as one-character
    strings; a loop becomes the three items ``'['``, ``[...body...]``,
    ``']'``.  Every other character is ignored.

    Args:
        src: the brainfuck program as a string.

    Returns:
        The syntax tree as a (possibly nested) list.

    Raises:
        ValueError: if the brackets in *src* are not balanced.
    """
    stack = []  # offsets of the '[' still waiting for their ']'
    endAt = {}  # offset of each '[' -> offset of its matching ']'
    for i, char in enumerate(src):
        if char == '[':
            stack.append(i)
        elif char == ']':
            if not stack:
                raise ValueError(f"unmatched ']' at offset {i}")
            endAt[stack.pop()] = i
    if stack:
        raise ValueError(f"unmatched '[' at offset {stack[-1]}")

    def recParse(start, end):
        """Parse src[start:end] (end is exclusive) into a list of nodes."""
        ast = []
        i = start
        while i < end:
            char = src[i]
            if char in ('+', '-'):
                delta = 1 if char == '+' else -1
                if ast and isinstance(ast[-1], int):
                    # extend the current run of increments/decrements
                    ast[-1] = (ast[-1] + delta) % 256
                else:
                    ast.append(delta % 256)
            elif char in ('>', '<', '.', ','):
                ast.append(char)
            elif char == '[':
                ast.append('[')
                ast.append(recParse(i + 1, endAt[i]))
                ast.append(']')
                i = endAt[i]  # resume right after the matching ']'
            i += 1
        return ast

    # The end bound is exclusive, so the whole program is src[0:len(src)];
    # stopping at len(src) - 1 would drop the final character.
    return recParse(0, len(src))
def visit(visitor, ast):
    """Walk *ast* depth-first, left to right, calling *visitor* on each leaf.

    Items that are lists are descended into; every other item is passed to
    *visitor* in the order it is met.
    """
    pending = [iter(ast)]  # iterators of the lists being walked, innermost last
    while pending:
        for node in pending[-1]:
            if isinstance(node, list):
                # descend: the outer iterator keeps its position for later
                pending.append(iter(node))
                break
            visitor(node)
        else:
            # innermost list exhausted, resume its parent
            pending.pop()
# Prologue emitted at the start of every compiled program.  The buffer is a
# flat sequence of (opcode, operand) byte pairs; an EXTENDED_ARG pair supplies
# the high byte of the operand of the instruction that follows it.  Operands
# index the consts / names / varnames tuples passed to CodeType further down:
# constant k + 1 is the int k, constants 258..261 are '', ('end',), arraySize
# and ('stdin',), and locals 0/1/2 are array/pointer/stdin.
instructions = bytearray([
    opmap['LOAD_CONST'], 1,         # push 0 (import level)
    opmap['EXTENDED_ARG'], 1,       # high byte for the next operand (+256)
    opmap['LOAD_CONST'], 5,         # push const 261: ('stdin',) (from-list)
    opmap['IMPORT_NAME'], 4,        # import names[4]: 'sys'
    opmap['IMPORT_FROM'], 5,        # fetch names[5]: 'stdin' from the module
    opmap['STORE_FAST'], 2,         # local 2 (stdin) = sys.stdin
    opmap['POP_TOP'], 0,            # drop the module left on the stack
    opmap['LOAD_CONST'], 1,         # push 0
    opmap['STORE_FAST'], 1,         # local 1 (pointer) = 0
    opmap['LOAD_CONST'], 1,         # push 0
    opmap['BUILD_LIST'], 1,         # [0]
    opmap['EXTENDED_ARG'], 1,       # high byte for the next operand (+256)
    opmap['LOAD_CONST'], 4,         # push const 260: arraySize
    opmap['BINARY_MULTIPLY'], 0,    # [0] * arraySize
    opmap['STORE_FAST'], 0,         # local 0 (array) = the zero-filled tape
])
addresses = []  # offsets of the "[" placeholders still waiting for their "]"


def visitor(x):
    """Append to ``instructions`` the bytecode for one syntax-tree leaf.

    *x* is either an int (amount added to the current cell) or one of the
    characters ``. , < > [ ]``; any other value emits nothing.  Operands
    index the consts / names / varnames tuples handed to CodeType further
    down: locals 0 and 1 are the array and the pointer, constant ``k + 1``
    is the int ``k``, and an ``EXTENDED_ARG 1`` prefix adds 256 to the
    operand of the instruction after it.
    """
    if isinstance(x, int):
        # array[pointer] = (array[pointer] + x) % 256
        index = x + 1  # constant slot holding the int x
        instructions.extend((
            opmap['LOAD_FAST'], 0,
            opmap['LOAD_FAST'], 1,
            opmap['BINARY_SUBSCR'], 0,
            opmap['EXTENDED_ARG'], (index >> 8) & 0xff,
            opmap['LOAD_CONST'], index & 0xff,
            opmap['BINARY_ADD'], 0,
            opmap['EXTENDED_ARG'], 1,
            opmap['LOAD_CONST'], 1,  # const 257: the int 256
            opmap['BINARY_MODULO'], 0,
            opmap['LOAD_FAST'], 0,
            opmap['LOAD_FAST'], 1,
            opmap['STORE_SUBSCR'], 0,
        ))
        return

    if x == '<' or x == '>':
        # pointer = pointer - 1   or   pointer = pointer + 1
        step = opmap['BINARY_SUBTRACT'] if x == '<' else opmap['BINARY_ADD']
        instructions.extend((
            opmap['LOAD_FAST'], 1,
            opmap['LOAD_CONST'], 2,  # const 2: the int 1
            step, 0,
            opmap['STORE_FAST'], 1,
        ))
        return

    if x == '.':
        # print(chr(array[pointer]), end='')
        instructions.extend((
            opmap['LOAD_GLOBAL'], 0,
            opmap['LOAD_GLOBAL'], 3,
            opmap['LOAD_FAST'], 0,
            opmap['LOAD_FAST'], 1,
            opmap['BINARY_SUBSCR'], 0,
            opmap['CALL_FUNCTION'], 1,
            opmap['EXTENDED_ARG'], 1,
            opmap['LOAD_CONST'], 2,  # const 258: ''
            opmap['EXTENDED_ARG'], 1,
            opmap['LOAD_CONST'], 3,  # const 259: ('end',)
            opmap['CALL_FUNCTION_KW'], 2,
            opmap['POP_TOP'], 0,
        ))
        return

    if x == ',':
        # array[pointer] = ord(stdin.read(1))
        instructions.extend((
            opmap['LOAD_GLOBAL'], 2,
            opmap['LOAD_FAST'], 2,
            opmap['LOAD_METHOD'], 6,
            opmap['LOAD_CONST'], 2,  # const 2: the int 1
            opmap['CALL_METHOD'], 1,
            opmap['CALL_FUNCTION'], 1,
            opmap['LOAD_FAST'], 0,
            opmap['LOAD_FAST'], 1,
            opmap['STORE_SUBSCR'], 0,
        ))
        return

    if x == '[':
        # Reserve six instruction slots (12 bytes); they are overwritten with
        # the loop test once the matching "]" fixes the exit offset.
        addresses.append(len(instructions))
        instructions.extend((opmap['NOP'], 0) * 6)
        return

    if x == ']':
        loopStart = addresses.pop()
        # Jump back to the loop test; two EXTENDED_ARG prefixes carry the
        # upper 16 bits of the target offset.
        instructions.extend((
            opmap['EXTENDED_ARG'], (loopStart >> 16) & 0xff,
            opmap['EXTENDED_ARG'], (loopStart >> 8) & 0xff,
            opmap['JUMP_ABSOLUTE'], loopStart & 0xff,
        ))
        loopExit = len(instructions)  # first byte after the back jump
        # Fill the placeholder: leave the loop when array[pointer] is 0.
        instructions[loopStart:loopStart + 12] = (
            opmap['LOAD_FAST'], 0,
            opmap['LOAD_FAST'], 1,
            opmap['BINARY_SUBSCR'], 0,
            opmap['EXTENDED_ARG'], (loopExit >> 16) & 0xff,
            opmap['EXTENDED_ARG'], (loopExit >> 8) & 0xff,
            opmap['POP_JUMP_IF_FALSE'], loopExit & 0xff,
        )
# Compile: parse the source, then let the visitor emit bytecode leaf by leaf.
ast = parse(source)
visit(visitor, ast)

# Epilogue shared by every program: return None.  Instructions are two bytes
# each (opcode, operand), so RETURN_VALUE gets an explicit operand byte to
# keep the code string an even length.
instructions.extend([
    opmap['LOAD_CONST'], 0,
    opmap['RETURN_VALUE'], 0,
])

# NOTE(review): this positional argument list looks like the pre-3.8 CodeType
# signature (no posonlyargcount) — confirm the targeted interpreter version.
code = CodeType(
    0,  # argcount
    0,  # kwonlyargcount
    3,  # nlocals
    1000,  # stacksize
    0,  # flags
    bytes(instructions),  # codestring
    (None, *range(257), '', ('end',), arraySize, ('stdin',)),  # consts
    ('print', 'input', 'ord', 'chr', 'sys', 'stdin', 'read'),  # names
    ('array', 'pointer', 'stdin'),  # varnames
    args.outputfile,  # filename
    args.outputfile,  # name
    0,  # firstlineno
    bytes(),  # lnotab
    (),  # freevars
    ()  # cellvars
)

if args.show:
    # dis() prints the listing itself and returns None, so wrapping it in
    # print() would add a stray "None" line after the bytecode.
    dis(code)
    exit(0)

with open(args.outputfile, 'wb+') as out:
    # 16-byte .pyc header: the interpreter's magic number (version dependent)
    # followed by 12 zero bytes (presumably the flags, mtime and source-size
    # fields of the header — verify against the pyc format of the target).
    out.write(MAGIC_NUMBER)
    out.write(bytes([0] * 12))
    marshal.dump(code, out)
exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment