|
#!/usr/bin/env python3 |
|
|
|
# ----------------------------------------------------------------------------------------- |
|
# PBFI: Pythonic BrainFuck Interpreter |
|
# |
|
# @author: Ahmad Alhour (http://aalhour.com) |
|
# @description: BrainFuck Interpreter and REPL in less than 500 lines of Python 3 |
|
# @license: MIT (https://opensource.org/licenses/MIT) |
|
# |
|
# @usage: |
|
# ./brainfuck --help Prints help and exits |
|
# ./brainfuck --repl Opens REPL |
|
# ./brainfuck --program ~/some_program.bf Runs code in ~/some_program.bf |
|
# ./brainfuck --code "++++++[>++++++++++<-]>+++++." Evalues a piece of code |
|
# ----------------------------------------------------------------------------------------- |
|
|
|
# std |
|
import sys |
|
import subprocess |
|
import argparse |
|
|
|
|
|
# ######################################################################################### |
|
# # |
|
# TOKENS # |
|
# # |
|
# ######################################################################################### |
|
# Reads a single input character into the current cell. |
|
READ_CMD = "READ_CMD" |
|
|
|
# Prints the ASCII value at the current cell (i.e. 65 = 'A'). |
|
PRINT_CMD = "PRINT_CMD" |
|
|
|
# Increments the value at the current cell by one. |
|
INC_CELL_CMD = "INC_CELL_CMD" |
|
|
|
# Decrements the value at the current cell by one. |
|
DEC_CELL_CMD = "DEC_CELL_CMD" |
|
|
|
# Moves the data pointer to the next cell (cell on the right). |
|
MOV_PTR_NEXT_CMD = "MOV_PTR_NEXT_CMD" |
|
|
|
# Moves the data pointer to the previous cell (cell on the left). |
|
MOV_PTR_PREV_CMD = "MOV_PTR_PREV_CMD" |
|
|
|
# If the value at the current cell is zero, skips to the corresponding ] . |
|
# Otherwise, move to the next instruction. |
|
BEGIN_LOOP_CMD = "BEGIN_LOOP_CMD" |
|
|
|
# If the value at the current cell is zero, move to the next instruction. |
|
# Otherwise, move backwards in the instructions to the corresponding [ . |
|
END_LOOP_CMD = "END_LOOP_CMD" |
|
|
|
# A matched [ and ] |
|
LOOP_CMD = "LOOP" |
|
|
|
# END OF FILE |
|
EOF = "EOF" |
|
|
|
# Fast lookup tables: Symbol to Token Type mapping, and its reversed |
|
sym2tok = { |
|
",": READ_CMD, |
|
".": PRINT_CMD, |
|
"+": INC_CELL_CMD, |
|
"-": DEC_CELL_CMD, |
|
"<": MOV_PTR_PREV_CMD, |
|
">": MOV_PTR_NEXT_CMD, |
|
"[": BEGIN_LOOP_CMD, |
|
"]": END_LOOP_CMD |
|
} |
|
|
|
tok2sym = {value: key for key, value in sym2tok.items()} |
|
|
|
|
|
# ######################################################################################### |
|
# # |
|
# LAZY LEXER # |
|
# # |
|
# ######################################################################################### |
|
class BrainFuckToken(object): |
|
""" |
|
The BrainFuck Token class. |
|
""" |
|
def __init__(self, ttype, tvalue): |
|
self.type = ttype |
|
self.value = tvalue |
|
|
|
def __str__(self): |
|
return "BrainFuckToken({0}, \"{1}\")".format(self.type, self.value) |
|
|
|
def __repr__(self): |
|
return self.__str__() |
|
|
|
|
|
class Lexer(object): |
|
def __init__(self, input_text=None): |
|
""" |
|
Initializer. |
|
""" |
|
self.curr_pos = 0 |
|
self.curr_char = None |
|
self.input_text = input_text if input_text and isinstance(input_text, str) else "" |
|
|
|
def scan(self, input_text): |
|
""" |
|
Initilizes the lexer with an input text for scanning. |
|
""" |
|
self.curr_pos = 0 |
|
self.curr_char = None |
|
self.input_text = input_text |
|
|
|
if not input_text or not isinstance(input_text, str): |
|
raise ValueError("Input text must be a non-empty string!") |
|
else: |
|
self.curr_char = input_text[0] |
|
|
|
return BrainFuckToken(EOF, None) |
|
|
|
def advance(self): |
|
""" |
|
Advances the lexer pointer ahead and assigns the next character to self.curr_char. |
|
""" |
|
if not self.input_text or not isinstance(self.input_text, str): |
|
raise("Please initialize the input text with the scan method first!") |
|
|
|
self.curr_pos += 1 |
|
|
|
if self.curr_pos >= len(self.input_text): |
|
self.curr_char = None |
|
else: |
|
self.curr_char = self.input_text[self.curr_pos] |
|
|
|
def next_token(self): |
|
""" |
|
Returns a Token representation of the next valid token in the tape or EOF. |
|
""" |
|
if not self.input_text or not isinstance(self.input_text, str): |
|
raise("Please initialize the input text with the scan method first!") |
|
|
|
while self.curr_char is not None: |
|
value = self.curr_char |
|
if value in sym2tok: |
|
self.advance() |
|
return BrainFuckToken(ttype=sym2tok[value], tvalue=value) |
|
else: |
|
self.advance() |
|
# EOF |
|
return BrainFuckToken(EOF, None) |
|
|
|
|
|
|
|
# ######################################################################################### |
|
# # |
|
# PARSER # |
|
# # |
|
# ######################################################################################### |
|
class Parser(object): |
|
""" |
|
BrainFuck Grammar Parser. |
|
""" |
|
def __init__(self, lexer=None): |
|
""" |
|
Initializer. |
|
""" |
|
if lexer is None: |
|
self._lexer = Lexer() |
|
elif isinstance(lexer, Lexer): |
|
self._lexer = lexer |
|
else: |
|
raise("Lexer should be of type `Lexer` or None. If it's None, it will be created automatically.") |
|
self.curr_token = None |
|
|
|
def set_lexer(self, new_lexer): |
|
""" |
|
Sets internal lexer to a new instance. |
|
""" |
|
if not isinstance(new_lexer, Lexer): |
|
raise("Lexer can only be of type `Lexer`.") |
|
self._lexer = new_lexer |
|
|
|
def parse(self, input_text): |
|
""" |
|
Parser entry point. Parses the complete BrainFuck Language. |
|
|
|
program : while_loop (program)* |
|
| terminal_command (program)* |
|
|
|
while_loop : BEGIN_LOOP_CMD program END_LOOP_CMD |
|
|
|
terminal_command : READ_CMD |
|
| PRINT_CMD |
|
| INC_CELL_CMD |
|
| DEC_CELL_CMD |
|
| MOV_PTR_NEXT_CMD |
|
| MOV_PTR_PREV_CMD |
|
""" |
|
# Scan the input text |
|
self._lexer.scan(input_text) |
|
self.curr_token = self._lexer.next_token() |
|
|
|
# Begin parsing |
|
cmd_node = self._program() |
|
if self.curr_token.type != EOF: |
|
self.error(EOF) |
|
|
|
# Return AST |
|
return cmd_node |
|
|
|
def error(self, expected_token_type=None): |
|
""" |
|
Error reporting. |
|
""" |
|
if expected_token_type: |
|
msg = 'Invalid syntax at {!r}. Expected: {!r}'.format( |
|
self.curr_token, expected_token_type) |
|
else: |
|
msg = 'Invalid syntax at {!r}.'.format(self.curr_token) |
|
|
|
raise Exception(msg) |
|
|
|
def consume(self, token_type): |
|
""" |
|
Checked tokens consumption. |
|
""" |
|
if self.curr_token is None: |
|
raise ValueError("Please run parse method with an input program text first!") |
|
|
|
if self.curr_token.type == token_type: |
|
self.curr_token = self._lexer.next_token() |
|
else: |
|
self.error(token_type) |
|
|
|
# ############################## PRIVATE ########################### |
|
|
|
def _program(self): |
|
""" |
|
program : while_loop (program)* |
|
| terminal_command (program)* |
|
""" |
|
if self.curr_token is None: |
|
raise ValueError("Please run parse method with an input program text first!") |
|
|
|
ast = [] |
|
token_types = tok2sym.keys() |
|
|
|
while self.curr_token.type in token_types: |
|
token = self.curr_token |
|
|
|
# Parse while_loop |
|
if token.type == BEGIN_LOOP_CMD: |
|
cmd_node = self._while_loop() |
|
elif token.type == END_LOOP_CMD: |
|
return ast |
|
# Parse terminal_command |
|
else: |
|
cmd_node = self._terminal_command() |
|
|
|
# Append to AST (list of commands) |
|
ast.append(cmd_node) |
|
|
|
return ast |
|
|
|
def _while_loop(self): |
|
""" |
|
while_loop : BEGIN_LOOP_CMD program END_LOOP_CMD |
|
""" |
|
if self.curr_token is None: |
|
raise ValueError("Please run parse method with an input program text first!") |
|
|
|
self.consume(BEGIN_LOOP_CMD) |
|
cmd_node = (LOOP_CMD, self._program()) |
|
self.consume(END_LOOP_CMD) |
|
return cmd_node |
|
|
|
def _terminal_command(self): |
|
""" |
|
terminal_command : READ_CMD |
|
| PRINT_CMD |
|
| INC_CELL_CMD |
|
| DEC_CELL_CMD |
|
| MOV_PTR_NEXT_CMD |
|
| MOV_PTR_PREV_CMD |
|
""" |
|
if self.curr_token is None: |
|
raise ValueError("Please run parse method with an input program text first!") |
|
|
|
token = self.curr_token |
|
cmd_node = (token.type,) |
|
self.consume(token.type) |
|
return cmd_node |
|
|
|
|
|
|
|
# ######################################################################################### |
|
# # |
|
# INTERPRETER and TAPE # |
|
# # |
|
# ######################################################################################### |
|
class BrainFuckTape(object): |
|
""" |
|
The BrainFuck Tape Data Structure. |
|
Implemented internally as a dictionary for on-demand allocation of values |
|
at specific indices. |
|
""" |
|
@property |
|
def MAX_INDEX(self): |
|
return 30000 |
|
|
|
def __init__(self): |
|
self.map = {} |
|
|
|
def __getitem__(self, index): |
|
# return 0 if index was not in self.map |
|
return self.map.get(index, 0) |
|
|
|
def __setitem__(self, index, value): |
|
self.map[index] = value |
|
|
|
|
|
### |
|
# GLOBAL TAPE STATES |
|
global_tape_index = 0 |
|
global_tape = BrainFuckTape() |
|
|
|
|
|
def bf_eval(commands_ast): |
|
""" |
|
BrainFuck AST Nodes Evaluator. |
|
|
|
Evaluates every node in a given AST representation of a BrainFuck program. |
|
The AST is expected to be a linear collection of tuples. Every tuple's first |
|
item is the type of the command. Only Loop Commands contain two items, where |
|
the second item is another linear collection of commands. |
|
""" |
|
global global_tape |
|
global global_tape_index |
|
|
|
for cmd_node in commands_ast: |
|
if not cmd_node: |
|
return None |
|
# Move pointer to previous cell |
|
elif cmd_node[0] == MOV_PTR_PREV_CMD: |
|
if global_tape_index > 0: |
|
global_tape_index -= 1 |
|
else: |
|
raise Exception("Error: index out of bound! Min allowed index value is 0.") |
|
# Move pointer to next cell |
|
elif cmd_node[0] == MOV_PTR_NEXT_CMD: |
|
if global_tape_index < global_tape.MAX_INDEX: |
|
global_tape_index += 1 |
|
else: |
|
raise Exception("Error: index out of bound! Max allowed index value is {}".format( |
|
global_tape.MAX_INDEX)) |
|
# Print the current cell's contents |
|
elif cmd_node[0] == PRINT_CMD: |
|
cell_val = global_tape[global_tape_index] |
|
print(chr(cell_val), sep='', end='') |
|
# Read input to current cell |
|
elif cmd_node[0] == READ_CMD: |
|
str_input = str(input()) |
|
if len(str_input) > 0: |
|
global_tape[global_tape_index] = ord(str_input[0]) |
|
# Increment the contents of current cell |
|
elif cmd_node[0] == INC_CELL_CMD: |
|
global_tape[global_tape_index] += 1 |
|
# Decrement the contents of current cell |
|
elif cmd_node[0] == DEC_CELL_CMD: |
|
global_tape[global_tape_index] -= 1 |
|
# Loop until the current cells contents are equal to 0 |
|
elif cmd_node[0] == LOOP_CMD: |
|
while global_tape[global_tape_index] != 0: |
|
bf_eval(cmd_node[1]) |
|
|
|
|
|
def bf_interpret(program_text): |
|
""" |
|
BrainFuck Program Text Interpreter. |
|
""" |
|
try: |
|
brainfuck_parser = Parser() |
|
bfcode = bfcode = brainfuck_parser.parse(program_text) |
|
bf_eval(bfcode) |
|
except (KeyboardInterrupt, SystemExit): |
|
print("\r\nReceived KeyboardInterrupt, stopping...") |
|
print("Goodbye!") |
|
sys.exit(0) |
|
except Exception as err: |
|
raise |
|
|
|
|
|
def bf_repl(): |
|
""" |
|
BrainFuck REPL. |
|
""" |
|
print("BrainFuck on Python!") |
|
print("Type :clear to clear the screen. Type :quit to exit REPL.") |
|
|
|
# Initialize the parser |
|
brainfuck_parser = Parser() |
|
|
|
while True: |
|
try: |
|
program = input('BrainFuck> ') |
|
except (EOFError, KeyboardInterrupt, SystemExit): |
|
break |
|
|
|
if not program: |
|
continue |
|
|
|
if program == ":clear": |
|
subprocess.call("clear") |
|
elif program == ":quit": |
|
break |
|
else: |
|
try: |
|
bf_code = brainfuck_parser.parse(program_text) |
|
bf_eval(bf_code) |
|
print() |
|
except (KeyboardInterrupt, SystemExit): |
|
print("\r\nReceived KeyboardInterrupt, stopping...") |
|
break |
|
except Exception as err: |
|
print("An error has occurred: {!r}".format(err)) |
|
|
|
print("\r\nGoodbye!") |
|
|
|
|
|
|
|
# ######################################################################################### |
|
# # |
|
# MAIN PROGRAM ENTRY # |
|
# # |
|
# ######################################################################################### |
|
def create_argparser(): |
|
arg_parser = argparse.ArgumentParser(prog="brainfuck") |
|
|
|
# --repl argument |
|
arg_parser.add_argument( |
|
"--repl", |
|
action="store_true", default=False, |
|
help="BrainFuck REPL: interactive mode.") |
|
|
|
# --program argument |
|
arg_parser.add_argument( |
|
"--program", |
|
type=str, nargs=1, default=None, |
|
help="Path to a BrainFuck program file, for example: ./brainfuck --program ~/hello_world.bf") |
|
|
|
# --code argument |
|
arg_parser.add_argument( |
|
"--code", |
|
type=str, nargs=1, default=None, |
|
help="A double-quoted BrainFuck source code program, for example: ./brainfuck --code \"++++ ++++\"") |
|
|
|
return arg_parser |
|
|
|
|
|
def main(): |
|
arg_parser = create_argparser() |
|
args = arg_parser.parse_args() |
|
|
|
# First, --repl has the highest priority |
|
if args.repl is True: |
|
bf_repl() |
|
# Second, --program has the second highest priority. |
|
elif args.program is not None and len(args.program) > 0: |
|
program_code = "" |
|
try: |
|
with open(args.program[0], mode="r", encoding="utf-8") as file: |
|
program_code = file.read() |
|
except (IOError, FileNotFoundError): |
|
print("Error: file was not found!") |
|
sys.exit(1) |
|
bf_interpret(program_code) |
|
print("") |
|
# Third, --code has the lowest priority. |
|
elif args.code is not None and len(args.code) > 0: |
|
bf_interpret(args.code[0]) |
|
print("") |
|
# Nothing was specified, print help. |
|
else: |
|
arg_parser.print_help() |
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|
Hey! Cool example. It was a while back, but if you'd like to see another implementation of Brainfuck in Python 3, you can see mine and compare 😄