Last active
January 14, 2018 15:28
-
-
Save mildsunrise/d948fe1334f36c9a0f6516e58271f936 to your computer and use it in GitHub Desktop.
Look I made a Whitespace interpreter π
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Usage: ./whitespace.py <file> | |
# Interpreter for the whitespace language. | |
# https://en.wikipedia.org/wiki/Whitespace_(programming_language) | |
# | |
# Implementation details: | |
# - Code needn't have any particular encoding. | |
# - I/O character functions deal with bytes, not Unicode codepoints or anything. | |
# in particular, outputting a number outside [0..255] will result in mod(x, 256) being written. | |
# - I/O number input expects a 10-base integer followed by a linefeed: /^(-|+)?\d+\n$/ | |
# - Heap starts initalized to zero, any integer is a valid address. | |
# - The empty label is valid, a number with no bits is valid. | |
import sys | |
# Define parser primitives & main parser logic | |
class SyntaxException(Exception): | |
pass | |
def next(f): | |
while True: | |
c = f.read(1) | |
if len(c) != 1: raise SyntaxException("Unexpected EOF") | |
if c in [b" ", b"\n", b"\t"]: return c | |
def read_label(f): | |
label = "" | |
while True: | |
c = next(f) | |
if c == b"\n": break | |
label += {b" ": "0", b"\t": "1"}[c] | |
return label | |
def read_number(f): | |
label = read_label(f) | |
sign = label[0] | |
number = int("0"+label[1:], 2) | |
if sign == "1": number *= -1 | |
return number | |
class ASTNode: | |
def __init__(self, name, *args): | |
self.name = name | |
self.args = args | |
IMP_STACK = { | |
b" ": ASTNode("stack_push", "number"), | |
b"\n": { b" ": ASTNode("stack_dup"), | |
b"\t": ASTNode("stack_swap"), | |
b"\n": ASTNode("stack_pop") }, | |
} | |
IMP_ARITHMETIC = { | |
b" ": { b" ": ASTNode("arithmetic_add"), | |
b"\t": ASTNode("arithmetic_sub"), | |
b"\n": ASTNode("arithmetic_mul") }, | |
b"\t": { b" ": ASTNode("arithmetic_div"), | |
b"\t": ASTNode("arithmetic_mod") }, | |
} | |
IMP_HEAP = { | |
b" ": ASTNode("heap_store"), | |
b"\t": ASTNode("heap_retrieve"), | |
} | |
IMP_FLOW_CONTROL = { | |
b" ": { b" ": ASTNode("flow_label", "label"), | |
b"\t": ASTNode("flow_call", "label"), | |
b"\n": ASTNode("flow_jump", "label") }, | |
b"\t": { b" ": ASTNode("flow_jump_if_zero", "label"), | |
b"\t": ASTNode("flow_jump_if_neg", "label"), | |
b"\n": ASTNode("flow_return") }, | |
b"\n": { b"\n": ASTNode("flow_exit") }, | |
} | |
IMP_IO = { | |
b" ": { b" ": ASTNode("io_output_char"), | |
b"\t": ASTNode("io_output_number") }, | |
b"\t": { b" ": ASTNode("io_input_char"), | |
b"\t": ASTNode("io_input_number") }, | |
} | |
ROOT = { | |
b" ": IMP_STACK, | |
b"\t": { b" ": IMP_ARITHMETIC, | |
b"\t": IMP_HEAP, | |
b"\n": IMP_IO }, | |
b"\n": IMP_FLOW_CONTROL, | |
} | |
def parse_ast_node(f, c): | |
# parse the command | |
command = b"" | |
node = ROOT | |
while True: | |
command += c | |
assert type(node) is dict | |
if c not in node: | |
raise SyntaxException("Unknown command %r" % (command,)) | |
node = node[c] | |
if type(node) is ASTNode: break | |
c = next(f) | |
# parse the parameters | |
args = [ { "number": read_number, "label": read_label }[argtype](f) for argtype in node.args ] | |
return { "name": node.name, "args": args } | |
def parse_code(f): | |
ast = [] | |
while True: | |
try: | |
c = next(f) | |
except SyntaxException as e: | |
break # EOF | |
ast.append(parse_ast_node(f, c)) | |
return ast | |
def verify_collect_labels(ast): | |
labels = filter(lambda x: x[1]["name"] == "flow_label", enumerate(ast)) | |
return { node["args"][0]: i for i, node in labels } | |
# FIXME: method to analyze unknown label references, duplicates and other things statically | |
# VM primitives, state & operations | |
class RuntimeException(Exception): | |
pass | |
class Stack: | |
def __init__(self): | |
self.s = [] | |
def push(self, x): | |
assert type(x) is int | |
self.s.append(x) | |
def pop(self): | |
if not len(self.s): | |
raise RuntimeException("Stack has no elements!") | |
return self.s.pop() | |
class VM: | |
def __init__(self, ast, output=sys.stdout.buffer, input=sys.stdin.buffer): | |
self.ast = ast | |
self.labels = verify_collect_labels(ast) | |
self.stack = Stack() | |
self.call_stack = [] | |
self.heap = {} | |
self.current = 0 | |
self.exited = False | |
self.output = output | |
self.input = input | |
def jump_to_label(self, label): | |
if label not in self.labels: | |
raise RuntimeException("Unknown label %s" % (label,)) | |
self.current = self.labels[label] | |
def step(self): | |
""" Load next instruction and call corresponding handler. """ | |
if not (0 <= self.current < len(self.ast)): | |
raise RuntimeException("Outside code, no instruction to execute") | |
node = self.ast[self.current] | |
self.current += 1 | |
return getattr(self, "op_" + node["name"])(*node["args"]) | |
def op_stack_push(self, x): | |
self.stack.push(x) | |
def op_stack_dup(self): | |
x = self.stack.pop() | |
self.stack.push(x) | |
self.stack.push(x) | |
def op_stack_swap(self): | |
x = self.stack.pop() | |
y = self.stack.pop() | |
self.stack.push(x) | |
self.stack.push(y) | |
def op_stack_pop(self): | |
self.stack.pop() | |
def op_arithmetic_add(self): | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.push(left + right) | |
def op_arithmetic_sub(self): | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.push(left - right) | |
def op_arithmetic_mul(self): | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.push(left * right) | |
def op_arithmetic_div(self): | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.push(left / right) | |
def op_arithmetic_mod(self): | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.push(left % right) | |
def op_heap_store(self): | |
value = self.stack.pop() | |
addr = self.stack.pop() | |
self.heap[addr] = value | |
def op_heap_retrieve(self): | |
addr = self.stack.pop() | |
self.stack.push(self.heap[addr] if addr in self.heap else 0) | |
def op_flow_label(self, label): | |
pass | |
def op_flow_jump(self, label): | |
self.jump_to_label(label) | |
def op_flow_jump_if_zero(self, label): | |
if self.stack.pop() == 0: self.jump_to_label(label) | |
def op_flow_jump_if_negative(self, label): | |
if self.stack.pop() < 0: self.jump_to_label(label) | |
def op_flow_call(self, label): | |
self.call_stack.append({ "pos": self.current, "label": label }) | |
self.jump_to_label(label) | |
def op_flow_return(self): | |
if not len(call_stack): | |
raise RuntimeException("No routine to end!") | |
self.current = self.call_stack.pop()["pos"] | |
def op_flow_exit(self): | |
self.exited = True | |
def op_io_input_char(self): | |
value = self.input.read(1) | |
if not len(value): raise RuntimeException("EOF when reading from input") | |
self.stack.push(value[0]) | |
def op_io_input_number(self): | |
value = self.input.readline() # TODO: stricter checking | |
if not len(value): raise RuntimeException("EOF when reading from input") | |
self.stack.push(int(value.strip(), 10)) | |
def op_io_output_char(self): | |
value = self.stack.pop() | |
self.output.write(bytes([ value % 256 ])) | |
def op_io_output_number(self): | |
value = self.stack.pop() | |
self.output.write(str(value).encode("ascii")) | |
# Main code: parse file and run it | |
if __name__ == "__main__": | |
with open(sys.argv[1], "rb") as f: | |
ast = parse_code(f) | |
# FIXME: if there's a parsing error, print offset. flag to print AST | |
vm = VM(ast) | |
while not vm.exited: | |
vm.step() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment