Skip to content

Instantly share code, notes, and snippets.

@mildsunrise
Last active January 14, 2018 15:28
Show Gist options
  • Save mildsunrise/d948fe1334f36c9a0f6516e58271f936 to your computer and use it in GitHub Desktop.
Save mildsunrise/d948fe1334f36c9a0f6516e58271f936 to your computer and use it in GitHub Desktop.
Look I made a Whitespace interpreter 😊
#!/usr/bin/env python3
# Usage: ./whitespace.py <file>
# Interpreter for the whitespace language.
# https://en.wikipedia.org/wiki/Whitespace_(programming_language)
#
# Implementation details:
# - Code needn't have any particular encoding.
# - I/O character functions deal with bytes, not Unicode codepoints or anything.
# in particular, outputting a number outside [0..255] will result in mod(x, 256) being written.
# - I/O number input expects a 10-base integer followed by a linefeed: /^(-|+)?\d+\n$/
# - Heap starts initalized to zero, any integer is a valid address.
# - The empty label is valid, a number with no bits is valid.
import sys
# Define parser primitives & main parser logic
class SyntaxException(Exception):
pass
def next(f):
while True:
c = f.read(1)
if len(c) != 1: raise SyntaxException("Unexpected EOF")
if c in [b" ", b"\n", b"\t"]: return c
def read_label(f):
label = ""
while True:
c = next(f)
if c == b"\n": break
label += {b" ": "0", b"\t": "1"}[c]
return label
def read_number(f):
label = read_label(f)
sign = label[0]
number = int("0"+label[1:], 2)
if sign == "1": number *= -1
return number
class ASTNode:
def __init__(self, name, *args):
self.name = name
self.args = args
IMP_STACK = {
b" ": ASTNode("stack_push", "number"),
b"\n": { b" ": ASTNode("stack_dup"),
b"\t": ASTNode("stack_swap"),
b"\n": ASTNode("stack_pop") },
}
IMP_ARITHMETIC = {
b" ": { b" ": ASTNode("arithmetic_add"),
b"\t": ASTNode("arithmetic_sub"),
b"\n": ASTNode("arithmetic_mul") },
b"\t": { b" ": ASTNode("arithmetic_div"),
b"\t": ASTNode("arithmetic_mod") },
}
IMP_HEAP = {
b" ": ASTNode("heap_store"),
b"\t": ASTNode("heap_retrieve"),
}
IMP_FLOW_CONTROL = {
b" ": { b" ": ASTNode("flow_label", "label"),
b"\t": ASTNode("flow_call", "label"),
b"\n": ASTNode("flow_jump", "label") },
b"\t": { b" ": ASTNode("flow_jump_if_zero", "label"),
b"\t": ASTNode("flow_jump_if_neg", "label"),
b"\n": ASTNode("flow_return") },
b"\n": { b"\n": ASTNode("flow_exit") },
}
IMP_IO = {
b" ": { b" ": ASTNode("io_output_char"),
b"\t": ASTNode("io_output_number") },
b"\t": { b" ": ASTNode("io_input_char"),
b"\t": ASTNode("io_input_number") },
}
ROOT = {
b" ": IMP_STACK,
b"\t": { b" ": IMP_ARITHMETIC,
b"\t": IMP_HEAP,
b"\n": IMP_IO },
b"\n": IMP_FLOW_CONTROL,
}
def parse_ast_node(f, c):
# parse the command
command = b""
node = ROOT
while True:
command += c
assert type(node) is dict
if c not in node:
raise SyntaxException("Unknown command %r" % (command,))
node = node[c]
if type(node) is ASTNode: break
c = next(f)
# parse the parameters
args = [ { "number": read_number, "label": read_label }[argtype](f) for argtype in node.args ]
return { "name": node.name, "args": args }
def parse_code(f):
ast = []
while True:
try:
c = next(f)
except SyntaxException as e:
break # EOF
ast.append(parse_ast_node(f, c))
return ast
def verify_collect_labels(ast):
labels = filter(lambda x: x[1]["name"] == "flow_label", enumerate(ast))
return { node["args"][0]: i for i, node in labels }
# FIXME: method to analyze unknown label references, duplicates and other things statically
# VM primitives, state & operations
class RuntimeException(Exception):
pass
class Stack:
def __init__(self):
self.s = []
def push(self, x):
assert type(x) is int
self.s.append(x)
def pop(self):
if not len(self.s):
raise RuntimeException("Stack has no elements!")
return self.s.pop()
class VM:
def __init__(self, ast, output=sys.stdout.buffer, input=sys.stdin.buffer):
self.ast = ast
self.labels = verify_collect_labels(ast)
self.stack = Stack()
self.call_stack = []
self.heap = {}
self.current = 0
self.exited = False
self.output = output
self.input = input
def jump_to_label(self, label):
if label not in self.labels:
raise RuntimeException("Unknown label %s" % (label,))
self.current = self.labels[label]
def step(self):
""" Load next instruction and call corresponding handler. """
if not (0 <= self.current < len(self.ast)):
raise RuntimeException("Outside code, no instruction to execute")
node = self.ast[self.current]
self.current += 1
return getattr(self, "op_" + node["name"])(*node["args"])
def op_stack_push(self, x):
self.stack.push(x)
def op_stack_dup(self):
x = self.stack.pop()
self.stack.push(x)
self.stack.push(x)
def op_stack_swap(self):
x = self.stack.pop()
y = self.stack.pop()
self.stack.push(x)
self.stack.push(y)
def op_stack_pop(self):
self.stack.pop()
def op_arithmetic_add(self):
right = self.stack.pop()
left = self.stack.pop()
self.stack.push(left + right)
def op_arithmetic_sub(self):
right = self.stack.pop()
left = self.stack.pop()
self.stack.push(left - right)
def op_arithmetic_mul(self):
right = self.stack.pop()
left = self.stack.pop()
self.stack.push(left * right)
def op_arithmetic_div(self):
right = self.stack.pop()
left = self.stack.pop()
self.stack.push(left / right)
def op_arithmetic_mod(self):
right = self.stack.pop()
left = self.stack.pop()
self.stack.push(left % right)
def op_heap_store(self):
value = self.stack.pop()
addr = self.stack.pop()
self.heap[addr] = value
def op_heap_retrieve(self):
addr = self.stack.pop()
self.stack.push(self.heap[addr] if addr in self.heap else 0)
def op_flow_label(self, label):
pass
def op_flow_jump(self, label):
self.jump_to_label(label)
def op_flow_jump_if_zero(self, label):
if self.stack.pop() == 0: self.jump_to_label(label)
def op_flow_jump_if_negative(self, label):
if self.stack.pop() < 0: self.jump_to_label(label)
def op_flow_call(self, label):
self.call_stack.append({ "pos": self.current, "label": label })
self.jump_to_label(label)
def op_flow_return(self):
if not len(call_stack):
raise RuntimeException("No routine to end!")
self.current = self.call_stack.pop()["pos"]
def op_flow_exit(self):
self.exited = True
def op_io_input_char(self):
value = self.input.read(1)
if not len(value): raise RuntimeException("EOF when reading from input")
self.stack.push(value[0])
def op_io_input_number(self):
value = self.input.readline() # TODO: stricter checking
if not len(value): raise RuntimeException("EOF when reading from input")
self.stack.push(int(value.strip(), 10))
def op_io_output_char(self):
value = self.stack.pop()
self.output.write(bytes([ value % 256 ]))
def op_io_output_number(self):
value = self.stack.pop()
self.output.write(str(value).encode("ascii"))
# Main code: parse file and run it
if __name__ == "__main__":
with open(sys.argv[1], "rb") as f:
ast = parse_code(f)
# FIXME: if there's a parsing error, print offset. flag to print AST
vm = VM(ast)
while not vm.exited:
vm.step()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment