Last active
November 10, 2022 07:26
-
-
Save tekknolagi/b08b97fb532b52c32e58e706e9378cfd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
PYPY=~/Downloads/pypy-c-jit-106258-39dc1c343c50-linux64 | |
PYTHONPATH="${PYPY}:${PYPY}/pypy" \ | |
"${PYPY}"/bin/pypy \ | |
"${PYPY}"/pypy/rpython \ | |
-O2 --batch targetlox.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
PYPY=~/Downloads/pypy-c-jit-106258-39dc1c343c50-linux64 | |
PYTHONPATH="${PYPY}:${PYPY}/pypy" \ | |
"${PYPY}"/bin/pypy \ | |
"${PYPY}"/pypy/rpython \ | |
-O0 --batch --lldebug targetlox.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun fact(n) { | |
if (n < 2) { | |
return n; | |
} | |
return n * fact(n - 1); | |
} | |
print fact(5); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun fib(n) { | |
if (n < 2) return n; | |
return fib(n - 2) + fib(n - 1); | |
} | |
var start = clock(); | |
print fib(35); | |
print clock() - start; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for (var x = 0; x < 10; x = x + 1) { | |
print x; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun foo(a, b, c) { | |
var x = a + b + c; | |
print x; | |
} | |
print foo(1,2,3); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var x = false; | |
var y = true; | |
if (x or y) { | |
print "x is true!"; | |
} else { | |
print "x is false!"; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun a() { b(); } | |
fun b() { c(); } | |
fun c() { | |
c("too", "many"); | |
} | |
a(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun sub(l, r) { | |
var x = 1; | |
var y = 2; | |
var z = 3; | |
return l - r; | |
} | |
print sub(10, 4); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continue at https://www.craftinginterpreters.com/calls-and-functions.html#native-functions | |
import time | |
from rpython.rlib.parsing.regexparse import parse_regex | |
from rpython.rlib.parsing.lexer import Lexer, Token, SourcePos | |
( | |
OP_CONSTANT, | |
OP_NIL, | |
OP_TRUE, | |
OP_FALSE, | |
OP_EQUAL, | |
OP_GREATER, | |
OP_LESS, | |
OP_ADD, | |
OP_SUBTRACT, | |
OP_MULTIPLY, | |
OP_DIVIDE, | |
OP_NOT, | |
OP_NEGATE, | |
OP_PRINT, | |
OP_POP, | |
OP_GET_LOCAL, | |
OP_SET_LOCAL, | |
OP_GET_GLOBAL, | |
OP_DEFINE_GLOBAL, | |
OP_SET_GLOBAL, | |
OP_JUMP, | |
OP_JUMP_IF_FALSE, | |
OP_LOOP, | |
OP_CALL, | |
OP_CLOSURE, | |
OP_RETURN, | |
) = xrange(26) | |
class Value: | |
pass | |
class NilType(Value): | |
def __repr__(self): | |
return "nil" | |
NilValue = NilType() | |
class BoolValue(Value): | |
def __init__(self, value): | |
self.value = value | |
def __repr__(self): | |
return "true" if self.value else "false" | |
TrueValue = BoolValue(True) | |
FalseValue = BoolValue(False) | |
class NumberValue(Value): | |
def __init__(self, value): | |
self.value = value | |
def __repr__(self): | |
return str(self.value) | |
class Object(Value): | |
pass | |
class String(Object): | |
def __init__(self, value): | |
self.value = value | |
def __repr__(self): | |
return str(self.value) | |
class Function(Object): | |
def __init__(self, arity, chunk, name): | |
self.arity = arity | |
self.chunk = chunk | |
self._name = name | |
@staticmethod | |
def script(): | |
return Function(arity=-1, chunk=Chunk(), name=None) | |
def name(self): | |
return self._name or "<script>" | |
def __repr__(self): | |
return "<fn %s>" % self.name() | |
class NativeFunction(Object): | |
def __init__(self, function): | |
self.function = function | |
def __repr__(self): | |
return "<native fn>" | |
class Closure(Object): | |
def __init__(self, function): | |
self.function = function | |
def __repr__(self): | |
return "<closure %s>" % self.function | |
def print_value(value): | |
print value.__repr__() | |
def left_pad(string, width, char=" "): | |
l = len(string) | |
if l > width: | |
return string | |
return char * (width - l) + string | |
def right_pad(string, width, char=" "): | |
l = len(string) | |
if l > width: | |
return string | |
return string + char * (width - l) | |
class Chunk: | |
def __init__(self): | |
self.code = [] | |
self.constants = [] | |
self.lines = [] | |
def write(self, byte, line): | |
self.code.append(byte) | |
self.lines.append(line) | |
def add_constant(self, value): | |
self.constants.append(value) | |
return len(self.constants) - 1 | |
def disassemble(self, name): | |
print "==", name, "==" | |
offset = 0 | |
while offset < len(self.code): | |
offset = self.disassemble_instruction(offset) | |
def disassemble_instruction(self, offset): | |
# TODO(max): Implement %04d | |
WIDTH = 4 | |
print left_pad("%d" % offset, WIDTH), | |
if offset > 0 and self.lines[offset] == self.lines[offset - 1]: | |
print " " * WIDTH, | |
else: | |
print left_pad("%d" % self.lines[offset], WIDTH, " "), | |
instruction = self.code[offset] | |
if instruction == OP_CONSTANT: | |
return self.disassemble_constant("OP_CONSTANT", offset) | |
if instruction == OP_GET_GLOBAL: | |
return self.disassemble_constant("OP_GET_GLOBAL", offset) | |
if instruction == OP_DEFINE_GLOBAL: | |
return self.disassemble_constant("OP_DEFINE_GLOBAL", offset) | |
if instruction == OP_SET_GLOBAL: | |
return self.disassemble_constant("OP_SET_GLOBAL", offset) | |
if instruction == OP_NIL: | |
return self.disassemble_simple("OP_NIL", offset) | |
if instruction == OP_TRUE: | |
return self.disassemble_simple("OP_TRUE", offset) | |
if instruction == OP_FALSE: | |
return self.disassemble_simple("OP_FALSE", offset) | |
if instruction == OP_EQUAL: | |
return self.disassemble_simple("OP_EQUAL", offset) | |
if instruction == OP_GREATER: | |
return self.disassemble_simple("OP_GREATER", offset) | |
if instruction == OP_LESS: | |
return self.disassemble_simple("OP_LESS", offset) | |
if instruction == OP_ADD: | |
return self.disassemble_simple("OP_ADD", offset) | |
if instruction == OP_SUBTRACT: | |
return self.disassemble_simple("OP_SUBTRACT", offset) | |
if instruction == OP_MULTIPLY: | |
return self.disassemble_simple("OP_MULTIPLY", offset) | |
if instruction == OP_DIVIDE: | |
return self.disassemble_simple("OP_DIVIDE", offset) | |
if instruction == OP_NOT: | |
return self.disassemble_simple("OP_NOT", offset) | |
if instruction == OP_NEGATE: | |
return self.disassemble_simple("OP_NEGATE", offset) | |
if instruction == OP_PRINT: | |
return self.disassemble_simple("OP_PRINT", offset) | |
if instruction == OP_JUMP: | |
return self.disassemble_jump("OP_JUMP", 1, offset) | |
if instruction == OP_JUMP_IF_FALSE: | |
return self.disassemble_jump("OP_JUMP_IF_FALSE", 1, offset) | |
if instruction == OP_LOOP: | |
return self.disassemble_jump("OP_LOOP", -1, offset) | |
if instruction == OP_CALL: | |
return self.disassemble_byte("OP_CALL", offset) | |
if instruction == OP_POP: | |
return self.disassemble_simple("OP_POP", offset) | |
if instruction == OP_CLOSURE: | |
offset += 1 | |
constant = self.code[offset] | |
offset += 1 | |
print self.pad_instruction("OP_CLOSURE"), | |
print self.pad_constant(constant), | |
print "(%s)" % self.constants[constant].__repr__() | |
return offset | |
if instruction == OP_RETURN: | |
return self.disassemble_simple("OP_RETURN", offset) | |
if instruction == OP_GET_LOCAL: | |
return self.disassemble_byte("OP_GET_LOCAL", offset) | |
if instruction == OP_SET_LOCAL: | |
return self.disassemble_byte("OP_SET_LOCAL", offset) | |
raise RuntimeError("unknown instruction %d") | |
def disassemble_simple(self, name, offset): | |
print name | |
return offset + 1 | |
def pad_instruction(self, name): | |
INSN_WIDTH = 20 | |
return right_pad(name, INSN_WIDTH) | |
def pad_constant(self, const): | |
CONST_WIDTH = 4 | |
return left_pad(str(const), CONST_WIDTH, "0") | |
def disassemble_constant(self, name, offset): | |
idx = self.code[offset + 1] | |
print self.pad_instruction(name), self.pad_constant(idx), "(%s)" % self.constants[idx].__repr__() | |
return offset + 2 | |
def disassemble_byte(self, name, offset): | |
slot = self.code[offset + 1] | |
print self.pad_instruction(name), self.pad_constant(slot) | |
return offset + 2 | |
def disassemble_jump(self, name, sign, offset): | |
# must match JUMP_TARGET_SIZE | |
target_offset = (self.code[offset + 1] << 8) | self.code[offset + 2] | |
jump_end = offset + 3 | |
print self.pad_instruction(name), self.pad_constant(offset), " -> ", self.pad_constant(jump_end + sign * target_offset) | |
return jump_end | |
class Frame: | |
def __init__(self, closure, stack, start_slot_idx): | |
self.closure = closure | |
self.ip = 0 | |
self.stack = stack | |
self.start_slot_idx = start_slot_idx | |
def stack_at(self, idx): | |
return self.stack[self.start_slot_idx + idx] | |
def stack_at_put(self, idx, val): | |
self.stack[self.start_slot_idx + idx] = val | |
INTERPRET_OK, INTERPRET_COMPILE_ERROR, INTERPRET_RUNTIME_ERROR = xrange(3) | |
MAX_FRAMES = 100 | |
class VM: | |
def __init__(self): | |
self.tracing = False | |
self.stack = [] | |
# self.strings = {} | |
self.globals = {} | |
self.frames = [] | |
def open_frame(self, closure): | |
# subtract 1 for function object itself | |
start_slot_idx = len(self.stack) - closure.function.arity - 1 | |
frame = Frame(closure, self.stack, start_slot_idx) | |
self.frames.append(frame) | |
return frame | |
def frame(self): | |
return self.frames[-1] | |
def function(self): | |
return self.frame().closure.function | |
def chunk(self): | |
return self.function().chunk | |
def ip(self): | |
return self.frame().ip | |
def set_tracing(self, value=True): | |
self.tracing = value | |
def read_byte(self): | |
result = self.chunk().code[self.ip()] | |
self.frame().ip += 1 | |
return result | |
def read16(self): | |
high = self.read_byte() | |
low = self.read_byte() | |
return (high << 8) | low | |
def read_constant(self): | |
idx = self.read_byte() | |
return self.chunk().constants[idx] | |
def is_falsey(self, value): | |
return value is NilValue or (isinstance(value, BoolValue) and not value.value) | |
def values_equal(self, left, right): | |
if isinstance(left, BoolValue) and isinstance(right, BoolValue): | |
return left.value == right.value | |
if isinstance(left, NumberValue) and isinstance(right, NumberValue): | |
return left.value == right.value | |
if isinstance(left, String) and isinstance(right, String): | |
return left.value == right.value | |
return left is right | |
def peek(self, distance=0): | |
return self.stack[-1 - distance] | |
def runtime_error(self, message): | |
print message | |
i = len(self.frames) - 1 | |
while i >= 0: | |
frame = self.frames[i] | |
instruction = frame.ip - 1 | |
function = frame.closure.function | |
line = function.chunk.lines[instruction] | |
print "[line %d] in %s" % (line, function.name()) | |
i -= 1 | |
self.stack = [] | |
return INTERPRET_RUNTIME_ERROR | |
def define_native(self, name, function): | |
self.globals[name] = NativeFunction(function) | |
# def intern(self, chars): | |
# interned = self.strings.get(chars) | |
# if interned is not None: | |
# return interned | |
# result = String(chars) | |
# self.strings[chars] = result | |
# return result | |
def setup_call(self, callee, arg_count): | |
if isinstance(callee, Closure): | |
arity = callee.function.arity | |
if arg_count != arity: | |
self.runtime_error("Expected %d arguments but got %d" % (arity, arg_count)) | |
return False | |
if len(self.frames) >= MAX_FRAMES: | |
self.runtime_error("Stack overflow") | |
return False | |
self.open_frame(callee) | |
return True | |
if isinstance(callee, NativeFunction): | |
assert arg_count >= 0 | |
stack_length = len(self.stack) | |
assert stack_length > arg_count | |
args_begin = stack_length - arg_count | |
assert args_begin > 0 | |
result = callee.function(arg_count, self.stack[args_begin:]) | |
while arg_count > 0: | |
self.stack.pop() | |
self.stack.pop() # callable | |
self.stack.append(result) | |
return True | |
self.runtime_error("Can only call functions") | |
return False | |
def run(self): | |
while True: | |
if self.tracing: | |
print "STACK:", | |
print "[", " ".join([value.__repr__() for value in self.stack]), "]" | |
print "GLOBALS:", ", ".join([key + ":%s" % value.__repr__() for key, value in self.globals.iteritems()]) | |
print "TRACE:", | |
self.chunk().disassemble_instruction(self.ip()) | |
instruction = self.read_byte() | |
if instruction == OP_CONSTANT: | |
constant = self.read_constant() | |
self.stack.append(constant) | |
continue | |
if instruction == OP_NIL: | |
self.stack.append(NilValue) | |
continue | |
if instruction == OP_TRUE: | |
self.stack.append(TrueValue) | |
continue | |
if instruction == OP_FALSE: | |
self.stack.append(FalseValue) | |
continue | |
if instruction == OP_EQUAL: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
self.stack.append(BoolValue(self.values_equal(left, right))) | |
continue | |
if instruction == OP_ADD: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if isinstance(right, NumberValue) and isinstance(left, NumberValue): | |
self.stack.append(NumberValue(left.value + right.value)) | |
continue | |
if isinstance(right, String) and isinstance(left, String): | |
self.stack.append(String(left.value + right.value)) | |
continue | |
return self.runtime_error("Operands must be numbers or strings") | |
if instruction == OP_SUBTRACT: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if not isinstance(right, NumberValue) or not isinstance(left, NumberValue): | |
return self.runtime_error("Operands must be numbers") | |
self.stack.append(NumberValue(left.value - right.value)) | |
continue | |
if instruction == OP_MULTIPLY: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if not isinstance(right, NumberValue) or not isinstance(left, NumberValue): | |
return self.runtime_error("Operands must be numbers") | |
self.stack.append(NumberValue(left.value * right.value)) | |
continue | |
if instruction == OP_DIVIDE: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if not isinstance(right, NumberValue) or not isinstance(left, NumberValue): | |
return self.runtime_error("Operands must be numbers") | |
self.stack.append(NumberValue(left.value / right.value)) | |
continue | |
if instruction == OP_GREATER: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if not isinstance(right, NumberValue) or not isinstance(left, NumberValue): | |
return self.runtime_error("Operands must be numbers") | |
self.stack.append(BoolValue(left.value > right.value)) | |
continue | |
if instruction == OP_LESS: | |
right = self.stack.pop() | |
left = self.stack.pop() | |
if not isinstance(right, NumberValue) or not isinstance(left, NumberValue): | |
return self.runtime_error("Operands must be numbers") | |
self.stack.append(BoolValue(left.value < right.value)) | |
continue | |
if instruction == OP_NEGATE: | |
value = self.stack.pop() | |
if not isinstance(value, NumberValue): | |
return self.runtime_error("Operand must be a number") | |
self.stack.append(NumberValue(-value.value)) | |
continue | |
if instruction == OP_NOT: | |
self.stack.append(BoolValue(self.is_falsey(self.stack.pop()))) | |
continue | |
if instruction == OP_PRINT: | |
print_value(self.stack.pop()) | |
continue | |
if instruction == OP_POP: | |
self.stack.pop() | |
continue | |
if instruction == OP_GET_LOCAL: | |
slot = self.read_byte() | |
self.stack.append(self.frame().stack_at(slot)) | |
continue | |
if instruction == OP_SET_LOCAL: | |
slot = self.read_byte() | |
self.frame().stack_at_put(slot, self.peek(0)) | |
continue | |
if instruction == OP_GET_GLOBAL: | |
name = self.read_constant() | |
assert isinstance(name, String) | |
result = self.globals.get(name.value) | |
if result is None: | |
self.runtime_error("Undefined variable '%s'" % name.value) | |
return INTERPRET_RUNTIME_ERROR | |
self.stack.append(result) | |
continue | |
if instruction == OP_DEFINE_GLOBAL: | |
name = self.read_constant() | |
assert isinstance(name, String) | |
self.globals[name.value] = self.stack.pop() | |
continue | |
if instruction == OP_SET_GLOBAL: | |
name = self.read_constant() | |
assert isinstance(name, String) | |
current = self.globals.get(name.value) | |
if current is None: | |
self.runtime_error("Undefined variable '%s'" % name.value) | |
return INTERPRET_RUNTIME_ERROR | |
self.globals[name.value] = self.peek(0) | |
continue | |
if instruction == OP_JUMP: | |
offset = self.read16() | |
self.frame().ip += offset | |
continue | |
if instruction == OP_JUMP_IF_FALSE: | |
offset = self.read16() | |
if self.is_falsey(self.peek(0)): | |
self.frame().ip += offset | |
continue | |
if instruction == OP_LOOP: | |
offset = self.read16() | |
self.frame().ip -= offset | |
continue | |
if instruction == OP_CALL: | |
arg_count = self.read_byte() | |
callee = self.peek(arg_count) | |
result = self.setup_call(callee, arg_count) | |
if not result: | |
return INTERPRET_RUNTIME_ERROR | |
continue | |
if instruction == OP_CLOSURE: | |
function = self.read_constant() | |
closure = Closure(function) | |
self.stack.append(closure) | |
continue | |
if instruction == OP_RETURN: | |
result = self.stack.pop() | |
old_stack_size = self.frame().start_slot_idx | |
self.frames.pop() | |
if not self.frames: | |
self.stack.pop() # the script function | |
return INTERPRET_OK | |
# Reset the stack | |
# TODO(max): Is it faster to do this or pop in a loop? | |
assert old_stack_size > 0 | |
del self.stack[old_stack_size:] | |
self.stack.append(result) | |
continue | |
return self.runtime_error("Unknown opcode %d" % instruction) | |
def group(*choices): | |
return '(' + '|'.join(choices) + ')' | |
def any(*choices): | |
return group(*choices) + '*' | |
def make_single_string(delim): | |
normal_chars = r"[^\n\%s]*" % (delim,) | |
return "".join([delim, normal_chars, any(r"\\." + normal_chars), delim]) | |
TOKENS = ( | |
# Single-character tokens | |
("left_paren", r"\("), | |
("right_paren", r"\)"), | |
("left_brace", r"{"), | |
("right_brace", r"}"), | |
("comma", r","), | |
("dot", r"\."), | |
("minus", r"-"), | |
("plus", r"\+"), | |
("semicolon", r";"), | |
("slash", r"/"), | |
("star", r"\*"), | |
# One or two character tokens | |
("bang", r"\!"), | |
("bang_equal", r"\!="), | |
("equal", r"="), | |
("equal_equal", r"=="), | |
("greater", r">"), | |
("greater_equal", r">="), | |
("less", r"<"), | |
("less_equal", r"<="), | |
# Keywords | |
("and", r"and"), | |
("class", r"class"), | |
("else", r"else"), | |
("false", r"false"), | |
("for", r"for"), | |
("fun", r"fun"), | |
("if", r"if"), | |
("nil", r"nil"), | |
("or", r"or"), | |
("print", r"print"), | |
("return", r"return"), | |
("super", r"super"), | |
("this", r"this"), | |
("true", r"true"), | |
("var", r"var"), | |
("while", r"while"), | |
# Literals | |
("identifier", r"[a-zA-Z_][a-zA-Z_0-9]*"), | |
("string", make_single_string('"')), | |
("number", r"\d+[lL]?"), | |
# Ignore | |
("whitespace", r"\s+"), | |
("comment", r"//[^\r\n]*"), | |
) | |
names = [name for name, regex in TOKENS] | |
token_regexes = [parse_regex(regex) for name, regex in TOKENS] | |
LEXER = Lexer(token_regexes, names, ignore=("whitespace", "comment")) | |
def debug_tokens(tokens): | |
line = -1 | |
for token in tokens: | |
loc = token.source_pos | |
if loc.lineno != line: | |
print "%d\t" % loc.lineno, | |
line = loc.lineno | |
else: | |
print "|\t", | |
print token.name, token.source, len(token.source), loc.i | |
STATEMENT_BOUNDARY = ("class", "fun", "var", "for", "if", "while", "print", "return") | |
class Parser: | |
def __init__(self, tokens): | |
self.tokens = tokens | |
self.previous = None | |
self.current = None | |
self.had_error = False | |
self.panic_mode = False | |
def scan_token(self): | |
if not self.tokens: | |
return Token("eof", "<eof>", SourcePos(-1, -1, -1)) | |
return self.tokens.pop(0) | |
def advance(self): | |
self.previous = self.current | |
while True: | |
self.current = self.scan_token() | |
if self.current.name != "error": | |
break | |
self.error_at_current(self.current.source) | |
def check(self, token_type): | |
return self.current.name == token_type | |
def consume(self, token_type, message): | |
if self.check(token_type): | |
self.advance() | |
return | |
self.error_at_current(message) | |
def match(self, token_type): | |
if not self.check(token_type): | |
return False | |
self.advance() | |
return True | |
def error_at_current(self, message): | |
self.error_at(self.current, message) | |
def error(self, message): | |
self.error_at(self.previous, message) | |
def error_at(self, token, message): | |
if self.panic_mode: | |
return | |
self.panic_mode = True | |
print "[line %d] Error" % token.source_pos.lineno, | |
if token.name == "eof": | |
print "at end", | |
elif token.name == "error": | |
pass | |
else: | |
print "at '%s'" % token.source, | |
print ": %s" % message | |
self.had_error = True | |
def eof(self): | |
self.consume("eof", "Expected EOF") | |
def synchronize(self): | |
self.panic_mode = False | |
while not self.check("eof"): | |
if self.previous.name == "semicolon": | |
return | |
if self.current.name in STATEMENT_BOUNDARY: | |
return | |
self.advance() | |
( | |
PREC_NONE, | |
PREC_ASSIGNMENT, | |
PREC_OR, | |
PREC_AND, | |
PREC_EQUALITY, | |
PREC_COMPARISON, | |
PREC_TERM, | |
PREC_FACTOR, | |
PREC_UNARY, | |
PREC_CALL, | |
PREC_PRIMARY, | |
) = xrange(11) | |
class ParseRule: | |
def __init__(self, prefix, infix, precedence): | |
self.prefix = prefix | |
self.infix = infix | |
self.precedence = precedence | |
class Local: | |
def __init__(self, name, depth): | |
self.name = name | |
self.depth = depth | |
class CompilerContext: | |
def __init__(self, previous): | |
self.function = None | |
self.function_type = FunctionType.SCRIPT | |
self.locals = [] | |
self.previous = previous | |
BYTE_SIZE = 8 | |
JUMP_TARGET_SIZE = 2 | |
MAX_JUMP = 2 ** (JUMP_TARGET_SIZE * BYTE_SIZE) - 1 | |
MAX_PARAMS = 2 ** BYTE_SIZE - 1 | |
class FunctionType: | |
FUNCTION, SCRIPT = xrange(2) | |
# TODO(max): Don't use list.pop(0) to read next token because it's O(n) | |
class Compiler: | |
def __init__(self): | |
self.lexer = LEXER | |
self.parser = None | |
self.scope_depth = 0 | |
self.context = None | |
def push_context(self, function, function_type): | |
previous = self.context | |
self.context = CompilerContext(previous) | |
self.context.function = function | |
self.context.function_type = function_type | |
# Reserve a locals slot for the compiler's internal use | |
self.context.locals.append(Local(Token("<function>", "", SourcePos(-1, -1, -1)), 0)) | |
def pop_context(self): | |
current = self.context | |
self.context = current.previous | |
def current_function(self): | |
function = self.context.function | |
assert function is not None | |
return function | |
def current_chunk(self): | |
chunk = self.current_function().chunk | |
assert chunk is not None | |
return chunk | |
def compile(self, text): | |
tokens = self.lexer.tokenize(text) | |
self.parser = Parser(tokens) | |
self.parser.advance() | |
while not self.parser.match("eof"): | |
self.declaration() | |
self.parser.eof() | |
self.emit_return() | |
if self.parser.had_error: | |
return None | |
return self.current_function() | |
def emit_byte(self, byte): | |
self.current_chunk().write(byte, self.parser.previous.source_pos.lineno) | |
def emit_bytes(self, byte1, byte2): | |
self.current_chunk().write(byte1, self.parser.previous.source_pos.lineno) | |
self.current_chunk().write(byte2, self.parser.previous.source_pos.lineno) | |
def emit_return(self): | |
self.emit_byte(OP_NIL) | |
self.emit_byte(OP_RETURN) | |
def emit_constant(self, value): | |
self.emit_bytes(OP_CONSTANT, self.make_constant(value)) | |
def make_constant(self, value): | |
idx = self.current_chunk().add_constant(value) | |
if idx > 255: | |
self.parser.error("Too many constants in one chunk") | |
return -1 | |
return idx | |
def number(self, can_assign): | |
value = int(self.parser.previous.source) | |
self.emit_constant(NumberValue(value)) | |
def grouping(self, can_assign): | |
self.expression() | |
self.parser.consume("right_paren", "Expect ')' after expression") | |
def unary(self, can_assign): | |
operator_type = self.parser.previous.name | |
self.parse_precedence(PREC_UNARY) | |
if operator_type == "minus": | |
self.emit_byte(OP_NEGATE) | |
elif operator_type == "bang": | |
self.emit_byte(OP_NOT) | |
else: | |
self.parser.error("Unreachable") | |
assert False | |
def get_rule(self, operator_type): | |
try: | |
return RULES[operator_type] | |
except KeyError: | |
print "Could not find operator_type", operator_type | |
raise | |
def binary(self, can_assign): | |
operator_type = self.parser.previous.name | |
rule = self.get_rule(operator_type) | |
self.parse_precedence(rule.precedence + 1) | |
opcode = { | |
"plus": OP_ADD, | |
"minus": OP_SUBTRACT, | |
"star": OP_MULTIPLY, | |
"slash": OP_DIVIDE, | |
"equal_equal": OP_EQUAL, | |
"greater": OP_GREATER, | |
"less": OP_LESS, | |
} | |
opcodes = { | |
"bang_equal": [OP_EQUAL, OP_NOT], | |
"greater_equal": [OP_LESS, OP_NOT], | |
"less_equal": [OP_GREATER, OP_NOT], | |
} | |
if operator_type in opcode: | |
self.emit_byte(opcode[operator_type]) | |
elif operator_type in opcodes: | |
for op in opcodes[operator_type]: | |
self.emit_byte(op) | |
else: | |
raise KeyError("Could not find operator_type", operator_type) | |
def string(self, can_assign): | |
with_quotes = self.parser.previous.source | |
if len(with_quotes) == 0: | |
self.emit_constant(String("")) | |
else: | |
last = len(with_quotes) - 1 | |
assert last >= 0 | |
without_quotes = with_quotes[1:last] | |
self.emit_constant(String(without_quotes)) | |
def literal(self, can_assign): | |
operator_type = self.parser.previous.name | |
opcode = {"false": OP_FALSE, "nil": OP_NIL, "true": OP_TRUE} | |
self.emit_byte(opcode[operator_type]) | |
def parse_precedence(self, precedence): | |
self.parser.advance() | |
prefix_rule = self.get_rule(self.parser.previous.name).prefix | |
if not prefix_rule: | |
self.parser.error("Expect expression") | |
return | |
can_assign = precedence <= PREC_ASSIGNMENT | |
prefix_rule(self, can_assign) | |
while precedence <= self.get_rule(self.parser.current.name).precedence: | |
self.parser.advance() | |
infix_rule = self.get_rule(self.parser.previous.name).infix | |
infix_rule(self, can_assign) | |
if can_assign and self.parser.match("equal"): | |
self.parser.error("Invalid assignment target") | |
def expression(self): | |
self.parse_precedence(PREC_ASSIGNMENT) | |
def declaration(self): | |
if self.parser.match("fun"): | |
self.fun_declaration() | |
elif self.parser.match("var"): | |
self.var_declaration() | |
else: | |
self.statement() | |
if self.parser.panic_mode: | |
self.parser.synchronize() | |
def fun_declaration(self): | |
global_idx = self.parse_variable("Expect function name") | |
self.mark_initialized() | |
self.function() | |
self.define_variable(global_idx) | |
def function(self): | |
name = self.parser.previous.source | |
function = Function(arity=0, chunk=Chunk(), name=name) | |
self.push_context(function, FunctionType.FUNCTION) | |
self.begin_scope() | |
self.parser.consume("left_paren", "Expect '(' after function name") | |
if not self.parser.check("right_paren"): | |
while True: | |
function.arity += 1 | |
if function.arity > MAX_PARAMS: | |
self.parser.error_at_current("Can't have more than %d parameters" % MAX_PARAMS) | |
const = self.parse_variable("Expect parameter name") | |
self.define_variable(const) | |
if not self.parser.match("comma"): | |
break | |
self.parser.consume("right_paren", "Expect ')' after parameters") | |
self.parser.consume("left_brace", "Expect '{' before function body") | |
self.block() | |
self.scope_depth -= 1 | |
# Emit a nil return in case the function does not have one | |
self.emit_return() | |
self.pop_context() | |
function.chunk.disassemble(name) | |
self.emit_bytes(OP_CLOSURE, self.make_constant(function)) | |
def mark_initialized(self): | |
if self.scope_depth == 0: | |
# Top-level; no local to mark initialized, as the function is bound | |
# to a global instead | |
return | |
# Mark the most recently defined local as initialized after | |
# compiling the RHS of the assignment | |
self.context.locals[-1].depth = self.scope_depth | |
def var_declaration(self): | |
global_idx = self.parse_variable("Expect variable name") | |
if self.parser.match("equal"): | |
self.expression() | |
else: | |
self.emit_byte(OP_NIL) | |
self.parser.consume("semicolon", "Expect ';' after variable declaration") | |
self.define_variable(global_idx) | |
def parse_variable(self, message): | |
self.parser.consume("identifier", message) | |
self.declare_variable() | |
if self.scope_depth > 0: | |
# This is a dummy index in the case of locals; define_variable | |
# won't do anything if scope_depth > 0 anyway | |
return 0 | |
return self.identifier_constant(self.parser.previous) | |
def identifier_constant(self, name_token): | |
return self.make_constant(String(name_token.source)) | |
def declare_variable(self): | |
if self.scope_depth == 0: | |
return | |
name_token = self.parser.previous | |
self.error_duplicate_names(name_token) | |
self.add_local(name_token) | |
def error_duplicate_names(self, name_token): | |
i = len(self.context.locals) - 1 | |
while i >= 0: | |
local = self.context.locals[i] | |
if local.depth != -1 and local.depth < self.scope_depth: | |
break | |
if name_token.source == local.name.source: | |
self.parser.error("Local '%s' already defined in this scope" % name_token.source) | |
i -= 1 | |
def add_local(self, name_token): | |
if len(self.context.locals) > 255: | |
self.parser.error("Too many local variables in this function") | |
return | |
self.context.locals.append(Local(name_token, -1)) | |
def define_variable(self, global_idx): | |
assert global_idx != -1, "Invalid global_idx" | |
if self.scope_depth > 0: | |
self.mark_initialized() | |
return | |
self.emit_bytes(OP_DEFINE_GLOBAL, global_idx) | |
def statement(self): | |
if self.parser.match("print"): | |
self.print_statement() | |
elif self.parser.match("for"): | |
self.for_statement() | |
elif self.parser.match("if"): | |
self.if_statement() | |
elif self.parser.match("return"): | |
self.return_statement() | |
elif self.parser.match("while"): | |
self.while_statement() | |
elif self.parser.match("left_brace"): | |
self.begin_scope() | |
self.block() | |
self.end_scope() | |
else: | |
self.expression_statement() | |
def return_statement(self): | |
if self.context.function_type == FunctionType.SCRIPT: | |
self.parser.error("Can't return from top-level code") | |
if self.parser.match("semicolon"): | |
self.emit_return() | |
else: | |
self.expression() | |
self.parser.consume("semicolon", "Expect ';' after return") | |
self.emit_byte(OP_RETURN) | |
def for_statement(self): | |
self.begin_scope() | |
self.parser.consume("left_paren", "Expect '(' after 'for'") | |
if self.parser.match("semicolon"): | |
# No initializer | |
pass | |
elif self.parser.match("var"): | |
self.var_declaration() | |
else: | |
self.expression_statement() | |
loop_start = self.current_offset() | |
exit_jump = -1 | |
if not self.parser.match("semicolon"): | |
self.expression() | |
self.parser.consume("semicolon", "Expect ';' after loop condition") | |
exit_jump = self.emit_jump(OP_JUMP_IF_FALSE) | |
self.emit_byte(OP_POP) # for condition | |
if not self.parser.match("right_paren"): | |
body_jump = self.emit_jump(OP_JUMP) | |
increment_start = self.current_offset() | |
self.expression() | |
self.emit_byte(OP_POP) | |
self.parser.consume("right_paren", "Expect ')' after 'for' clauses") | |
self.emit_loop(loop_start) | |
loop_start = increment_start | |
self.patch_jump(body_jump) | |
self.statement() | |
self.emit_loop(loop_start) | |
if exit_jump != -1: | |
self.patch_jump(exit_jump) | |
self.emit_byte(OP_POP) # for condition | |
self.end_scope() | |
def if_statement(self): | |
self.parser.consume("left_paren", "Expect '(' after 'if'") | |
self.expression() | |
self.parser.consume("right_paren", "Expect ')' after 'if' condition") | |
then_jump = self.emit_jump(OP_JUMP_IF_FALSE) | |
self.emit_byte(OP_POP) | |
self.statement() | |
else_jump = self.emit_jump(OP_JUMP) | |
self.patch_jump(then_jump) | |
self.emit_byte(OP_POP) | |
if self.parser.match("else"): | |
self.statement() | |
self.patch_jump(else_jump) | |
def current_offset(self): | |
return len(self.current_chunk().code) | |
def emit_jump(self, instruction): | |
self.emit_byte(instruction) | |
self.emit_bytes(0xff, 0xff) # must match JUMP_TARGET_SIZE | |
return self.current_offset() - JUMP_TARGET_SIZE | |
def patch_jump(self, offset): | |
jump = self.current_offset() - offset - JUMP_TARGET_SIZE | |
if jump > MAX_JUMP: | |
self.parser.error("Too much code to jump over") | |
self.current_chunk().code[offset] = (jump >> 8) & 0xff | |
self.current_chunk().code[offset + 1] = jump & 0xff | |
def while_statement(self): | |
loop_start = self.current_offset() | |
self.parser.consume("left_paren", "Expect '(' after 'while'") | |
self.expression() | |
self.parser.consume("right_paren", "Expect ')' after 'while' condition") | |
exit_jump = self.emit_jump(OP_JUMP_IF_FALSE) | |
self.emit_byte(OP_POP) | |
self.statement() | |
self.emit_loop(loop_start) | |
self.patch_jump(exit_jump) | |
self.emit_byte(OP_POP) | |
def emit_loop(self, loop_start): | |
self.emit_byte(OP_LOOP) | |
offset = self.current_offset() - loop_start + JUMP_TARGET_SIZE | |
if offset > MAX_JUMP: | |
self.parser.error("Loop body too large") | |
self.emit_byte((offset >> 8) & 0xff) | |
self.emit_byte(offset & 0xff) | |
def begin_scope(self): | |
self.scope_depth += 1 | |
def end_scope(self): | |
self.scope_depth -= 1 | |
while self.context.locals and self.context.locals[-1].depth > self.scope_depth: | |
self.emit_byte(OP_POP) | |
self.context.locals.pop() | |
def block(self): | |
while not self.parser.check("right_brace") and not self.parser.check("eof"): | |
self.declaration() | |
self.parser.consume("right_brace", "Expect '}' after block") | |
def print_statement(self): | |
self.expression() | |
self.parser.consume("semicolon", "Expect ';' after value") | |
self.emit_byte(OP_PRINT) | |
def expression_statement(self): | |
self.expression() | |
self.parser.consume("semicolon", "Expect ';' after expression") | |
self.emit_byte(OP_POP) | |
def variable(self, can_assign): | |
self.named_variable(self.parser.previous, can_assign) | |
def resolve_local(self, name_token): | |
i = len(self.context.locals) - 1 | |
while i >= 0: | |
local = self.context.locals[i] | |
if name_token.source == local.name.source: | |
if local.depth == -1: | |
self.parser.error("Can't read local '%s' in its own initializer" % name_token.source) | |
return i | |
i -= 1 | |
return -1 | |
def named_variable(self, name_token, can_assign): | |
idx = self.resolve_local(name_token) | |
if idx != -1: | |
get_op = OP_GET_LOCAL | |
set_op = OP_SET_LOCAL | |
else: | |
idx = self.identifier_constant(name_token) | |
get_op = OP_GET_GLOBAL | |
set_op = OP_SET_GLOBAL | |
assert idx != -1, "Invalid idx" | |
if can_assign and self.parser.match("equal"): | |
self.expression() | |
self.emit_bytes(set_op, idx) | |
else: | |
self.emit_bytes(get_op, idx) | |
def and_(self, can_assign): | |
end_jump = self.emit_jump(OP_JUMP_IF_FALSE) | |
self.emit_byte(OP_POP) | |
self.parse_precedence(PREC_AND) | |
self.patch_jump(end_jump) | |
def or_(self, can_assign): | |
else_jump = self.emit_jump(OP_JUMP_IF_FALSE) | |
end_jump = self.emit_jump(OP_JUMP) | |
self.patch_jump(else_jump) | |
self.emit_byte(OP_POP) | |
self.parse_precedence(PREC_OR) | |
self.patch_jump(end_jump) | |
def call(self, can_assign): | |
arg_count = self.argument_list() | |
if arg_count > MAX_PARAMS: | |
self.parser.error("Can't have more than %d arguments" % MAX_PARAMS) | |
self.emit_bytes(OP_CALL, arg_count) | |
def argument_list(self): | |
arg_count = 0 | |
if not self.parser.check("right_paren"): | |
while True: | |
self.expression() | |
arg_count += 1 | |
if not self.parser.match("comma"): | |
break | |
self.parser.consume("right_paren", "Expect ')' in function call") | |
return arg_count | |
RULES = { | |
"left_paren": ParseRule(Compiler.grouping, Compiler.call, PREC_CALL), | |
"right_paren": ParseRule(None, None, PREC_NONE), | |
"left_brace": ParseRule(None, None, PREC_NONE), | |
"right_brace": ParseRule(None, None, PREC_NONE), | |
"comma": ParseRule(None, None, PREC_NONE), | |
"dot": ParseRule(None, None, PREC_NONE), | |
"minus": ParseRule(Compiler.unary, Compiler.binary, PREC_TERM), | |
"semicolon": ParseRule(None, None, PREC_NONE), | |
"plus": ParseRule(None, Compiler.binary, PREC_TERM), | |
"slash": ParseRule(None, Compiler.binary, PREC_FACTOR), | |
"star": ParseRule(None, Compiler.binary, PREC_FACTOR), | |
"bang": ParseRule(Compiler.unary, None, PREC_NONE), | |
"bang_equal": ParseRule(None, Compiler.binary, PREC_EQUALITY), | |
"equal": ParseRule(None, Compiler.binary, PREC_NONE), | |
"equal_equal": ParseRule(None, Compiler.binary, PREC_EQUALITY), | |
"greater": ParseRule(None, Compiler.binary, PREC_COMPARISON), | |
"greater_equal": ParseRule(None, Compiler.binary, PREC_COMPARISON), | |
"less": ParseRule(None, Compiler.binary, PREC_COMPARISON), | |
"less_equal": ParseRule(None, Compiler.binary, PREC_COMPARISON), | |
"identifier": ParseRule(Compiler.variable, None, PREC_NONE), | |
"string": ParseRule(Compiler.string, None, PREC_NONE), | |
"number": ParseRule(Compiler.number, None, PREC_NONE), | |
"and": ParseRule(None, Compiler.and_, PREC_AND), | |
"class": ParseRule(None, None, PREC_NONE), | |
"if": ParseRule(None, None, PREC_NONE), | |
"else": ParseRule(None, None, PREC_NONE), | |
"for": ParseRule(None, None, PREC_NONE), | |
"fun": ParseRule(None, None, PREC_NONE), | |
"return": ParseRule(None, None, PREC_NONE), | |
"super": ParseRule(None, None, PREC_NONE), | |
"this": ParseRule(None, None, PREC_NONE), | |
"var": ParseRule(None, None, PREC_NONE), | |
"while": ParseRule(None, None, PREC_NONE), | |
"error": ParseRule(None, None, PREC_NONE), | |
"or": ParseRule(None, Compiler.or_, PREC_OR), | |
"false": ParseRule(Compiler.literal, None, PREC_NONE), | |
"true": ParseRule(Compiler.literal, None, PREC_NONE), | |
"nil": ParseRule(Compiler.literal, None, PREC_NONE), | |
"print": ParseRule(None, None, PREC_NONE), | |
"eof": ParseRule(None, None, PREC_NONE), | |
} | |
def compile_file(program): | |
compiler = Compiler() | |
compiler.push_context(Function.script(), FunctionType.SCRIPT) | |
return compiler.compile(program) | |
def main(argv): | |
tracing = False | |
if len(argv) != 2: | |
if len(argv) == 3: | |
tracing_arg = argv[2] | |
if tracing_arg != "--tracing": | |
print "Invalid TRACING arg. Expected '--tracing' or nothing" | |
return 1 | |
tracing = True | |
else: | |
print "Invalid argcount. Expected ./lox FILENAME [TRACING]" | |
return 1 | |
filename = argv[1] | |
with open(filename, "r") as f: | |
program = f.read() | |
print "Compiling..." | |
function = compile_file(program) | |
if not function: | |
print "Error compiling" | |
return 1 | |
function.chunk.disassemble(function.name()) | |
print "Running..." | |
vm = VM() | |
vm.define_native("clock", lambda arg_count, args: NumberValue(int(time.clock()))) | |
vm.tracing = tracing | |
vm.stack.append(function) | |
vm.open_frame(Closure(function)) | |
code = vm.run() | |
if code != INTERPRET_OK: | |
print "Error running" | |
return 1 | |
return 0 | |
def target(*args): | |
return main | |
if __name__ == "__main__": | |
import sys | |
main(sys.argv) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var greeting = "goodbye"; | |
var recipient = " world"; | |
greeting = "hello"; | |
print greeting + recipient; | |
{ | |
var x = 1; | |
{ | |
var y = 2; | |
var z = 3; | |
y = 4; | |
print x + y; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var x = 0; | |
while (x < 10) { | |
print x; | |
x = x + 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment