Skip to content

Instantly share code, notes, and snippets.

@mbillingr
Last active April 23, 2023 05:58
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mbillingr/c2bdca4f618974e7e8d1449aba792b41 to your computer and use it in GitHub Desktop.
Save mbillingr/c2bdca4f618974e7e8d1449aba792b41 to your computer and use it in GitHub Desktop.
Implementation of a minimal Forth interpreter/compiler on top of Python 3.5.
""" pyrx
Implementation of a minimal Forth interpreter/compiler on top of Python 3.6.
It is based on Rx [1] and should eventually support Retro [2].
Facts & Features:
- New words are compiled to Python bytecode (subroutine threading model).
- Dynamically typed: the only data type is the Python object.
- Literals are evaluated as Python expressions.
- The data stack is a global Python list.
- We also have a Python list as heap.
- The dictionary is a global Python dictionary, indexing into the heap.
To-do:
- Implement missing features (?)
- REPL
- String handling
[1] https://github.com/crcx/retro12/blob/master/literate/Rx.md
[2] https://github.com/crcx/retro12/blob/master/literate/RetroForth.md
"""
import types
import dis
stack = [] # data stack
aux_stack = [] # auxiliary stack for compiling control structures
codes = {} # lookup table for code objects
dic = {} # word dictionary
class Heap(list):
def __init__(self):
self.labels = {}
def set_label(self, name, pos):
self.labels[name] = pos
def get_label(self, name):
return self.labels[name]
def add(self, value, name=None):
pos = len(self)
self.append(value)
if name is not None:
self.set_label(name, pos)
return pos
heap = Heap()
compilation = heap.add([], 'compilation')
last_word = heap.add(None, 'last_word')
heaptest = heap.add(135, 'heaptest')
# helper functions
def set_top(*args):
"""Replace values on top of the stack."""
for i, a in enumerate(args[::-1]):
stack[-1 - i] = a
def constant(value):
"""return index of constant and add it to list if necessary"""
cmp = heap[compilation][-1]
try:
return cmp[6].index(value)
except ValueError:
cmp[6] += (value,)
return len(cmp[6]) - 1
def name(value):
"""return index of name and add it to list if necessary"""
cmp = heap[compilation][-1]
try:
return cmp[7].index(value)
except ValueError:
cmp[7] += (value,)
return len(cmp[7]) - 1
def compile_opcode(name, arg=0):
cmp = heap[compilation][-1]
cmp[5] += bytes([dis.opmap[name], arg])
def compile_byte(value):
heap[compilation][-1][5] += bytes([value])
# word Classes determine the interpretion/compilation behavior of words
def macro():
"""
macro:
intepret: execute code
compile: execute code
"""
pos = stack.pop()
exec(pos)
def word():
"""
word:
intepret: execute code
compile: append call to code
"""
code = stack.pop()
if heap[compilation]:
cc = constant(id(code))
compile_opcode('DUP_TOP_TWO')
compile_opcode('LOAD_CONST', cc)
compile_opcode('BINARY_SUBSCR')
compile_opcode('CALL_FUNCTION', 1)
compile_opcode('POP_TOP')
else:
exec(code)
def literal():
"""
Literal:
intepret: push value to stack
compile: create code that pushes value to stack
"""
stack.pop()
if heap[compilation]:
value = eval(stack.pop())
compile_opcode('LOAD_GLOBAL', name('stack'))
compile_opcode('LOAD_CONST', constant(value))
compile_opcode('BUILD_LIST', 1)
compile_opcode('INPLACE_ADD')
compile_opcode('STORE_GLOBAL', name('stack'))
else:
stack[-1] = eval(stack[-1])
def data():
"""
Data:
intepret: leave value on stack
compile: create code that pushes value to stack
"""
if heap[compilation]:
compile_opcode('LOAD_GLOBAL', name('stack'))
compile_opcode('LOAD_CONST', constant(stack.pop()))
compile_opcode('BUILD_LIST', 1)
compile_opcode('INPLACE_ADD')
compile_opcode('STORE_GLOBAL', name('stack'))
def compiler():
"""
compiler:
intepret: throw error
compile: execute code
"""
if heap[compilation]:
exec(stack.pop())
def prepare_code(wordname):
heap[compilation].append([0, 0, 0, 10, 0, bytes(), (None, -1), (), (),
"<pyrx>", wordname, 0, bytes()])
compile_opcode('LOAD_GLOBAL', name('exec'))
compile_opcode('LOAD_GLOBAL', name('codes'))
def interpret(string):
for token in string.split():
prefix = 'prefix:' + token[0]
if prefix in dic:
entry = dic[prefix]
stack.append(token[1:])
elif token in dic:
# try to look up token in dictionary
entry = dic[token]
else:
# otherwise evaluate token as a Python expression
stack.append(token)
entry = dic['prefix:#']
stack.append(heap[entry + D_XT_OFFSET])
eval(heap[entry + D_CLS_OFFSET])
# some words cannot be nicely defined in lambdas
def op_finalize_word():
compile_opcode('POP_TOP') # clean stack (codes)
compile_opcode('POP_TOP') # clean stack (exec)
compile_opcode('LOAD_CONST', constant(None))
compile_opcode('RETURN_VALUE')
code = types.CodeType(*heap[compilation].pop())
add_word(code.co_name, code, word)
def op_repeat():
aux_stack.append(len(heap[compilation][-1][5]))
def op_again():
compile_opcode('JUMP_ABSOLUTE', aux_stack.pop())
def op_0break():
compile_opcode('LOAD_GLOBAL', name('stack'))
compile_opcode('LOAD_CONST', constant(-1))
compile_opcode('BINARY_SUBSCR')
target = len(heap[compilation][-1][5]) + 12
compile_opcode('POP_JUMP_IF_TRUE', target)
compile_opcode('LOAD_GLOBAL', name('stack'))
compile_opcode('LOAD_ATTR', name('pop'))
compile_opcode('CALL_FUNCTION')
compile_opcode('POP_TOP')
compile_opcode('RETURN_VALUE')
def op_open_quot():
prepare_code('')
def op_close_quot():
compile_opcode('POP_TOP') # clean stack (codes)
compile_opcode('POP_TOP') # clean stack (exec)
compile_opcode('LOAD_CONST', constant(None))
compile_opcode('RETURN_VALUE')
code = types.CodeType(*heap[compilation].pop())
codes[id(code)] = code
if heap[compilation]:
compile_opcode('LOAD_GLOBAL', name('stack'))
compile_opcode('LOAD_CONST', constant(code))
compile_opcode('BUILD_LIST', 1)
compile_opcode('INPLACE_ADD')
compile_opcode('STORE_FAST', name('stack'))
else:
stack.append(code)
def op_if():
code = stack.pop()
flag = stack.pop()
if flag:
exec(code)
def op_ifnot():
code = stack.pop()
flag = stack.pop()
if not flag:
exec(code)
def op_choice():
code0 = stack.pop()
code1 = stack.pop()
flag = stack.pop()
exec(code1 if flag else code0)
def op_deref():
stack.append(heap[dic[stack.pop()] + D_XT_OFFSET])
data()
def add_word(name, xt, cls):
if isinstance(xt, types.FunctionType):
xt = xt.__code__
pos = len(heap)
heap.append(heap[last_word])
heap.append(xt)
heap.append(cls.__code__)
heap.append(name)
dic[name] = pos
codes[id(xt)] = xt
heap[last_word] = pos
return pos
D_PREV_OFFSET = 0
D_XT_OFFSET = 1
D_CLS_OFFSET = 2
D_NAME_OFFSET = 3
def init_rx():
# compilation
add_word(';', op_finalize_word, macro)
add_word(',', lambda: compile_byte(stack.pop()), word)
# prefixes
add_word('prefix::', lambda: prepare_code(stack.pop()), macro)
add_word('prefix:#', lambda: None, literal)
add_word('prefix:&', op_deref, macro)
# stack operations
add_word('dup', lambda: stack.append(stack[-1]), word)
add_word('drop', lambda: stack.pop(), word)
add_word('swap', lambda: set_top(stack[-1], stack[-2]), word)
# memory operations
add_word('fetch', lambda: set_top(heap[stack[-1]]), word)
add_word('store', lambda: heap.__setitem__(stack.pop(), stack.pop()), word)
# arithmetic operations
add_word('+', lambda: set_top(stack[-2] + stack.pop()), word)
add_word('-', lambda: set_top(stack[-2] - stack.pop()), word)
add_word('*', lambda: set_top(stack[-2] * stack.pop()), word)
add_word('/', lambda: set_top(stack[-2] / stack.pop()), word)
# control structures
add_word('repeat', op_repeat, compiler)
add_word('again', op_again, compiler)
add_word('0;', op_0break, compiler)
add_word('[', op_open_quot, macro)
add_word(']', op_close_quot, macro)
add_word('call', lambda: exec(stack.pop()), word)
# conditionals
add_word('if', op_if, word)
add_word('-if', op_ifnot, word)
add_word('choose', op_choice, word)
# dictionary
add_word('d:link', lambda: stack.append(last_word), word)
add_word('d:class', lambda: stack.append(stack.pop() + D_CLS_OFFSET), word)
add_word('d:xt', lambda: stack.append(stack.pop() + D_XT_OFFSET), word)
add_word('d:lookup', lambda: stack.append(dic[stack.pop()]), word)
# classes
add_word('class:macro', macro, word)
add_word('class:word', word, word)
add_word('class:data', data, word)
# variables
add_word('Compiler', compilation, data)
add_word('Heaptest', heaptest, data)
add_word('Dictionary', last_word, data)
# data types
add_word('s:to-number', lambda: stack.append(int(stack.pop())), word)
def init_retro():
"""build retro 12 words on top of Rx."""
interpret(':d:last &Dictionary fetch ;')
interpret(':reclass d:last d:class store ;')
interpret(':immediate &class:macro reclass ;')
interpret(':data &class:data reclass ;')
add_word('depth', lambda: stack.append(len(stack)), word)
interpret(':prefix:@ d:lookup d:xt fetch class:data &fetch class:word ; immediate')
interpret(':prefix:! d:lookup d:xt fetch class:data &store class:word ; immediate')
interpret(':compiling? @Compiler ;')
# we inline instruction names instead of opcode numbers
interpret(':prefix:` compiling? [ , ] [ drop ] choose ; immediate')
# TODO...
def assert_stack(*args):
assert len(stack) == len(args)
for s, a in zip(stack, args):
if a != assert_stack.ignore:
assert s == a
assert_stack.ignore = lambda: None
def test_interpret(string, *args):
interpret(string)
assert_stack(*args)
stack.clear()
print('passed:', string)
init_rx()
# No REPL yet, so here are a few usage examples:
test_interpret('5', 5)
test_interpret('#5', 5)
test_interpret(':x #5 ;')
test_interpret(':x #5 ; x', 5)
test_interpret('1 2 swap', 2, 1)
test_interpret('1 2 -', -1)
test_interpret('84 2 /', 42)
test_interpret(':x 1 ;')
test_interpret(':x 1 ; x', 1)
test_interpret(':sqr dup * ;')
test_interpret('2 3 + sqr', 25)
test_interpret('pow(2,3.14)', 2 ** 3.14)
test_interpret('"hello" chr(32) "world!" + +', 'hello world!')
test_interpret(':0add repeat 0; + swap again ;')
test_interpret('False 1 2 3 4 5 0add', 15)
test_interpret(':the-answer 42 ;')
test_interpret('the-answer the-answer', 42, 42)
test_interpret('[ 1 2 3 + * ]', assert_stack.ignore)
test_interpret('[ 3 2 1 + * ] call', 9)
test_interpret(':nest 3 [ sqr dup sqr swap drop ] call ;')
test_interpret('nest', 81)
test_interpret('False [ 1 ] -if', 1)
test_interpret('True [ 2 ] if', 2)
test_interpret('False [ 1 ] if')
test_interpret('True [ 2 ] -if')
test_interpret('False [ 3 ] [ 4 ] choose', 4)
test_interpret('True [ 3 ] [ 4 ] choose', 3)
init_retro()
#
# License
#
# pyrx is Copyright (c) 2017, Martin Billinger
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment