Skip to content

Instantly share code, notes, and snippets.

@mfekadu
Last active May 18, 2019 15:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mfekadu/fb61f5920e868d91a053c18b040584f3 to your computer and use it in GitHub Desktop.
Save mfekadu/fb61f5920e868d91a053c18b040584f3 to your computer and use it in GitHub Desktop.
"""
Here is what an `expr` can be...
expr = int # number
| symbol # variable
| str # string
| (if expr expr expr) # control flow
| (expr expr ...) # function call
Top-level overview
parse first tokenizes string input
then creates an Abstract Syntax Tree based on the text
ExprC Structures (represented by tuples because tuples are immutable):
* <DataType> -- (<type>, <value> ...)
* Number -- ('num', 42)
* Symbol -- ('sym', 'name')
* String -- ('str', 'value')
* If -- ('if', 'cond', 'then', 'else')
* func -- (('sym', name), args ... )
Optionally...
* Func -- ('func', 'name', (args))
* bin_func -- ('bin', (left_expr), (right_expr))
Not Sure How...
* list -- ('list', (values ...))
*
# interesting thing I learned....
# tuples are like C structs & thus slightly slower than lists
# https://stackoverflow.com/q/16940293
# but ExprC needs to be immutable, so it is worth it.
Design choices
* immutable ExprC
** tradeoff: speed vs reliability (testing/debugging)
* Python
** tradeoff: quick-to-code vs slower-than-low-level-language
"""
import re
import sys
# for debugging
DEBUG = False
# constant error messages
NOT_IMPL = "\n\tNOT IMPLEMENTED\n"
UNEXPECTED = '\n\tUNEXPECTED EXPRESSION\n'
LONELY_PAREN = '\n\tFOUND LONELY CLOSING PARENTHESIS ")"\n'
# Read-eval-print loop
def repl():
print("\nWELCOME TO LISP-ish")
print("type `quit()` or `exit()` or press CTRL+D to quit.\n")
while(1):
try:
text = input('<> ')
print("text:\t", text) if DEBUG else None
except:
print('You quit via CTRL+D')
return 0
if (text == "quit()" or text == 'exit()' ):
print("GOODBYE!")
return 0
elif (text == ""):
continue
else:
print(top_interp(text))
# top-level interpreter
def top_interp(expr):
return interp(parse(tokenize(expr)))
"""
INTERP FUNCTIONS
"""
# TODO: implement interp
# return the actual value after interpreting the ExprC
def interp(E):
t, *v = E # unpack the type and values
if (t == 'num'): # if num then return that value
return v[0]
elif isinstance(t,tuple): # if tuple, read the type
print("hello")
t2, v2 = t
if (v2 == 'list'):
print("hello")
return [interp(n) for n in v]
print('t:\t', t) if DEBUG else None
print('v:\t', v) if DEBUG else None
return E
# raise Exception(NOT_IMPL)
"""
PARSE FUNCTIONS
"""
# return a tuple of ExprC given a tokenized list of strings
# calls parse recursively to grow the AST
def parse(expr):
# read a token # [token, [expr...]] # [first, [rest...]]
print("expr:\t", expr) if DEBUG else None
token = expr.pop(0) # pop(0) pops from the front
print("token:\t", token) if DEBUG else None
if (token == '('):
AST = []
print('got (') if DEBUG else None
print('rest:\t', expr) if DEBUG else None
while (expr[0] != ')'): # keep parsing the rest until ')'
print("AST", AST) if DEBUG else None
AST.append(parse(expr))
expr.pop(0) # the ')' must be popped in case of contiguous lists
return tuple(AST)
elif (token == ')'):
print('got )') if DEBUG else None
raise Exception(LONELY_PAREN)
elif (is_num(token)): # check if number
print('got num') if DEBUG else None
return ('num', is_num(token))
elif (isinstance(token, str)):
# notice "foo" is lisp string but 'foo and 'foo' are a symbols
if (token[0] == '\"' and token[-1] == '\"'):
print('got str') if DEBUG else None
return ('str', token[1:-1])
print('got sym') if DEBUG else None
# anything else is a symbol
if (token[0] == '\'' and token[-1] == '\''):
return ('sym', token[1:-1])
if (token[0] == '\''):
return ('sym', token[1:])
if (token[-1] == '\''):
return ('sym', token[:-1])
return ('sym', token)
else:
raise Exception(UNEXPECTED)
# given a string, return the number if it represents a number value
# Number is either int or float
def is_num(expr):
try:
return int(expr)
except:
try:
return float(expr)
except:
return False
# return a string with replaced characters based on a dictionary, R.
# performs replacement in a single pass.
def replace_n(R, text):
# escape any special characters in replacements
R = { re.escape(k):v for k, v in R.items() }
# create a pattern for use in regex
# i.e. R = {'(':" ( ", ')':' ) '} >> P = r'foo|bar'
pattern = re.compile('|'.join(R.keys()))
# look up match and do the substitution if it is in dictionary
return pattern.sub(lambda match: R[re.escape(match.group(0))], text)
# return list representation of Lisp code as string
# given "(first [1, '2'])" expect ['(', 'first', "[1,'2']", ')']
def tokenize(s):
R = {'(':' ( ', ')':' ) ',
'[':' [ ', ']':' ] ',
'{':' { ', '}':' } '}
# safely space out the parenthesis, then tokenize
return replace_n(R, s).split()
"""
TEST CASES
"""
def do_assert(output, expect):
msg = "\noutput:\t{} \n!=\nexpect:\t{}\n".format(output, expect)
assert output == expect, msg
def test_replace_n():
print("test_replace_n...", end="")
# TEST replace_n
R = {'foo':'very', 'bar':'fizzbuzz'}
given = "apple pie is foo delicious, bar!"
expect = "apple pie is very delicious, fizzbuzz!"
output = replace_n(R, given)
do_assert(output, expect)
print("\tdone")
def test_is_num():
print("test_is_num...", end="")
# TEST is_num
# int
do_assert(is_num('42'), 42)
do_assert(is_num('-42'), -42)
# float
do_assert(is_num('42.0'), 42.0)
do_assert(is_num('-42.0'), -42.0)
# non-numerical
do_assert(is_num('abc'), False)
do_assert(is_num('(42)'), False)
print("\t\tdone")
def test_tokenize():
print("test_tokenize...", end="")
# TEST tokenize
given = "(+ 1 (* 4 2)) (foo 42)"
output = tokenize(given)
expect = ['(', '+', '1', '(', '*','4','2',')',')','(','foo','42',')']
do_assert(output, expect)
given = '(foo (42))'
expect = ['(', 'foo', '(', '42', ')', ')']
do_assert(tokenize(given), expect)
given = '(first (list 1 (+ 2 3) 9))'
expect = ['(','first','(','list','1','(','+','2','3',')','9',')',')']
do_assert(tokenize(given), expect)
print("\tdone")
def test_parse():
print("test_parse...", end="")
# TEST parse
given = ['1','2', "3", "[1,2]"]
expect = ('num',1) # lisp repl seems to only parse the first thing
do_assert(parse(given), expect)
given = ['1']
expect = (('num', 1))
do_assert(parse(given), expect)
given = ['"string"']
expect = (('str', 'string'))
do_assert(parse(given), expect)
given = ['symbol']
same_expect = (('sym', 'symbol'))
do_assert(parse(given), same_expect)
given = ['\'symbol']
do_assert(parse(given), same_expect)
given = ['\'symbol\'']
do_assert(parse(given), same_expect)
given = ['symbol\'']
do_assert(parse(given), same_expect)
given = ['(', 'func', '99', ')']
expect = (('sym', 'func'), ('num', 99))
do_assert(parse(given), expect)
given = ['(', 'first', '(', '1', '2', ')', ')']
expect = (('sym', 'first'), (('num', 1), ('num', 2)))
do_assert(parse(given), expect)
given = ['(', 'foo', '(', '99', ')', '(', '99', ')', ')']
expect = (('sym', 'foo'), (('num', 99),),(('num', 99),))
do_assert(parse(given), expect)
given = ['(', 'first', '(', 'list', '1',
'(', '+', '2', '3', ')', '9', ')', ')']
expect = (('sym', 'first'), (('sym', 'list'), ('num', 1),
(('sym', '+'), ('num', 2), ('num', 3)), ('num', 9)))
do_assert(parse(given), expect)
print("\t\tdone")
def test_interp():
print("test_interp...", end="")
given = ('num', 3)
expect = 3
do_assert(interp(given), expect)
print("\t\tdone")
def test_all():
test_replace_n()
test_is_num()
test_tokenize()
test_parse()
test_interp()
print("\nALL TESTS PASSING\n")
if __name__ == "__main__":
if (len(sys.argv) > 1 and sys.argv[1].lower() == "test"):
test_all()
sys.exit(0) # read this with `echo $?` in shell
elif (len(sys.argv) > 1 and sys.argv[1].lower() == "debug"):
DEBUG = True
repl()
sys.exit(0)
else:
repl()
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment