Last active
May 18, 2019 15:17
-
-
Save mfekadu/fb61f5920e868d91a053c18b040584f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Here is what an `expr` can be... | |
expr = int # number | |
| symbol # variable | |
| str # string | |
| (if expr expr expr) # control flow | |
| (expr expr ...) # function call | |
Top-level overview | |
parse first tokenizes string input | |
then creates an Abstract Syntax Tree based on the text | |
ExprC Structures (represented by tuples because tuples are immutable): | |
* <DataType> -- (<type>, <value> ...) | |
* Number -- ('num', 42) | |
* Symbol -- ('sym', 'name') | |
* String -- ('str', 'value') | |
* If -- ('if', 'cond', 'then', 'else') | |
* func -- (('sym', name), args ... ) | |
Optionally... | |
* Func -- ('func', 'name', (args)) | |
* bin_func -- ('bin', (left_expr), (right_expr)) | |
Not Sure How... | |
* list -- ('list', (values ...)) | |
* | |
# interesting thing I learned.... | |
# tuples are like C structs & thus slightly slower than lists | |
# https://stackoverflow.com/q/16940293 | |
# but ExprC needs to be immutable, so it is worth it. | |
Design choices | |
* immutable ExprC | |
** tradeoff: speed vs reliability (testing/debugging) | |
* Python | |
** tradeoff: quick-to-code vs slower-than-low-level-language | |
""" | |
import re | |
import sys | |
# for debugging | |
DEBUG = False | |
# constant error messages | |
NOT_IMPL = "\n\tNOT IMPLEMENTED\n" | |
UNEXPECTED = '\n\tUNEXPECTED EXPRESSION\n' | |
LONELY_PAREN = '\n\tFOUND LONELY CLOSING PARENTHESIS ")"\n' | |
# Read-eval-print loop | |
def repl(): | |
print("\nWELCOME TO LISP-ish") | |
print("type `quit()` or `exit()` or press CTRL+D to quit.\n") | |
while(1): | |
try: | |
text = input('<> ') | |
print("text:\t", text) if DEBUG else None | |
except: | |
print('You quit via CTRL+D') | |
return 0 | |
if (text == "quit()" or text == 'exit()' ): | |
print("GOODBYE!") | |
return 0 | |
elif (text == ""): | |
continue | |
else: | |
print(top_interp(text)) | |
# top-level interpreter | |
def top_interp(expr): | |
return interp(parse(tokenize(expr))) | |
""" | |
INTERP FUNCTIONS | |
""" | |
# TODO: implement interp | |
# return the actual value after interpreting the ExprC | |
def interp(E): | |
t, *v = E # unpack the type and values | |
if (t == 'num'): # if num then return that value | |
return v[0] | |
elif isinstance(t,tuple): # if tuple, read the type | |
print("hello") | |
t2, v2 = t | |
if (v2 == 'list'): | |
print("hello") | |
return [interp(n) for n in v] | |
print('t:\t', t) if DEBUG else None | |
print('v:\t', v) if DEBUG else None | |
return E | |
# raise Exception(NOT_IMPL) | |
""" | |
PARSE FUNCTIONS | |
""" | |
# return a tuple of ExprC given a tokenized list of strings | |
# calls parse recursively to grow the AST | |
def parse(expr): | |
# read a token # [token, [expr...]] # [first, [rest...]] | |
print("expr:\t", expr) if DEBUG else None | |
token = expr.pop(0) # pop(0) pops from the front | |
print("token:\t", token) if DEBUG else None | |
if (token == '('): | |
AST = [] | |
print('got (') if DEBUG else None | |
print('rest:\t', expr) if DEBUG else None | |
while (expr[0] != ')'): # keep parsing the rest until ')' | |
print("AST", AST) if DEBUG else None | |
AST.append(parse(expr)) | |
expr.pop(0) # the ')' must be popped in case of contiguous lists | |
return tuple(AST) | |
elif (token == ')'): | |
print('got )') if DEBUG else None | |
raise Exception(LONELY_PAREN) | |
elif (is_num(token)): # check if number | |
print('got num') if DEBUG else None | |
return ('num', is_num(token)) | |
elif (isinstance(token, str)): | |
# notice "foo" is lisp string but 'foo and 'foo' are a symbols | |
if (token[0] == '\"' and token[-1] == '\"'): | |
print('got str') if DEBUG else None | |
return ('str', token[1:-1]) | |
print('got sym') if DEBUG else None | |
# anything else is a symbol | |
if (token[0] == '\'' and token[-1] == '\''): | |
return ('sym', token[1:-1]) | |
if (token[0] == '\''): | |
return ('sym', token[1:]) | |
if (token[-1] == '\''): | |
return ('sym', token[:-1]) | |
return ('sym', token) | |
else: | |
raise Exception(UNEXPECTED) | |
# given a string, return the number if it represents a number value | |
# Number is either int or float | |
def is_num(expr): | |
try: | |
return int(expr) | |
except: | |
try: | |
return float(expr) | |
except: | |
return False | |
# return a string with replaced characters based on a dictionary, R. | |
# performs replacement in a single pass. | |
def replace_n(R, text): | |
# escape any special characters in replacements | |
R = { re.escape(k):v for k, v in R.items() } | |
# create a pattern for use in regex | |
# i.e. R = {'(':" ( ", ')':' ) '} >> P = r'foo|bar' | |
pattern = re.compile('|'.join(R.keys())) | |
# look up match and do the substitution if it is in dictionary | |
return pattern.sub(lambda match: R[re.escape(match.group(0))], text) | |
# return list representation of Lisp code as string | |
# given "(first [1, '2'])" expect ['(', 'first', "[1,'2']", ')'] | |
def tokenize(s): | |
R = {'(':' ( ', ')':' ) ', | |
'[':' [ ', ']':' ] ', | |
'{':' { ', '}':' } '} | |
# safely space out the parenthesis, then tokenize | |
return replace_n(R, s).split() | |
""" | |
TEST CASES | |
""" | |
def do_assert(output, expect): | |
msg = "\noutput:\t{} \n!=\nexpect:\t{}\n".format(output, expect) | |
assert output == expect, msg | |
def test_replace_n(): | |
print("test_replace_n...", end="") | |
# TEST replace_n | |
R = {'foo':'very', 'bar':'fizzbuzz'} | |
given = "apple pie is foo delicious, bar!" | |
expect = "apple pie is very delicious, fizzbuzz!" | |
output = replace_n(R, given) | |
do_assert(output, expect) | |
print("\tdone") | |
def test_is_num(): | |
print("test_is_num...", end="") | |
# TEST is_num | |
# int | |
do_assert(is_num('42'), 42) | |
do_assert(is_num('-42'), -42) | |
# float | |
do_assert(is_num('42.0'), 42.0) | |
do_assert(is_num('-42.0'), -42.0) | |
# non-numerical | |
do_assert(is_num('abc'), False) | |
do_assert(is_num('(42)'), False) | |
print("\t\tdone") | |
def test_tokenize(): | |
print("test_tokenize...", end="") | |
# TEST tokenize | |
given = "(+ 1 (* 4 2)) (foo 42)" | |
output = tokenize(given) | |
expect = ['(', '+', '1', '(', '*','4','2',')',')','(','foo','42',')'] | |
do_assert(output, expect) | |
given = '(foo (42))' | |
expect = ['(', 'foo', '(', '42', ')', ')'] | |
do_assert(tokenize(given), expect) | |
given = '(first (list 1 (+ 2 3) 9))' | |
expect = ['(','first','(','list','1','(','+','2','3',')','9',')',')'] | |
do_assert(tokenize(given), expect) | |
print("\tdone") | |
def test_parse(): | |
print("test_parse...", end="") | |
# TEST parse | |
given = ['1','2', "3", "[1,2]"] | |
expect = ('num',1) # lisp repl seems to only parse the first thing | |
do_assert(parse(given), expect) | |
given = ['1'] | |
expect = (('num', 1)) | |
do_assert(parse(given), expect) | |
given = ['"string"'] | |
expect = (('str', 'string')) | |
do_assert(parse(given), expect) | |
given = ['symbol'] | |
same_expect = (('sym', 'symbol')) | |
do_assert(parse(given), same_expect) | |
given = ['\'symbol'] | |
do_assert(parse(given), same_expect) | |
given = ['\'symbol\''] | |
do_assert(parse(given), same_expect) | |
given = ['symbol\''] | |
do_assert(parse(given), same_expect) | |
given = ['(', 'func', '99', ')'] | |
expect = (('sym', 'func'), ('num', 99)) | |
do_assert(parse(given), expect) | |
given = ['(', 'first', '(', '1', '2', ')', ')'] | |
expect = (('sym', 'first'), (('num', 1), ('num', 2))) | |
do_assert(parse(given), expect) | |
given = ['(', 'foo', '(', '99', ')', '(', '99', ')', ')'] | |
expect = (('sym', 'foo'), (('num', 99),),(('num', 99),)) | |
do_assert(parse(given), expect) | |
given = ['(', 'first', '(', 'list', '1', | |
'(', '+', '2', '3', ')', '9', ')', ')'] | |
expect = (('sym', 'first'), (('sym', 'list'), ('num', 1), | |
(('sym', '+'), ('num', 2), ('num', 3)), ('num', 9))) | |
do_assert(parse(given), expect) | |
print("\t\tdone") | |
def test_interp(): | |
print("test_interp...", end="") | |
given = ('num', 3) | |
expect = 3 | |
do_assert(interp(given), expect) | |
print("\t\tdone") | |
def test_all(): | |
test_replace_n() | |
test_is_num() | |
test_tokenize() | |
test_parse() | |
test_interp() | |
print("\nALL TESTS PASSING\n") | |
if __name__ == "__main__": | |
if (len(sys.argv) > 1 and sys.argv[1].lower() == "test"): | |
test_all() | |
sys.exit(0) # read this with `echo $?` in shell | |
elif (len(sys.argv) > 1 and sys.argv[1].lower() == "debug"): | |
DEBUG = True | |
repl() | |
sys.exit(0) | |
else: | |
repl() | |
sys.exit(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment