mfekadu/.gitignore

## .gitignore

.DS_Store

## lisp.py
"""
Here is what an `expr` can be...

expr = int    # number
     | symbol # variable
     | str    # string
     | (if expr expr expr) # control flow
     | (expr expr ...)     # function call

Top-level overview

parse first tokenizes string input
then creates an Abstract Syntax Tree based on the text

ExprC Structures (represented by tuples because tuples are immutable):
* <DataType> -- (<type>, <value> ...)
* Number     -- ('num', 42)
* Symbol     -- ('sym', 'name')
* String     -- ('str', 'value')
* If         -- ('if', 'cond', 'then', 'else')
* func       -- (('sym', name), args ... )
Optionally...
* Func       -- ('func', 'name', (args))
* bin_func   -- ('bin', (left_expr), (right_expr))
Not Sure How...
* list       -- ('list', (values ...))
*

# interesting thing I learned....
# tuples are like C structs & thus slightly slower than lists
# https://stackoverflow.com/q/16940293
# but ExprC needs to be immutable, so it is worth it.

Design choices
* immutable ExprC
** tradeoff: speed vs reliability (testing/debugging)
* Python
** tradeoff: quick-to-code vs slower-than-low-level-language
"""

import re
import sys

# for debugging
DEBUG = False

# constant error messages
NOT_IMPL = "\n\tNOT IMPLEMENTED\n"
UNEXPECTED = '\n\tUNEXPECTED EXPRESSION\n'
LONELY_PAREN = '\n\tFOUND LONELY CLOSING PARENTHESIS ")"\n'

# Read-eval-print loop
def repl():
    print("\nWELCOME TO LISP-ish")
    print("type `quit()` or `exit()` or press CTRL+D to quit.\n")
    while(1):
        try:
            text = input('<> ')
            print("text:\t", text) if DEBUG else None
        except:
            print('You quit via CTRL+D')
            return 0
        if (text == "quit()" or text == 'exit()' ):
            print("GOODBYE!")
            return 0
        elif (text == ""):
            continue
        else:
            print(top_interp(text))

# top-level interpreter
def top_interp(expr):
    return interp(parse(tokenize(expr)))

"""
INTERP FUNCTIONS
"""

# TODO: implement interp
# return the actual value after interpreting the ExprC
def interp(E):

    t, *v = E # unpack the type and values

    if (t == 'num'): # if num then return that value
        return v[0]
    elif isinstance(t,tuple): # if tuple, read the type
        print("hello")
        t2, v2 = t
        if (v2 == 'list'):
            print("hello")
            return [interp(n) for n in v]

    print('t:\t', t) if DEBUG else None
    print('v:\t', v) if DEBUG else None

    return E
    # raise Exception(NOT_IMPL)

"""
PARSE FUNCTIONS
"""

# return a tuple of ExprC given a tokenized list of strings

# calls parse recursively to grow the AST

def parse(expr):
    # read a token # [token, [expr...]] # [first, [rest...]]
    print("expr:\t", expr) if DEBUG else None
    token = expr.pop(0) # pop(0) pops from the front
    print("token:\t", token) if DEBUG else None

    if (token == '('):
        AST = []
        print('got (') if DEBUG else None
        print('rest:\t', expr) if DEBUG else None
        while (expr[0] != ')'): # keep parsing the rest until ')'
            print("AST", AST) if DEBUG else None
            AST.append(parse(expr))
        expr.pop(0) # the ')' must be popped in case of contiguous lists
        return tuple(AST)
    elif (token == ')'):
        print('got )') if DEBUG else None
        raise Exception(LONELY_PAREN)
    elif (is_num(token)): # check if number
        print('got num') if DEBUG else None
        return ('num', is_num(token))
    elif (isinstance(token, str)):
        # notice "foo" is lisp string but 'foo and 'foo' are a symbols
        if (token[0] == '\"' and token[-1] == '\"'):
            print('got str') if DEBUG else None
            return ('str', token[1:-1])
        print('got sym') if DEBUG else None
        # anything else is a symbol
        if (token[0] == '\'' and token[-1] == '\''):
            return ('sym', token[1:-1])
        if (token[0] == '\''):
            return ('sym', token[1:])
        if (token[-1] == '\''):
            return ('sym', token[:-1])
        return ('sym', token)
    else:
        raise Exception(UNEXPECTED)

# given a string, return the number if it represents a number value
# Number is either int or float
def is_num(expr):
    try:
        return int(expr)
    except:
        try:
            return float(expr)
        except:
            return False

# return a string with replaced characters based on a dictionary, R.
# performs replacement in a single pass.
def replace_n(R, text):
    # escape any special characters in replacements
    R = { re.escape(k):v for k, v in R.items() }

    # create a pattern for use in regex
    # i.e. R = {'(':" ( ", ')':' ) '} >> P = r'foo|bar'
    pattern = re.compile('|'.join(R.keys()))

    # look up match and do the substitution if it is in dictionary
    return pattern.sub(lambda match: R[re.escape(match.group(0))], text)

# return list representation of Lisp code as string
# given "(first [1, '2'])" expect ['(', 'first', "[1,'2']", ')']
def tokenize(s):
    R = {'(':' ( ', ')':' ) ',
         '[':' [ ', ']':' ] ',
         '{':' { ', '}':' } '}
    # safely space out the parenthesis, then tokenize
    return replace_n(R, s).split()

"""
TEST CASES
"""

def do_assert(output, expect):
    msg = "\noutput:\t{} \n!=\nexpect:\t{}\n".format(output, expect)
    assert output == expect, msg

def test_replace_n():
    print("test_replace_n...", end="")
    # TEST replace_n
    R = {'foo':'very', 'bar':'fizzbuzz'}
    given = "apple pie is foo delicious, bar!"
    expect = "apple pie is very delicious, fizzbuzz!"
    output = replace_n(R, given)
    do_assert(output, expect)
    print("\tdone")

def test_is_num():
    print("test_is_num...", end="")
    # TEST is_num
    # int
    do_assert(is_num('42'), 42)
    do_assert(is_num('-42'), -42)
    # float
    do_assert(is_num('42.0'), 42.0)
    do_assert(is_num('-42.0'), -42.0)
    # non-numerical
    do_assert(is_num('abc'), False)
    do_assert(is_num('(42)'), False)
    print("\t\tdone")

def test_tokenize():
    print("test_tokenize...", end="")
    # TEST tokenize
    given = "(+ 1 (* 4 2)) (foo 42)"
    output = tokenize(given)
    expect = ['(', '+', '1', '(', '*','4','2',')',')','(','foo','42',')']
    do_assert(output, expect)

    given = '(foo (42))'
    expect = ['(', 'foo', '(', '42', ')', ')']
    do_assert(tokenize(given), expect)

    given = '(first (list 1 (+ 2 3) 9))'
    expect = ['(','first','(','list','1','(','+','2','3',')','9',')',')']
    do_assert(tokenize(given), expect)
    print("\tdone")

def test_parse():
    print("test_parse...", end="")
    # TEST parse
    given = ['1','2', "3", "[1,2]"]
    expect = ('num',1) # lisp repl seems to only parse the first thing
    do_assert(parse(given), expect)

    given = ['1']
    expect = (('num', 1))
    do_assert(parse(given), expect)

    given = ['"string"']
    expect = (('str', 'string'))
    do_assert(parse(given), expect)

    given = ['symbol']
    same_expect = (('sym', 'symbol'))
    do_assert(parse(given), same_expect)

    given = ['\'symbol']
    do_assert(parse(given), same_expect)

    given = ['\'symbol\'']
    do_assert(parse(given), same_expect)

    given = ['symbol\'']
    do_assert(parse(given), same_expect)

    given = ['(', 'func', '99', ')']
    expect = (('sym', 'func'), ('num', 99))
    do_assert(parse(given), expect)

    given = ['(', 'first', '(',  '1', '2', ')', ')']
    expect = (('sym', 'first'), (('num', 1), ('num', 2)))
    do_assert(parse(given), expect)

    given = ['(', 'foo', '(', '99', ')', '(', '99', ')', ')']
    expect = (('sym', 'foo'), (('num', 99),),(('num', 99),))
    do_assert(parse(given), expect)

    given = ['(', 'first', '(', 'list', '1',
            '(', '+', '2', '3', ')', '9', ')', ')']
    expect = (('sym', 'first'), (('sym', 'list'), ('num', 1),
                (('sym', '+'), ('num', 2), ('num', 3)), ('num', 9)))
    do_assert(parse(given), expect)
    print("\t\tdone")

def test_interp():
    print("test_interp...", end="")
    given = ('num', 3)
    expect = 3
    do_assert(interp(given), expect)

    print("\t\tdone")

def test_all():
    test_replace_n()
    test_is_num()
    test_tokenize()
    test_parse()
    test_interp()
    print("\nALL TESTS PASSING\n")

if __name__ == "__main__":
    if (len(sys.argv) > 1 and sys.argv[1].lower() == "test"):
        test_all()
        sys.exit(0) # read this with `echo $?` in shell
    elif (len(sys.argv) > 1 and sys.argv[1].lower() == "debug"):
        DEBUG = True
        repl()
        sys.exit(0)
    else:
        repl()
        sys.exit(0)
	"""
	Here is what an `expr` can be...

	expr = int # number
	\| symbol # variable
	\| str # string
	\| (if expr expr expr) # control flow
	\| (expr expr ...) # function call

	Top-level overview

	parse first tokenizes string input
	then creates an Abstract Syntax Tree based on the text

	ExprC Structures (represented by tuples because tuples are immutable):
	* <DataType> -- (<type>, <value> ...)
	* Number -- ('num', 42)
	* Symbol -- ('sym', 'name')
	* String -- ('str', 'value')
	* If -- ('if', 'cond', 'then', 'else')
	* func -- (('sym', name), args ... )
	Optionally...
	* Func -- ('func', 'name', (args))
	* bin_func -- ('bin', (left_expr), (right_expr))
	Not Sure How...
	* list -- ('list', (values ...))
	*

	# interesting thing I learned....
	# tuples are like C structs & thus slightly slower than lists
	# https://stackoverflow.com/q/16940293
	# but ExprC needs to be immutable, so it is worth it.

	Design choices
	* immutable ExprC
	** tradeoff: speed vs reliability (testing/debugging)
	* Python
	** tradeoff: quick-to-code vs slower-than-low-level-language
	"""

	import re
	import sys

	# for debugging
	DEBUG = False

	# constant error messages
	NOT_IMPL = "\n\tNOT IMPLEMENTED\n"
	UNEXPECTED = '\n\tUNEXPECTED EXPRESSION\n'
	LONELY_PAREN = '\n\tFOUND LONELY CLOSING PARENTHESIS ")"\n'

	# Read-eval-print loop
	def repl():
	print("\nWELCOME TO LISP-ish")
	print("type `quit()` or `exit()` or press CTRL+D to quit.\n")
	while(1):
	try:
	text = input('<> ')
	print("text:\t", text) if DEBUG else None
	except:
	print('You quit via CTRL+D')
	return 0
	if (text == "quit()" or text == 'exit()' ):
	print("GOODBYE!")
	return 0
	elif (text == ""):
	continue
	else:
	print(top_interp(text))

	# top-level interpreter
	def top_interp(expr):
	return interp(parse(tokenize(expr)))

	"""
	INTERP FUNCTIONS
	"""

	# TODO: implement interp
	# return the actual value after interpreting the ExprC
	def interp(E):

	t, *v = E # unpack the type and values

	if (t == 'num'): # if num then return that value
	return v[0]
	elif isinstance(t,tuple): # if tuple, read the type
	print("hello")
	t2, v2 = t
	if (v2 == 'list'):
	print("hello")
	return [interp(n) for n in v]

	print('t:\t', t) if DEBUG else None
	print('v:\t', v) if DEBUG else None

	return E
	# raise Exception(NOT_IMPL)

	"""
	PARSE FUNCTIONS
	"""

	# return a tuple of ExprC given a tokenized list of strings

	# calls parse recursively to grow the AST

	def parse(expr):
	# read a token # [token, [expr...]] # [first, [rest...]]
	print("expr:\t", expr) if DEBUG else None
	token = expr.pop(0) # pop(0) pops from the front
	print("token:\t", token) if DEBUG else None

	if (token == '('):
	AST = []
	print('got (') if DEBUG else None
	print('rest:\t', expr) if DEBUG else None
	while (expr[0] != ')'): # keep parsing the rest until ')'
	print("AST", AST) if DEBUG else None
	AST.append(parse(expr))
	expr.pop(0) # the ')' must be popped in case of contiguous lists
	return tuple(AST)
	elif (token == ')'):
	print('got )') if DEBUG else None
	raise Exception(LONELY_PAREN)
	elif (is_num(token)): # check if number
	print('got num') if DEBUG else None
	return ('num', is_num(token))
	elif (isinstance(token, str)):
	# notice "foo" is lisp string but 'foo and 'foo' are a symbols
	if (token[0] == '\"' and token[-1] == '\"'):
	print('got str') if DEBUG else None
	return ('str', token[1:-1])
	print('got sym') if DEBUG else None
	# anything else is a symbol
	if (token[0] == '\'' and token[-1] == '\''):
	return ('sym', token[1:-1])
	if (token[0] == '\''):
	return ('sym', token[1:])
	if (token[-1] == '\''):
	return ('sym', token[:-1])
	return ('sym', token)
	else:
	raise Exception(UNEXPECTED)

	# given a string, return the number if it represents a number value
	# Number is either int or float
	def is_num(expr):
	try:
	return int(expr)
	except:
	try:
	return float(expr)
	except:
	return False

	# return a string with replaced characters based on a dictionary, R.
	# performs replacement in a single pass.
	def replace_n(R, text):
	# escape any special characters in replacements
	R = { re.escape(k):v for k, v in R.items() }

	# create a pattern for use in regex
	# i.e. R = {'(':" ( ", ')':' ) '} >> P = r'foo\|bar'
	pattern = re.compile('\|'.join(R.keys()))

	# look up match and do the substitution if it is in dictionary
	return pattern.sub(lambda match: R[re.escape(match.group(0))], text)

	# return list representation of Lisp code as string
	# given "(first [1, '2'])" expect ['(', 'first', "[1,'2']", ')']
	def tokenize(s):
	R = {'(':' ( ', ')':' ) ',
	'[':' [ ', ']':' ] ',
	'{':' { ', '}':' } '}
	# safely space out the parenthesis, then tokenize
	return replace_n(R, s).split()

	"""
	TEST CASES
	"""

	def do_assert(output, expect):
	msg = "\noutput:\t{} \n!=\nexpect:\t{}\n".format(output, expect)
	assert output == expect, msg

	def test_replace_n():
	print("test_replace_n...", end="")
	# TEST replace_n
	R = {'foo':'very', 'bar':'fizzbuzz'}
	given = "apple pie is foo delicious, bar!"
	expect = "apple pie is very delicious, fizzbuzz!"
	output = replace_n(R, given)
	do_assert(output, expect)
	print("\tdone")

	def test_is_num():
	print("test_is_num...", end="")
	# TEST is_num
	# int
	do_assert(is_num('42'), 42)
	do_assert(is_num('-42'), -42)
	# float
	do_assert(is_num('42.0'), 42.0)
	do_assert(is_num('-42.0'), -42.0)
	# non-numerical
	do_assert(is_num('abc'), False)
	do_assert(is_num('(42)'), False)
	print("\t\tdone")

	def test_tokenize():
	print("test_tokenize...", end="")
	# TEST tokenize
	given = "(+ 1 (* 4 2)) (foo 42)"
	output = tokenize(given)
	expect = ['(', '+', '1', '(', '*','4','2',')',')','(','foo','42',')']
	do_assert(output, expect)

	given = '(foo (42))'
	expect = ['(', 'foo', '(', '42', ')', ')']
	do_assert(tokenize(given), expect)

	given = '(first (list 1 (+ 2 3) 9))'
	expect = ['(','first','(','list','1','(','+','2','3',')','9',')',')']
	do_assert(tokenize(given), expect)
	print("\tdone")

	def test_parse():
	print("test_parse...", end="")
	# TEST parse
	given = ['1','2', "3", "[1,2]"]
	expect = ('num',1) # lisp repl seems to only parse the first thing
	do_assert(parse(given), expect)

	given = ['1']
	expect = (('num', 1))
	do_assert(parse(given), expect)

	given = ['"string"']
	expect = (('str', 'string'))
	do_assert(parse(given), expect)

	given = ['symbol']
	same_expect = (('sym', 'symbol'))
	do_assert(parse(given), same_expect)

	given = ['\'symbol']
	do_assert(parse(given), same_expect)

	given = ['\'symbol\'']
	do_assert(parse(given), same_expect)

	given = ['symbol\'']
	do_assert(parse(given), same_expect)

	given = ['(', 'func', '99', ')']
	expect = (('sym', 'func'), ('num', 99))
	do_assert(parse(given), expect)

	given = ['(', 'first', '(', '1', '2', ')', ')']
	expect = (('sym', 'first'), (('num', 1), ('num', 2)))
	do_assert(parse(given), expect)

	given = ['(', 'foo', '(', '99', ')', '(', '99', ')', ')']
	expect = (('sym', 'foo'), (('num', 99),),(('num', 99),))
	do_assert(parse(given), expect)

	given = ['(', 'first', '(', 'list', '1',
	'(', '+', '2', '3', ')', '9', ')', ')']
	expect = (('sym', 'first'), (('sym', 'list'), ('num', 1),
	(('sym', '+'), ('num', 2), ('num', 3)), ('num', 9)))
	do_assert(parse(given), expect)
	print("\t\tdone")

	def test_interp():
	print("test_interp...", end="")
	given = ('num', 3)
	expect = 3
	do_assert(interp(given), expect)

	print("\t\tdone")

	def test_all():
	test_replace_n()
	test_is_num()
	test_tokenize()
	test_parse()
	test_interp()
	print("\nALL TESTS PASSING\n")

	if __name__ == "__main__":
	if (len(sys.argv) > 1 and sys.argv[1].lower() == "test"):
	test_all()
	sys.exit(0) # read this with `echo $?` in shell
	elif (len(sys.argv) > 1 and sys.argv[1].lower() == "debug"):
	DEBUG = True
	repl()
	sys.exit(0)
	else:
	repl()
	sys.exit(0)