Last active
July 16, 2018 00:18
-
-
Save veryeli/805ef91bdfe476a254ca45b5c98d241b to your computer and use it in GitHub Desktop.
Lisp Parser for Recurse Center Interview
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse an abstrat syntax tree from a Lisp expression | |
def tokenize(expression): | |
'''Ensures the expression is parsed into words and parentheses.''' | |
# only tokenize if it's not already tokenized | |
if type(expression) == list: | |
return expression | |
if type(expression) == str: | |
# put spaces around parentheses so they get tokenized separately | |
expression = expression.replace('(', ' ( ').replace(')', ' ) ') | |
return expression.split() | |
def is_balanced(expression): | |
'''Check whether the parentheses are balanced | |
We have to keep track of the 'depth' rather than just counting | |
open and close parens because e.g. ')(()' is not a balanced | |
expression despite having the same # of open and close parens | |
''' | |
depth = 0 | |
for i, char in enumerate(expression): | |
depth += 1 if char is '(' else -1 if char is ')' else 0 | |
if depth < 0: | |
return False | |
if depth > 0: | |
return False | |
return True | |
def validate(expression): | |
'''Ensure the expression is valid Lisp''' | |
if not is_balanced(expression): | |
raise RuntimeError("%s is not balanced" % ' '.join(expression)) | |
if len(expression) > 1 and expression[0] != '(': | |
raise RuntimeError("any multi atom expression must be in parentheses") | |
def _parse(expression): | |
'''Parses a string of tokens into an AST''' | |
# if the expression has parentheses, evaluate what's inside | |
if expression[0] == '(': | |
expression = expression[1:-1] | |
# otherwise, it's an atom, so just return the atom | |
else: | |
return expression[0] | |
# process the expression, sending any parenthesized portions | |
# to be parsed recursively | |
parsed = [] | |
while len(expression): | |
# if there's a sub-expression, recursively parse it | |
if expression[0] == '(': | |
sub_e = [] | |
for e in expression: | |
sub_e.append(e) | |
# the sub-expression is ready when it's balanced | |
if is_balanced(sub_e): | |
break | |
# we add the parsed sub-expression to our list and shorten | |
# the remaining expression we're looking at | |
expression = expression[len(sub_e):] | |
parsed.append(_parse(sub_e)) | |
else: | |
parsed.append(expression[0]) | |
expression = expression[1:] | |
return parsed | |
def parse(expression): | |
'''Parse a Lisp expression into an AST | |
Most of the work for this function happens in _parse, | |
which parse primarily serves as a wrapper for. | |
parse performs tokenization and basic validation, then | |
sends the tokenized list to _parse for parsing''' | |
expression = tokenize(expression) | |
validate(expression) | |
return _parse(expression) | |
def _test(): | |
# assert statements are a low-key way of checking code | |
# ensure the rc example works | |
assert parse("(first(list 1(+ 2 3) 9))") == [ | |
'first', | |
['list', '1', ['+', '2', '3'], '9']] | |
# ensure we can have adjacent sub-expressions with branched recursive structure | |
assert parse("(first(list 1(+ 2 3) 9)(list 1 (+ 2 3) 9))") == [ | |
'first', | |
['list', '1', ['+', '2', '3'], '9'], | |
['list', '1', ['+', '2', '3'], '9']] | |
# ensure atoms work | |
assert parse("7") == "7" | |
# and also empty lists | |
assert parse("()") == [] | |
# and also lists of empty lists | |
assert parse("(()())") == [[], []] | |
# and that we'll catch an unbalanced expression | |
assert is_balanced("((())") is False |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment