Skip to content

Instantly share code, notes, and snippets.

@veryeli
Last active July 16, 2018 00:18
Show Gist options
  • Save veryeli/805ef91bdfe476a254ca45b5c98d241b to your computer and use it in GitHub Desktop.
Save veryeli/805ef91bdfe476a254ca45b5c98d241b to your computer and use it in GitHub Desktop.
Lisp Parser for Recurse Center Interview
# Parse an abstrat syntax tree from a Lisp expression
def tokenize(expression):
'''Ensures the expression is parsed into words and parentheses.'''
# only tokenize if it's not already tokenized
if type(expression) == list:
return expression
if type(expression) == str:
# put spaces around parentheses so they get tokenized separately
expression = expression.replace('(', ' ( ').replace(')', ' ) ')
return expression.split()
def is_balanced(expression):
'''Check whether the parentheses are balanced
We have to keep track of the 'depth' rather than just counting
open and close parens because e.g. ')(()' is not a balanced
expression despite having the same # of open and close parens
'''
depth = 0
for i, char in enumerate(expression):
depth += 1 if char is '(' else -1 if char is ')' else 0
if depth < 0:
return False
if depth > 0:
return False
return True
def validate(expression):
'''Ensure the expression is valid Lisp'''
if not is_balanced(expression):
raise RuntimeError("%s is not balanced" % ' '.join(expression))
if len(expression) > 1 and expression[0] != '(':
raise RuntimeError("any multi atom expression must be in parentheses")
def _parse(expression):
'''Parses a string of tokens into an AST'''
# if the expression has parentheses, evaluate what's inside
if expression[0] == '(':
expression = expression[1:-1]
# otherwise, it's an atom, so just return the atom
else:
return expression[0]
# process the expression, sending any parenthesized portions
# to be parsed recursively
parsed = []
while len(expression):
# if there's a sub-expression, recursively parse it
if expression[0] == '(':
sub_e = []
for e in expression:
sub_e.append(e)
# the sub-expression is ready when it's balanced
if is_balanced(sub_e):
break
# we add the parsed sub-expression to our list and shorten
# the remaining expression we're looking at
expression = expression[len(sub_e):]
parsed.append(_parse(sub_e))
else:
parsed.append(expression[0])
expression = expression[1:]
return parsed
def parse(expression):
'''Parse a Lisp expression into an AST
Most of the work for this function happens in _parse,
which parse primarily serves as a wrapper for.
parse performs tokenization and basic validation, then
sends the tokenized list to _parse for parsing'''
expression = tokenize(expression)
validate(expression)
return _parse(expression)
def _test():
# assert statements are a low-key way of checking code
# ensure the rc example works
assert parse("(first(list 1(+ 2 3) 9))") == [
'first',
['list', '1', ['+', '2', '3'], '9']]
# ensure we can have adjacent sub-expressions with branched recursive structure
assert parse("(first(list 1(+ 2 3) 9)(list 1 (+ 2 3) 9))") == [
'first',
['list', '1', ['+', '2', '3'], '9'],
['list', '1', ['+', '2', '3'], '9']]
# ensure atoms work
assert parse("7") == "7"
# and also empty lists
assert parse("()") == []
# and also lists of empty lists
assert parse("(()())") == [[], []]
# and that we'll catch an unbalanced expression
assert is_balanced("((())") is False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment