Skip to content

Instantly share code, notes, and snippets.

@NQNStudios
Created December 6, 2017 00:08
Show Gist options
  • Save NQNStudios/0a799a76c04d075d14f7e334b8d8feba to your computer and use it in GitHub Desktop.
Save NQNStudios/0a799a76c04d075d14f7e334b8d8feba to your computer and use it in GitHub Desktop.
Simple LISP parser.
def properType(token):
    """Return ``token`` as the most specific type its text spells.

    ``token`` is expected to be a string. If it parses as an integer an
    ``int`` is returned; otherwise, if it parses as a float, a ``float`` is
    returned; otherwise the string itself is returned unchanged.

    Parsing uses Python's own ``int()`` / ``float()`` grammar, so spellings
    such as ``1e3``, ``inf`` or ``nan`` are converted to floats as well.
    """
    # Try the conversions from most to least specific. A ``finally: return``
    # must not be used for the fallback: it runs on every path and would
    # override the successful conversions, always yielding the raw string.
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            continue
    # Neither numeric form matched: it's just a string.
    return token
def getString(lisp):
    """Return the contents of the quoted string at the start of ``lisp``.

    ``lisp`` must be non-empty and begin with the opening quote character
    (either ``"`` or ``'``). The string ends at the next occurrence of that
    same character.

    Returns a ``(contents, chars_consumed)`` pair. ``chars_consumed`` covers
    the contents plus the quote characters actually present, so the caller
    can skip over the whole literal.

    If the closing quote is missing, everything after the opening quote is
    treated as the contents rather than losing its last character.
    """
    # TODO allow backslash to escape quotes inside of strings
    quote = lisp[0]
    closing_index = lisp.find(quote, 1)
    if closing_index == -1:
        # Unterminated string. Using the -1 from find() as a slice bound
        # would mean "all but the last character", so handle it explicitly
        # and take the rest of the input instead.
        return lisp[1:], len(lisp)
    # Opening quote + contents + closing quote == closing_index + 1 chars.
    return lisp[1:closing_index], closing_index + 1
def getElements(lisp):
    """Recursively parse ``lisp`` into a list of elements.

    Raw tokens are passed through ``properType()``, quoted strings are read
    with ``getString()`` and added as plain strings, and every ``(`` starts a
    nested list that is parsed by a recursive call on the text after it.

    Returns an ``(elements, chars_consumed)`` pair. ``elements`` is always a
    list, even at root level. ``chars_consumed`` is the index of the ``)``
    that terminated this list, so the caller can skip over it; if the input
    ended before any ``)`` was seen it is at least ``len(lisp)``.
    """
    elements = []
    # Characters that end the raw token in progress (besides whitespace).
    delimiters = '()"\''
    current_token = ''
    current_index = 0

    while current_index < len(lisp):
        char = lisp[current_index]
        is_space = char.isspace()

        # Any whitespace or delimiter terminates the token being built.
        # Although it's a minor syntax error for '(' or a quote to occur in
        # the middle of a raw token, we cover for the user by treating it as
        # that token's termination. Checking for a non-empty token also means
        # multiple spaces in a row won't yield empty tokens.
        if (is_space or char in delimiters) and current_token:
            elements.append(properType(current_token))
            current_token = ''

        if char == '(':
            # Start of another list: parse it recursively from the character
            # after the '(' and add the result as a single element.
            child_elements, chars_consumed = getElements(lisp[current_index + 1:])
            elements.append(child_elements)
            # Land on the child's closing ')'; the shared increment at the
            # bottom of the loop then steps past it.
            current_index += chars_consumed + 1
        elif char == ')':
            # The termination of this list: report where it ended.
            return elements, current_index
        elif char == '"' or char == "'":
            # Single or double quotes start a string.
            string_contents, chars_consumed = getString(lisp[current_index:])
            elements.append(string_contents)
            # Land on the closing quote rather than past it, because the
            # shared increment below still runs. Advancing by the full count
            # here would skip the character after the string (possibly a ')').
            current_index += chars_consumed - 1
        elif not is_space:
            # Normal characters just get added to the current token.
            current_token += char

        # Keep track of our position in the string.
        current_index += 1

    # Reached the end of the input: add the last token unless it's empty.
    if current_token:
        elements.append(properType(current_token))
    return elements, current_index
if __name__ == "__main__":
    # Lisp REPL: read one line of Lisp at a time and print its parsed form.

    # raw_input() only exists on Python 2; on Python 3 the equivalent is
    # input(). Python 2's own input() must NOT be used as the fallback there,
    # because it evaluates whatever the user types.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        # User enters Lisp code in the console -- we assume the Lisp to be valid
        try:
            lispCode = read_line('> ')
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            print('')
            break

        # getElements() always returns a list, even at root level
        rootElements = getElements(lispCode)[0]
        if not rootElements:
            # Blank line: there is no expression to show, just prompt again.
            continue

        # Take the first element of the root because the input must be a
        # single valid lisp expression, not multiple.
        astRoot = rootElements[0]
        print(astRoot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment