Skip to content

Instantly share code, notes, and snippets.

@mhagiwara
Created March 2, 2019 03:10
Show Gist options
  • Save mhagiwara/a6fb0309f26ceb91d46d1b3ebed34d34 to your computer and use it in GitHub Desktop.
Save mhagiwara/a6fb0309f26ceb91d46d1b3ebed34d34 to your computer and use it in GitHub Desktop.
import re
from ast import literal_eval
def parse1(code):
"""Approach 1 — use Python's internal literal parser to parse code.
Parameter:
code (str): code string
Returns:
ast (list): parsed AST represented by a (nested) list. None if failed to parse.
"""
# normalize spaces etc.
code = re.sub(r'[ \n]+', ' ', code)
# quote everything except parentheses and numbers
code = re.sub(r'([^()\d ]+)', '"\\1"', code)
# replace parentheses with square brackets
code = code.replace('(', '[').replace(')', ']')
# insert commas
code = code.replace(' ', ', ')
try:
# parse it
ast = literal_eval(code)
return ast
except SyntaxError:
return None
def tokenize(code):
"""Tokenizer for Lisp
Parameter:
code (str): code string
Returns:
tokens (list): list of tokens
"""
# normalize spaces etc.
code = re.sub(r'[ \n]+', ' ', code)
# split into tokens
tokens = re.findall(r'\(|\)|[^() ]+', code)
# convert numbers
tokens = [int(token) if re.match('\d+', token) else token for token in tokens]
return tokens
def rindex(haystack, needle):
"""Return the rightmost index of `needle` in `haystack`, or -1 if not found."""
try:
start_pos = len(haystack) - haystack[::-1].index(needle) - 1
except ValueError:
start_pos = -1
return start_pos
def parse2(code):
"""Approach 2 — use my own tokenizer and stack-based parser.
Parameter:
code (str): code string
Returns:
ast (list): parsed AST represented by a (nested) list. None if failed to parse.
"""
tokens = tokenize(code)
queue = []
for token in tokens:
if token == '(':
queue.append(token)
elif token == ')':
start_pos = rindex(queue, '(') # find an opening parenthesis
if start_pos == -1:
return None
sub_list = queue[start_pos + 1:] # extract last N elements as a list
queue[start_pos:] = [sub_list] # replace
else:
queue.append(token)
return queue[0]
def test():
# test code 1 (from the task description)
code1 = "(first (list 1 (+ 2 3) 9))"
# test code 2 (from Wikipedia)
code2 = """(defun factorial (n)
(if (= n 0) 1
(* n (factorial (- n 1)))))"""
# test code 3 (with a syntax error)
code3 = """(+ 1 2))"""
print(parse1(code1))
print(parse1(code2))
print(parse1(code3))
print(parse2(code1))
print(parse2(code2))
print(parse2(code3))
if __name__ == '__main__':
test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment