Created
March 2, 2019 03:10
-
-
Save mhagiwara/a6fb0309f26ceb91d46d1b3ebed34d34 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from ast import literal_eval | |
def parse1(code): | |
"""Approach 1 — use Python's internal literal parser to parse code. | |
Parameter: | |
code (str): code string | |
Returns: | |
ast (list): parsed AST represented by a (nested) list. None if failed to parse. | |
""" | |
# normalize spaces etc. | |
code = re.sub(r'[ \n]+', ' ', code) | |
# quote everything except parentheses and numbers | |
code = re.sub(r'([^()\d ]+)', '"\\1"', code) | |
# replace parentheses with square brackets | |
code = code.replace('(', '[').replace(')', ']') | |
# insert commas | |
code = code.replace(' ', ', ') | |
try: | |
# parse it | |
ast = literal_eval(code) | |
return ast | |
except SyntaxError: | |
return None | |
def tokenize(code): | |
"""Tokenizer for Lisp | |
Parameter: | |
code (str): code string | |
Returns: | |
tokens (list): list of tokens | |
""" | |
# normalize spaces etc. | |
code = re.sub(r'[ \n]+', ' ', code) | |
# split into tokens | |
tokens = re.findall(r'\(|\)|[^() ]+', code) | |
# convert numbers | |
tokens = [int(token) if re.match('\d+', token) else token for token in tokens] | |
return tokens | |
def rindex(haystack, needle): | |
"""Return the rightmost index of `needle` in `haystack`, or -1 if not found.""" | |
try: | |
start_pos = len(haystack) - haystack[::-1].index(needle) - 1 | |
except ValueError: | |
start_pos = -1 | |
return start_pos | |
def parse2(code): | |
"""Approach 2 — use my own tokenizer and stack-based parser. | |
Parameter: | |
code (str): code string | |
Returns: | |
ast (list): parsed AST represented by a (nested) list. None if failed to parse. | |
""" | |
tokens = tokenize(code) | |
queue = [] | |
for token in tokens: | |
if token == '(': | |
queue.append(token) | |
elif token == ')': | |
start_pos = rindex(queue, '(') # find an opening parenthesis | |
if start_pos == -1: | |
return None | |
sub_list = queue[start_pos + 1:] # extract last N elements as a list | |
queue[start_pos:] = [sub_list] # replace | |
else: | |
queue.append(token) | |
return queue[0] | |
def test(): | |
# test code 1 (from the task description) | |
code1 = "(first (list 1 (+ 2 3) 9))" | |
# test code 2 (from Wikipedia) | |
code2 = """(defun factorial (n) | |
(if (= n 0) 1 | |
(* n (factorial (- n 1)))))""" | |
# test code 3 (with a syntax error) | |
code3 = """(+ 1 2))""" | |
print(parse1(code1)) | |
print(parse1(code2)) | |
print(parse1(code3)) | |
print(parse2(code1)) | |
print(parse2(code2)) | |
print(parse2(code3)) | |
if __name__ == '__main__': | |
test() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment