Skip to content

Instantly share code, notes, and snippets.

@Yoxem
Last active May 11, 2020 16:25
Show Gist options
  • Save Yoxem/9b14becac0642279b077ab80b16df2a1 to your computer and use it in GitHub Desktop.
Save Yoxem/9b14becac0642279b077ab80b16df2a1 to your computer and use it in GitHub Desktop.
Compiler Test
import re
# test code
# test = "1 + 2 * (23 / 12)"
test1 = "(123 (88) 1) 9(8 7) "
test2 = '''
12
(+ 2 3)
(let (int x) 2)
(let (int y) (+ 5 6))
(l ((int a) (int b)) (
(+ x a)
))
((l ((int x) (int y)) ((+ x y))) 3 2)
'''
test3 = '''
(let a 10)
(l (x y z)
(
(let g 17)
(let b 12)
(let c
(l (x y)
((let g 12)
(let c1 3)
(sum g c1 b a))
))))
'''
# TOKEN
INT = r'(?P<INT>\d+)'
SYMBOL = r'(?P<SYMBOL>[a-zA-Z_+\-*/]+)'
LPARE = r'(?P<LPARE>\()'
RPARE = r'(?P<RPARE>\))'
WSNL = r'(?P<WS>(\s|\n|\r)+)'# whitespace & newline
token_pattern_list = [INT, SYMBOL , LPARE, RPARE, WSNL]
def tokenize(token_pattern_list, src):
token_pattern = re.compile("|".join(token_pattern_list))
raw_token_result_list = []
for match in re.finditer(token_pattern, src):
raw_token_result_list.append((match.lastgroup, match.group(0)))
# [(None, None)] 必加,以避免出錯
token_result_list = [i for i in raw_token_result_list if i[0] != "WS"] + [(None, None)]
return token_result_list
token_result_list = tokenize(token_pattern_list, test2)
print(token_result_list)
"""
exprs = expr +
expr = ( expr + ) | term
term = int | symbol
factor = number
"""
class Parser:
def __init__(self):
self.token = None
self.remained_token = None
def parse(self, token_list):
if token_list[0] == (None, None):
print("The compiled content is empty. Stop parsing.")
return None
self.token = token_list[0]
self.remained_token = token_list[1:]
result = self.exprs()
return result
def advance(self):
self.token = self.remained_token[0]
self.remained_token = self.remained_token[1:]
def exprs(self):
result_content = []
while self.token != (None, None):
result_item = self.expr()
result_content.append(result_item)
result = {"content" : result_content, "type": None}
return result
def expr(self):
if self.token[0] == "LPARE":
result_content = []
self.advance()
while self.token[0] != "RPARE":
result_item = self.expr()
result_content.append(result_item)
result = {"content" : result_content, "type": None}
self.advance()
else:
result = self.term()
return result
def term(self):
result = {"content" : None, "type": None}
if self.token[0] == "INT":
result["content"] = int(self.token[1])
self.advance()
elif self.token[0] == "SYMBOL":
result["content"] = self.token[1]
self.advance()
else:
if self.token[1] == None:
raise Exception("The right parethesis may be missed.")
else:
raise Exception("Expect integer or symbol, find \"%s\"" % self.token[1])
return result
parser = Parser()
src_tree = parser.parse(token_result_list)
print(src_tree)
class TypeChecker:
def __init__(self):
self.var_type_frame_list = self.extend_var_type_frame_list([])
self.original_var_type_pair = \
{"+" : ["->", "int", "int", "int"],
"-" : ["->", "int", "int", "int"],
"*" : ["->", "int", "int", "int"]}
for key, value in self.original_var_type_pair.items():
self.var_type_frame_list[-1][key] = value
print(self.var_type_frame_list)
def extend_var_type_frame_list(self, var_type_frame_list):
new_dict = dict()
cloned_var_type_frame_list = var_type_frame_list.copy()
cloned_var_type_frame_list.append(new_dict)
return cloned_var_type_frame_list
def type_check_src_tree(self, src_tree):
# get src_lines
result = self.type_check_lines(src_tree, self.var_type_frame_list)[0]
return result
def check_function_arg_num(self, function_type, content):
function_arg_len = len(function_type) - 2
if function_arg_len != len(content) - 1:
raise Exception('''The length of the function is %d,
but the arg. number you input is %d''', function_arg_len, len(content))
def is_polymorphism(self, item_type):
return re.match(r'^\'', item_type)
def type_check_line(self, src_line, var_frame):
content = src_line['content']
if isinstance(content, list):
# let
if content[0]['content'] == 'let':
content[2] = self.type_check_line(content[2], var_frame)[0]
lhs_type = content[1]['content'][0]['content']
rhs_type = content[2]['type']
if lhs_type == rhs_type:
var = content[1]['content'][1]
content[1]['content'][1]['type'] = lhs_type
var_frame[-1][var['content']] = lhs_type
else:
raise Exception("the type of %s should be %s, but got %s" %
(var['content'], lhs_type, rhs_type))
# lambda
elif content[0]['content'] == 'l':
new_var_frame = self.extend_var_type_frame_list(var_frame)
function_type = ['->']
for item in content[1]['content']:
# record the type name of biding variable in lambda
type_name = item['content'][0]['content']
new_var_frame[-1][item['content'][1]['content']] = type_name
item['content'][1]['type'] = type_name
new_body = self.type_check_lines(content[2], new_var_frame)[0]
src_line['content'][2] = new_body
return_type = src_line['content'][2]['content'][-1]['type']
function_type.append(return_type)
src_line['type'] = function_type
# function-applying
else:
# evaluate the type first
for i in range(len(content)):
src_line['content'][i] = self.type_check_line(content[i], var_frame)[0]
is_arg_polymorphism = False
function_type = src_line['content'][0]['type']
for item_type in function_type[1:]:
if self.is_polymorphism(item_type):
is_arg_polymorphism = True
#eg. (-> 'A int 'A)
if is_arg_polymorphism == True:
self.check_function_arg_num(function_type, content)
poly_type_dict = dict()
for i in range(1, len(function_type)-1):
this_fun_item_type = function_type[i]
this_item_type = src_line['content'][i]['type']
if self.is_polymorphism(this_fun_item_type) == True:
if not this_item_type in poly_type_dict.keys():
poly_type_dict[this_fun_item_type] = this_item_type
else:
if poly_type_dict[this_fun_item_type] != this_item_type:
expected_type = poly_type_dict[this_fun_item_type]
raise Exception("The polymorphic type should be %s, but got %s" %
(poly_type_dict[this_fun_item_type], this_item_type))
if function_type[i] != src_line['content'][i]['type']:
raise Exception("The type of %s is %s, but it should be %s",
content[i]['content'], src_line['content'][i]['type'],function_type[i])
src_line['content'][-1]['type'] = function_type[-1]
else:
self.check_function_arg_num(function_type, content)
for i in range(1, len(function_type)-1):
if function_type[i] != src_line['content'][i]['type']:
raise Exception("The type of %s is %s, but it should be %s",
content[i]['content'], src_line['content'][i]['type'], function_type[i])
src_line['type'] = function_type[-1]
else:
#if it's a integer constant
if isinstance(content, int):
src_line['type'] = 'int'
elif isinstance(content, str):
# if it's a string
if re.match("\".*\"", content):
src_line['type'] = 'str'
#if it's a bind variable:
else:
if content in var_frame[-1].keys():
src_line['type'] = var_frame[-1][content]
else:
# if it's a free variable:
for i in range(len(var_frame)):
if content in set(var_frame[-i-1].keys()):
src_line['type'] = var_frame[-i-1][content]
if src_line['type'] == None:
raise Exception("The variable %s is not found." % content)
return (src_line, var_frame)
def type_check_lines(self, src_lines, var_frame):
result_tree_list = []
new_var_frame = var_frame
for line in src_lines["content"]:
type_check_line_result = self.type_check_line(line, new_var_frame)
line_result = type_check_line_result[0] # line_mark_result
new_var_frame = type_check_line_result[1] # var_frame
result_tree_list.append(line_result)
#update src_lines["content"]
src_lines["content"] = result_tree_list
return (src_lines, var_frame)
type_checker = TypeChecker()
checked_src_tree = type_checker.type_check_src_tree(src_tree)
print(checked_src_tree)
'''
def extend_var_checker_frame(frame_ls):
frame = { "bind_var" : set(), "free_var" : set()}
frame_ls.append(frame)
return frame_ls
def var_checker_line(src, frame_ls):
# if src is a let expression
if isinstance(src, list):
if src[0] == 'let':
var_name = src[1]
rhs = src[2]
# if var is appeared in the block, raise an exception
if var_name in frame_ls[-1]['free_var'] or var_name in frame_ls[-1]['bind_var']:
raise Exception("The variable %s you want to define is dupilicated." % var_name)
else:
frame_ls[-1]['free_var'] = var_name
frame_ls = var_checker_line(rhs)
# if src is a lambda
if src[0] == 'l':
new_frame_ls = extend_var_checker_frame(frame_ls) # create a new frame
var_ls = src[1]
body = src[2]
var_set = set(var_ls)
#if var_ls have duplicating var, raise Exception.
var_duplicated = [i for i in var_ls if var_ls.count(i)>1]
if len(var_duplicated) > 0:
var_duplicated_str = ", ".join(var_duplicated)
raise Exception("The variables %s in lambda is/are duplicated.")
else:
new_frame_ls[-1]['bind_var'] = var_set
var_checker(body, new_frame_ls) # var. check the body
# function applying (f x y ...)
else:
for item in src:
frame_ls = var_checker_line(item)
else:
# if src is a integer
if isinstance(src, int):
pass
# TODO
# if src is a var
else:
var_name = src
# if var is appeared in the block, do nothing
if var_name in frame_ls[-1]['free_var'] or var_name in frame_ls[-1]['bind_var']:
pass
else:
var_position = None
# find the number of the frame which the var. is defined
for i in range(len(frame_ls)):
if var_name in frame_ls[-i-1]['bind_var']:
var_position = -i-1
else:
pass
if var_position == None:
raise Exception("variable%s is not found." % var_name)
else:
# add the var. to the frame(s) from the frame next to the frame
# defining it to the last frame
for j in range(var_position+1, 0):
frame_ls[j]['free_var'].add(var_name)
return frame_ls
def var_checker(src_tree):
frame_ls = extend_var_checker_frame([])
for src_line in src_tree:
frame_ls = var_checker_line(src_line, frame_ls)
print(frame_ls)
return frame_ls
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment