Last active
May 11, 2020 16:25
-
-
Save Yoxem/9b14becac0642279b077ab80b16df2a1 to your computer and use it in GitHub Desktop.
Compiler Test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# test code | |
# test = "1 + 2 * (23 / 12)" | |
test1 = "(123 (88) 1) 9(8 7) " | |
test2 = ''' | |
12 | |
(+ 2 3) | |
(let (int x) 2) | |
(let (int y) (+ 5 6)) | |
(l ((int a) (int b)) ( | |
(+ x a) | |
)) | |
((l ((int x) (int y)) ((+ x y))) 3 2) | |
''' | |
test3 = ''' | |
(let a 10) | |
(l (x y z) | |
( | |
(let g 17) | |
(let b 12) | |
(let c | |
(l (x y) | |
((let g 12) | |
(let c1 3) | |
(sum g c1 b a)) | |
)))) | |
''' | |
# TOKEN | |
INT = r'(?P<INT>\d+)' | |
SYMBOL = r'(?P<SYMBOL>[a-zA-Z_+\-*/]+)' | |
LPARE = r'(?P<LPARE>\()' | |
RPARE = r'(?P<RPARE>\))' | |
WSNL = r'(?P<WS>(\s|\n|\r)+)'# whitespace & newline | |
token_pattern_list = [INT, SYMBOL , LPARE, RPARE, WSNL] | |
def tokenize(token_pattern_list, src): | |
token_pattern = re.compile("|".join(token_pattern_list)) | |
raw_token_result_list = [] | |
for match in re.finditer(token_pattern, src): | |
raw_token_result_list.append((match.lastgroup, match.group(0))) | |
# [(None, None)] 必加,以避免出錯 | |
token_result_list = [i for i in raw_token_result_list if i[0] != "WS"] + [(None, None)] | |
return token_result_list | |
token_result_list = tokenize(token_pattern_list, test2) | |
print(token_result_list) | |
""" | |
exprs = expr + | |
expr = ( expr + ) | term | |
term = int | symbol | |
factor = number | |
""" | |
class Parser: | |
def __init__(self): | |
self.token = None | |
self.remained_token = None | |
def parse(self, token_list): | |
if token_list[0] == (None, None): | |
print("The compiled content is empty. Stop parsing.") | |
return None | |
self.token = token_list[0] | |
self.remained_token = token_list[1:] | |
result = self.exprs() | |
return result | |
def advance(self): | |
self.token = self.remained_token[0] | |
self.remained_token = self.remained_token[1:] | |
def exprs(self): | |
result_content = [] | |
while self.token != (None, None): | |
result_item = self.expr() | |
result_content.append(result_item) | |
result = {"content" : result_content, "type": None} | |
return result | |
def expr(self): | |
if self.token[0] == "LPARE": | |
result_content = [] | |
self.advance() | |
while self.token[0] != "RPARE": | |
result_item = self.expr() | |
result_content.append(result_item) | |
result = {"content" : result_content, "type": None} | |
self.advance() | |
else: | |
result = self.term() | |
return result | |
def term(self): | |
result = {"content" : None, "type": None} | |
if self.token[0] == "INT": | |
result["content"] = int(self.token[1]) | |
self.advance() | |
elif self.token[0] == "SYMBOL": | |
result["content"] = self.token[1] | |
self.advance() | |
else: | |
if self.token[1] == None: | |
raise Exception("The right parethesis may be missed.") | |
else: | |
raise Exception("Expect integer or symbol, find \"%s\"" % self.token[1]) | |
return result | |
parser = Parser() | |
src_tree = parser.parse(token_result_list) | |
print(src_tree) | |
class TypeChecker: | |
def __init__(self): | |
self.var_type_frame_list = self.extend_var_type_frame_list([]) | |
self.original_var_type_pair = \ | |
{"+" : ["->", "int", "int", "int"], | |
"-" : ["->", "int", "int", "int"], | |
"*" : ["->", "int", "int", "int"]} | |
for key, value in self.original_var_type_pair.items(): | |
self.var_type_frame_list[-1][key] = value | |
print(self.var_type_frame_list) | |
def extend_var_type_frame_list(self, var_type_frame_list): | |
new_dict = dict() | |
cloned_var_type_frame_list = var_type_frame_list.copy() | |
cloned_var_type_frame_list.append(new_dict) | |
return cloned_var_type_frame_list | |
def type_check_src_tree(self, src_tree): | |
# get src_lines | |
result = self.type_check_lines(src_tree, self.var_type_frame_list)[0] | |
return result | |
def check_function_arg_num(self, function_type, content): | |
function_arg_len = len(function_type) - 2 | |
if function_arg_len != len(content) - 1: | |
raise Exception('''The length of the function is %d, | |
but the arg. number you input is %d''', function_arg_len, len(content)) | |
def is_polymorphism(self, item_type): | |
return re.match(r'^\'', item_type) | |
def type_check_line(self, src_line, var_frame): | |
content = src_line['content'] | |
if isinstance(content, list): | |
# let | |
if content[0]['content'] == 'let': | |
content[2] = self.type_check_line(content[2], var_frame)[0] | |
lhs_type = content[1]['content'][0]['content'] | |
rhs_type = content[2]['type'] | |
if lhs_type == rhs_type: | |
var = content[1]['content'][1] | |
content[1]['content'][1]['type'] = lhs_type | |
var_frame[-1][var['content']] = lhs_type | |
else: | |
raise Exception("the type of %s should be %s, but got %s" % | |
(var['content'], lhs_type, rhs_type)) | |
# lambda | |
elif content[0]['content'] == 'l': | |
new_var_frame = self.extend_var_type_frame_list(var_frame) | |
function_type = ['->'] | |
for item in content[1]['content']: | |
# record the type name of biding variable in lambda | |
type_name = item['content'][0]['content'] | |
new_var_frame[-1][item['content'][1]['content']] = type_name | |
item['content'][1]['type'] = type_name | |
new_body = self.type_check_lines(content[2], new_var_frame)[0] | |
src_line['content'][2] = new_body | |
return_type = src_line['content'][2]['content'][-1]['type'] | |
function_type.append(return_type) | |
src_line['type'] = function_type | |
# function-applying | |
else: | |
# evaluate the type first | |
for i in range(len(content)): | |
src_line['content'][i] = self.type_check_line(content[i], var_frame)[0] | |
is_arg_polymorphism = False | |
function_type = src_line['content'][0]['type'] | |
for item_type in function_type[1:]: | |
if self.is_polymorphism(item_type): | |
is_arg_polymorphism = True | |
#eg. (-> 'A int 'A) | |
if is_arg_polymorphism == True: | |
self.check_function_arg_num(function_type, content) | |
poly_type_dict = dict() | |
for i in range(1, len(function_type)-1): | |
this_fun_item_type = function_type[i] | |
this_item_type = src_line['content'][i]['type'] | |
if self.is_polymorphism(this_fun_item_type) == True: | |
if not this_item_type in poly_type_dict.keys(): | |
poly_type_dict[this_fun_item_type] = this_item_type | |
else: | |
if poly_type_dict[this_fun_item_type] != this_item_type: | |
expected_type = poly_type_dict[this_fun_item_type] | |
raise Exception("The polymorphic type should be %s, but got %s" % | |
(poly_type_dict[this_fun_item_type], this_item_type)) | |
if function_type[i] != src_line['content'][i]['type']: | |
raise Exception("The type of %s is %s, but it should be %s", | |
content[i]['content'], src_line['content'][i]['type'],function_type[i]) | |
src_line['content'][-1]['type'] = function_type[-1] | |
else: | |
self.check_function_arg_num(function_type, content) | |
for i in range(1, len(function_type)-1): | |
if function_type[i] != src_line['content'][i]['type']: | |
raise Exception("The type of %s is %s, but it should be %s", | |
content[i]['content'], src_line['content'][i]['type'], function_type[i]) | |
src_line['type'] = function_type[-1] | |
else: | |
#if it's a integer constant | |
if isinstance(content, int): | |
src_line['type'] = 'int' | |
elif isinstance(content, str): | |
# if it's a string | |
if re.match("\".*\"", content): | |
src_line['type'] = 'str' | |
#if it's a bind variable: | |
else: | |
if content in var_frame[-1].keys(): | |
src_line['type'] = var_frame[-1][content] | |
else: | |
# if it's a free variable: | |
for i in range(len(var_frame)): | |
if content in set(var_frame[-i-1].keys()): | |
src_line['type'] = var_frame[-i-1][content] | |
if src_line['type'] == None: | |
raise Exception("The variable %s is not found." % content) | |
return (src_line, var_frame) | |
def type_check_lines(self, src_lines, var_frame): | |
result_tree_list = [] | |
new_var_frame = var_frame | |
for line in src_lines["content"]: | |
type_check_line_result = self.type_check_line(line, new_var_frame) | |
line_result = type_check_line_result[0] # line_mark_result | |
new_var_frame = type_check_line_result[1] # var_frame | |
result_tree_list.append(line_result) | |
#update src_lines["content"] | |
src_lines["content"] = result_tree_list | |
return (src_lines, var_frame) | |
type_checker = TypeChecker() | |
checked_src_tree = type_checker.type_check_src_tree(src_tree) | |
print(checked_src_tree) | |
''' | |
def extend_var_checker_frame(frame_ls): | |
frame = { "bind_var" : set(), "free_var" : set()} | |
frame_ls.append(frame) | |
return frame_ls | |
def var_checker_line(src, frame_ls): | |
# if src is a let expression | |
if isinstance(src, list): | |
if src[0] == 'let': | |
var_name = src[1] | |
rhs = src[2] | |
# if var is appeared in the block, raise an exception | |
if var_name in frame_ls[-1]['free_var'] or var_name in frame_ls[-1]['bind_var']: | |
raise Exception("The variable %s you want to define is dupilicated." % var_name) | |
else: | |
frame_ls[-1]['free_var'] = var_name | |
frame_ls = var_checker_line(rhs) | |
# if src is a lambda | |
if src[0] == 'l': | |
new_frame_ls = extend_var_checker_frame(frame_ls) # create a new frame | |
var_ls = src[1] | |
body = src[2] | |
var_set = set(var_ls) | |
#if var_ls have duplicating var, raise Exception. | |
var_duplicated = [i for i in var_ls if var_ls.count(i)>1] | |
if len(var_duplicated) > 0: | |
var_duplicated_str = ", ".join(var_duplicated) | |
raise Exception("The variables %s in lambda is/are duplicated.") | |
else: | |
new_frame_ls[-1]['bind_var'] = var_set | |
var_checker(body, new_frame_ls) # var. check the body | |
# function applying (f x y ...) | |
else: | |
for item in src: | |
frame_ls = var_checker_line(item) | |
else: | |
# if src is a integer | |
if isinstance(src, int): | |
pass | |
# TODO | |
# if src is a var | |
else: | |
var_name = src | |
# if var is appeared in the block, do nothing | |
if var_name in frame_ls[-1]['free_var'] or var_name in frame_ls[-1]['bind_var']: | |
pass | |
else: | |
var_position = None | |
# find the number of the frame which the var. is defined | |
for i in range(len(frame_ls)): | |
if var_name in frame_ls[-i-1]['bind_var']: | |
var_position = -i-1 | |
else: | |
pass | |
if var_position == None: | |
raise Exception("variable%s is not found." % var_name) | |
else: | |
# add the var. to the frame(s) from the frame next to the frame | |
# defining it to the last frame | |
for j in range(var_position+1, 0): | |
frame_ls[j]['free_var'].add(var_name) | |
return frame_ls | |
def var_checker(src_tree): | |
frame_ls = extend_var_checker_frame([]) | |
for src_line in src_tree: | |
frame_ls = var_checker_line(src_line, frame_ls) | |
print(frame_ls) | |
return frame_ls | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment