Created
July 30, 2020 17:02
-
-
Save LQ1234/22f6644b289767de55b2702dc6ef408e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def lexer(str):
    """Turn a string-concatenation expression into a flat list of tokens.

    The input language consists of quoted strings (single or double quotes),
    the ``+`` operator, parentheses for grouping, and ``\\(`` ... ``)``
    interpolations inside strings. Spaces and newlines between expression
    tokens are ignored.

    Strictly speaking this is more than a lexer: it tracks quote and
    parenthesis nesting so that it can tell a grouping ``)`` from the ``)``
    that closes an interpolation.

    Args:
        str: The source text. (The name shadows the builtin; it is kept so
            existing keyword callers continue to work.)

    Returns:
        A list of tokens, or ``None`` if the input is malformed. Tokens are
        the marker strings ``"start-string"``, ``"end-string"``, ``"join"``,
        ``"start-paren"``, ``"end-paren"``, ``"start-inpol"``,
        ``"end-inpol"``, plus one single-character token per literal
        character inside a string.
    """
    tokens = []
    is_string_mode = False   # True while reading the literal body of a string
    is_special_mode = False  # True right after a backslash inside a string
    # Quote characters of the strings currently open, innermost last.
    string_ending_stack = []
    # One counter of open grouping parens per expression level: index 0 is
    # the top-level expression, each interpolation pushes a fresh counter.
    paren_count_stack = [0]
    string_chars = ('"', "'")

    for char in str:
        if not is_string_mode:
            if char in string_chars:
                string_ending_stack.append(char)
                is_string_mode = True
                tokens.append("start-string")
            elif char == " " or char == "\n":
                continue
            elif char == "+":
                tokens.append("join")
            elif char == "(":
                tokens.append("start-paren")
                paren_count_stack[-1] += 1
            elif char == ")":
                if paren_count_stack[-1] > 0:
                    paren_count_stack[-1] -= 1
                    tokens.append("end-paren")
                elif len(paren_count_stack) > 1:
                    # No grouping paren is open at this level, so this ")"
                    # closes the interpolation and resumes the outer string.
                    paren_count_stack.pop()
                    tokens.append("end-inpol")
                    is_string_mode = True
                else:
                    # Stray ")" at top level. Popping here would empty both
                    # stacks and crash on the next character.
                    return None
            else:
                # Unexpected character in expression mode.
                return None
        elif is_special_mode:
            if char in string_chars or char == "\\":
                # Escaped quote or backslash: emit it as a literal character.
                tokens.append(char)
            elif char == "(":
                is_string_mode = False
                paren_count_stack.append(0)
                tokens.append("start-inpol")
            else:
                print(f"unexpected {char} in special mode")
                return None
            is_special_mode = False
        elif char == "\\":
            is_special_mode = True
        elif char == string_ending_stack[-1]:
            string_ending_stack.pop()
            is_string_mode = False
            tokens.append("end-string")
        else:
            tokens.append(char)

    if paren_count_stack != [0]:
        # Unexpected end of input: mismatched parens / open interpolation.
        return None
    if string_ending_stack:
        # Unexpected end of input: a string was never closed.
        return None
    return tokens
def parser(tokens):
    """Parse a token list into a nested-list syntax tree.

    Grammar handled (recursive descent)::

        expr    := operand ("join" expr)?
        operand := "start-paren" expr "end-paren"
                 | "start-string" part* "end-string"
        part    := <single-character token>
                 | "start-inpol" expr "end-inpol"

    Tokens are consumed from the front of ``tokens`` in place. Parsing stops
    (without consuming it) at an ``"end-paren"`` or ``"end-inpol"`` token so
    the recursive caller can check the matching closer.

    Args:
        tokens: Mutable list of tokens as described above. ``None`` or an
            empty list is treated as a parse failure.

    Returns:
        A list with one entry per joined operand, or ``None`` on a syntax
        error. A parenthesised operand is the nested tree of its contents;
        a string operand is a list of parts, where consecutive characters
        are merged into one ``str`` and each interpolation is a nested tree.
    """
    # Covers both an empty expression (e.g. after a trailing "join") and a
    # None handed over from a failed tokenisation.
    if not tokens:
        return None

    syntax_tree = []
    next_token = tokens[0]

    if next_token == "start-paren":
        tokens.pop(0)
        contents = parser(tokens)
        if contents is None:
            return None
        if not tokens or tokens.pop(0) != "end-paren":
            print("mismatched parens")
            return None
        syntax_tree.append(contents)
    elif next_token == "start-string":
        tokens.pop(0)
        strings = []
        while True:
            if not tokens:
                # Ran out of input before "end-string".
                print("unterminated string")
                return None
            str_next_token = tokens.pop(0)
            if str_next_token == "end-string":
                break
            if len(str_next_token) == 1:
                # Literal character: extend the current text run, starting a
                # new run if the previous part was an interpolation.
                if not strings or not isinstance(strings[-1], str):
                    strings.append("")
                strings[-1] += str_next_token
            elif str_next_token == "start-inpol":
                contents = parser(tokens)
                if contents is None:
                    return None
                if not tokens or tokens.pop(0) != "end-inpol":
                    print("mismatched interpolation")
                    return None
                strings.append(contents)
            else:
                print(f"unexpected token {str_next_token} in string")
                return None
        syntax_tree.append(strings)
    else:
        print(f"unexpected token {next_token} (A)")
        return None

    if not tokens:
        return syntax_tree

    next_token = tokens[0]
    if next_token == "join":  # operators
        tokens.pop(0)
        the_rest = parser(tokens)
        if the_rest is None:
            return None
        syntax_tree.extend(the_rest)
        return syntax_tree
    if next_token == "end-paren" or next_token == "end-inpol":
        # Leave the closer for the caller that opened the group.
        return syntax_tree
    print(f"unexpected token {next_token} (B)")
    return None
# Smoke test: tokenise and parse an expression that mixes both quote styles,
# grouping parentheses and nested "\(" ... ")" interpolations.
# The literal is a raw string so that "\(" stays a backslash followed by "("
# without relying on Python's handling of unrecognised escape sequences.
test = r"""
"a" + ('b' + "d'b'c") + ("a \('b') c" + 'd \(("e"+'f')+'g \('h'+("i"+'j'))')')
"""
lexed = lexer(test)
print(lexed)
print(parser(lexed))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment