Skip to content

Instantly share code, notes, and snippets.

@LQ1234
Created July 30, 2020 17:02
Show Gist options
  • Save LQ1234/22f6644b289767de55b2702dc6ef408e to your computer and use it in GitHub Desktop.
Save LQ1234/22f6644b289767de55b2702dc6ef408e to your computer and use it in GitHub Desktop.
def lexer(str):
    r"""Split an expression into a flat list of tokens.

    The input language consists of quoted strings (``"..."`` or ``'...'``),
    the ``+`` operator, parentheses, and ``\(...)`` interpolation inside
    strings.  Inside a string a backslash may also be followed by a quote
    character or another backslash to emit that character literally.

    Structural tokens are the multi-character names ``start-string``,
    ``end-string``, ``join``, ``start-paren``, ``end-paren``,
    ``start-inpol`` and ``end-inpol``.  Every character of string content
    is emitted as its own one-character token.

    Args:
        str: The source text.  The name shadows the builtin; it is kept so
            that existing keyword callers continue to work.

    Returns:
        The list of tokens, or ``None`` when the input is malformed
        (unexpected character, stray ``)``, unsupported escape, or
        unbalanced parentheses/quotes at end of input).
    """
    tokens = []
    is_string_mode = False
    # True for exactly one character: the one following a backslash
    # inside a string.
    is_special_mode = False
    # Closing quote expected for each currently open string, innermost last.
    string_ending_stack = []  # ok technically it's not a lexer
    # One open-paren counter per expression level: index 0 is the top-level
    # expression, and each active interpolation pushes a fresh counter.
    paren_count_stack = [0]
    string_chars = ('"', "'")

    for char in str:
        if not is_string_mode:
            if char in string_chars:
                string_ending_stack.append(char)
                is_string_mode = True
                tokens.append("start-string")
            elif char == " " or char == "\n":
                continue
            elif char == "+":
                tokens.append("join")
            elif char == "(":
                tokens.append("start-paren")
                paren_count_stack[-1] += 1
            elif char == ")":
                if paren_count_stack[-1] > 0:
                    paren_count_stack[-1] -= 1
                    tokens.append("end-paren")
                elif len(paren_count_stack) > 1:
                    # No open paren at this level, but we are inside an
                    # interpolation: this ")" closes it and resumes the
                    # enclosing string.
                    paren_count_stack.pop()
                    tokens.append("end-inpol")
                    is_string_mode = True
                else:
                    # Stray ")" at the top level.  Treating it as the end of
                    # an interpolation would resume a string that was never
                    # opened and crash on the next character.
                    return None
            else:
                return None
        elif is_special_mode:
            if char in string_chars or char == "\\":
                # Escaped quote or backslash: emit the character itself.
                tokens.append(char)
            elif char == "(":
                # "\(" switches back to expression mode until the matching ")".
                is_string_mode = False
                paren_count_stack.append(0)
                tokens.append("start-inpol")
            else:
                print(f"unexpected {char} in special mode")
                return None
            is_special_mode = False
        elif char == "\\":
            is_special_mode = True
        elif char == string_ending_stack[-1]:
            string_ending_stack.pop()
            is_string_mode = False
            tokens.append("end-string")
        else:
            tokens.append(char)

    # Anything still open at end of input means mismatched parens or quotes.
    if paren_count_stack != [0] or string_ending_stack:
        return None
    return tokens
def parser(tokens):
    """Parse a token list into a nested-list syntax tree.

    An expression is one or more terms separated by ``join`` tokens.  A term
    is either a parenthesised expression (``start-paren`` ... ``end-paren``)
    or a string (``start-string`` ... ``end-string``).

    The returned tree is a list with one entry per term:

    * a parenthesised expression becomes the nested list returned for the
      expression it encloses;
    * a string becomes a list whose items are text chunks (``str``, built
      from consecutive one-character tokens) and, for each interpolation,
      the nested list returned for the interpolated expression.

    Args:
        tokens: List of tokens.  It is consumed in place: parsed tokens are
            popped off the front.  ``None`` is accepted and yields ``None``.

    Returns:
        The syntax tree, or ``None`` if the tokens do not form a valid
        expression (a diagnostic is printed in that case).
    """
    if tokens is None:
        return None
    if not tokens:
        print("unexpected end of input")
        return None

    syntax_tree = []
    next_token = tokens[0]
    if next_token == "start-paren":
        tokens.pop(0)
        contents = parser(tokens)
        # Check the recursive result first so a nested failure is not
        # followed by a misleading "mismatched parens" message.
        if contents is None:
            return None
        if not tokens or tokens.pop(0) != "end-paren":
            print("mismatched parens")
            return None
        syntax_tree.append(contents)
    elif next_token == "start-string":
        tokens.pop(0)
        strings = []
        while True:
            if not tokens:
                print("unexpected end of input")
                return None
            str_next_token = tokens.pop(0)
            if str_next_token == "end-string":
                break
            if str_next_token == "start-inpol":
                contents = parser(tokens)
                if contents is None:
                    return None
                if not tokens or tokens.pop(0) != "end-inpol":
                    print("mismatched interpolation")
                    return None
                strings.append(contents)
            elif len(str_next_token) == 1:
                # One-character tokens are string content; merge consecutive
                # characters into a single text chunk.
                if not strings or not isinstance(strings[-1], str):
                    strings.append("")
                strings[-1] += str_next_token
            else:
                print(f"unexpected token {str_next_token} in string")
                return None
        syntax_tree.append(strings)
    else:
        print(f"unexpected token {next_token} (A)")
        return None

    if not tokens:
        return syntax_tree
    next_token = tokens[0]
    if next_token == "join":  # operators
        tokens.pop(0)
        the_rest = parser(tokens)
        if the_rest is None:
            return None
        syntax_tree.extend(the_rest)
        return syntax_tree
    if next_token in ("end-paren", "end-inpol"):
        # Leave the closing token in place for the enclosing call to consume.
        return syntax_tree
    print(f"unexpected token {next_token} (B)")
    return None
# Sample expression exercising nested parentheses, both quote styles, and
# nested \( ... ) interpolation inside strings.  A raw literal is used so the
# backslashes reach the lexer as written instead of relying on Python leaving
# the unrecognised "\(" escape untouched.
test = r"""
"a" + ('b' + "d'b'c") + ("a \('b') c" + 'd \(("e"+'f')+'g \('h'+("i"+'j'))')')
"""
# Show the tokens before parsing: parser() pops tokens off the front of the
# list it is given, so the list is consumed once parsing has run.
lexed = lexer(test)
print(lexed)
print(parser(lexed))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment