# Gist by @ahmedbilal, last active November 25, 2018.
# Part 1: recursive-descent parser for Decaf tokens; Part 2: the lexer
# that produces the pickled token stream the parser consumes.
# (GitHub gist page header removed.)
import pickle
import sys
"""
|------------------------------------------------------|
|Grammar notation | Code representation |
|------------------|-----------------------------------|
|Terminal | Code to match and consume a token|
|Nonterminal | Call to that rule’s function |
| | | If or switch statement |
| * or + | While or for loop |
| ? | If statement |
|------------------------------------------------------|
"""
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
"extends", "implements", "for", "while", "if", "else", "return", "break", "new",
"NewArray", "Print", "ReadInteger", "ReadLine"]
class Token:
def __init__(self, _type, value=None, priority=0):
self.type = _type
self.value = value
self.priority = priority
self.line_no = 0
def __repr__(self):
if not self.value:
return self.type + " AT Line#" + str(self.line_no)
return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
def ret_all_tokens():
tokens = []
for resr in RESERVED_WORDS:
tokens.append(Token("T_" + resr.upper()))
tokens.append(Token("T_ID"))
tokens.append(Token("T_BOOL"))
tokens.append(Token("T_INTCONSTANT"))
tokens.append(Token("T_STRINGCONSTANT"))
tokens.append(Token("T_DOUBLECONSTANT"))
tokens.append(Token("T_PLUS"))
tokens.append(Token("T_MINUS"))
tokens.append(Token("T_MULT"))
tokens.append(Token("T_DIV"))
tokens.append(Token("T_MOD"))
tokens.append(Token("T_LT"))
tokens.append(Token("T_LEQ"))
tokens.append(Token("T_GT"))
tokens.append(Token("T_GEQ"))
tokens.append(Token("T_ASSIGN"))
tokens.append(Token("T_EQ"))
tokens.append(Token("T_NEQ"))
tokens.append(Token("T_AND"))
tokens.append(Token("T_OR"))
tokens.append(Token("T_NOT"))
tokens.append(Token("T_SEMICOLON"))
tokens.append(Token("T_COMMA"))
tokens.append(Token("T_DOT"))
tokens.append(Token("T_ARRDECL"))
tokens.append(Token("T_LSB"))
tokens.append(Token("T_RSB"))
tokens.append(Token("T_LPAREN"))
tokens.append(Token("T_RPAREN"))
tokens.append(Token("T_LCB"))
tokens.append(Token("T_RCB"))
return tokens
EOF = Token("T_EOF") # End-Of-File Token
tokens = None # tokens read from file
if len(sys.argv) == 2:
filename = sys.argv[1]
f = open(filename, "rb")
tokens = pickle.load(f)
tokens.append(EOF)
else:
print("Unknown # of args")
sys.exit(-1)
current_index = 0
def is_at_end():
return peek() == EOF
def peek():
# print("peek", tokens[current_index])
return tokens[current_index]
def previous():
# print("previous", tokens[current_index - 1])
return tokens[current_index - 1]
def check(_type):
if is_at_end():
return False
# print("check", "peek().type == _type", peek().type, _type, peek().type == _type)
return peek().type == _type
def advance():
global current_index
if not is_at_end():
current_index += 1
return previous()
def match(types):
for _t in types:
if check(_t):
advance()
# print("matched")
return True
return False
FirstSet = dict()
FirstSet["Type"] = ["T_INT", "T_DOUBLE", "T_BOOL", "T_STRING", "T_IDENT"]
FirstSet["Decl"] = FirstSet["Type"] + ["T_VOID", "T_CLASS", "T_INTERFACE"]
FirstSet["Actuals"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_READINTEGER", "T_READLINE",
"T_BOOLCONSTANT", "T_DOUBLECONSTANT", "T_IDENT", "T_INTCONSTANT", "T_NEW",
"T_NULL","T_STRINGCONSTANT","T_THIS"]
FirstSet["P"] = FirstSet["Type"] + ["T_VOID"]
FirstSet["Stmt"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_PRINT", "T_READINTEGER", "T_READLINE", "T_BOOLCONSTANT",
"T_BREAK", "T_DOUBLECONSTANT", "T_FOR", "T_IDENT", "T_IF",
"T_INTCONSTANT","T_NEW","T_NULL","T_RETURN", "T_STRINGCONSTANT", "T_THIS", "T_WHILE", "T_LCB"]
FirstSet["Constant"] = ["T_INTCONSTANT", "T_DOUBLECONSTANT", "T_BOOLCONSTANT", "T_STRINGCONSTANT", "T_NULL"]
class Grammar(object):
def Program(self):
print("Program()", tokens[current_index])
return self.Decl() and self.Program_Prime()
def Program_Prime(self):
print("Program`()", tokens[current_index])
if peek().type in FirstSet["Decl"]:
return self.Decl() and self.Program_Prime()
else:
return True
def Decl(self):
print("Decl()", tokens[current_index])
if match(["T_VOID"]):
return self.FunctionDecl()
elif match(["T_CLASS"]):
return self.ClassDecl()
elif match(["T_INTERFACE"]):
return self.InterfaceDecl()
elif peek().type in FirstSet["Type"]:
return self.Type() and match(["T_IDENT"]) and self.VF()
return True
def VF(self):
print("VF()", tokens[current_index])
if check("T_SEMICOLON"):
return self.VariableDecl()
elif check("T_LPAREN"):
return self.FunctionDecl()
return False
def VariableDecl(self):
print("VariableDecl()", tokens[current_index])
return match(["T_SEMICOLON"])
def Variable(self):
print("Variable()", tokens[current_index])
return self.Type() and match(["T_IDENT"])
def Type(self):
print("Type()", tokens[current_index])
return match(FirstSet["Type"]) and self.Type_Prime()
def Type_Prime(self):
print("Type_Prime()", tokens[current_index])
if match(["T_LSB"]):
return match(["T_RSB"]) and self.Type_Prime()
else:
return True
def FunctionDecl(self):
print("FunctionDecl()", tokens[current_index])
return match(["T_IDENT"]) and match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and self.StmtBlock()
def Formals(self):
print("Formals()", tokens[current_index])
if peek().type in FirstSet["Type"]:
return self.Parameters()
return True
def Parameters(self):
print("Parameters()", tokens[current_index])
return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.Parameters_Prime()
def Parameters_Prime(self):
print("Parameters_Prime()", tokens[current_index])
if match(["T_COMMA"]):
return self.Variable() and self.Parameters_Prime()
return True
def ClassDecl(self):
print("ClassDecl()", tokens[current_index])
return match(["T_CLASS"]) and match(["T_IDENT"]) and self.Extend() and self.Implement() and match(["T_LCB"]) \
and self.F() and match(["T_RCB"])
def Extend(self):
print("Extend()", tokens[current_index])
if match(["T_EXTENDS"]):
return match(["T_IDENT"])
return True
def Implement(self):
print("Implements()", tokens[current_index])
if match(["T_IMPLEMENTS"]):
return self.K()
return True
def F(self):
print("F()", tokens[current_index])
if match(FirstSet["Type"]):
return self.Field() and self.F()
return True
def K(self):
print("K()", tokens[current_index])
return match(["T_IDENT"]) and self.K_Prime()
def K_Prime(self):
print("K`()", tokens[current_index])
if match(["T_COMMA"]):
return match(["T_IDENT"]) and self.K_Prime()
return True
def Field(self):
print("Field()", tokens[current_index])
return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.VF()
def InterfaceDecl(self):
print("InterfaceDecl()", tokens[current_index])
return match(["T_INTERFACE"]) and match(["T_IDENT"]) and match(["T_LCB"]) and self.P() and match(["T_RCB"])
def P(self):
print("P()", tokens[current_index])
if peek().type in FirstSet["P"]:
return self.Prototype() and self.P()
return True
def Prototype(self):
print("Prototype()", tokens[current_index])
return match(FirstSet["P"]) and self.Type_Prime() and match(["T_IDENT"]) and \
match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and match(["T_SEMICOLON"])
def StmtBlock(self):
print("StmtBlock()", tokens[current_index])
return match(["T_LCB"]) and self.V() and self.S() and match(["T_RCB"])
def V(self):
print("V()", tokens[current_index])
if match(FirstSet["Type"]):
return self.Type_Prime() and match(["T_IDENT"]) and self.VariableDecl() and self.V()
return True
def S(self):
print("S()", tokens[current_index])
if peek().type in FirstSet["Stmt"]:
return self.Stmt() and self.S()
return True
def Stmt(self):
print("Stmt()", tokens[current_index])
if peek().type in FirstSet["Expr"]:
return self.Expr() and match(["T_SEMICOLON"])
elif peek().type == "T_IF":
return self.IfStmt()
elif peek().type == "T_WHILE":
return self.WhileStmt()
elif peek().type == "T_FOR":
return self.ForStmt()
elif peek().type == "T_BREAK":
return self.BreakStmt()
elif peek().type == "T_RETURN":
return self.ReturnStmt()
elif peek().type == "T_PRINT":
return self.PrintStmt()
elif match(["T_LCB"]):
return self.V() and self.S() and match(["T_RCB"])
return False
def E(self):
print("StEmt()", tokens[current_index])
if peek().type in FirstSet["Expr"]:
return self.Expr()
return True
def IfStmt(self):
print("IfStmt()", tokens[current_index])
return match(["T_IF"]) and match(["T_LPAREN"]) and self.Expr() and match(["R_PAREN"]) and \
self.Stmt() and self.Else()
def Else(self):
print("Else()", tokens[current_index])
if match(["T_ELSE"]):
return self.Stmt()
return True
def WhileStmt(self):
print("WhileStmt()", tokens[current_index])
return match(["T_WHILE"]) and match(["T_LPAREN"]) and self.Expr() and match(["T_RPAREN"]) and self.Stmt()
def ForStmt(self):
print("ForStmt()", tokens[current_index])
return match(["T_FOR"]) and match(["T_LPAREN"]) and self.E() and match(["T_SEMICOLON"]) and \
self.Expr() and match(["T_SEMICOLON"]) and self.E() and match(["T_RPAREN"]) and self.Stmt()
def ReturnStmt(self):
print("ReturnStmt()", tokens[current_index])
return match(["T_RETURN"]) and self.E() and match(["T_SEMICOLON"])
def BreakStmt(self):
print("BreakStmt()", tokens[current_index])
return match(["T_BREAK"]) and match(["T_SEMICOLON"])
def PrintStmt(self):
print("PrintStmt()", tokens[current_index])
return match(["T_PRINT"]) and match(["T_LPAREN"]) and self.Pr() and \
match(["T_RPAREN"]) and match(["T_SEMICOLON"])
def Pr(self):
print("Pr()", tokens[current_index])
if peek().type in FirstSet["Expr"]:
return self.Expr() and self.Pr_Prime()
return False
def Pr_Prime(self):
print("Pr_Prime()", tokens[current_index])
if match(["T_COMMA"]):
return self.Expr() and self.Pr_Prime()
return True
def Expr(self):
print("Expr()", tokens[current_index])
if match(["T_IDENT"]):
return self.LC()
elif match(FirstSet["Constant"]):
return self.Constant()
elif match(["T_THIS"]):
return True
elif match(["T_LPAREN"]):
return self.Expr() and match(["T_RPAREN"])
elif match(["T_NOT"]):
return self.Expr()
elif match(["T_READINTEGER"]):
return match(["T_LPAREN"]) and match(["T_RPAREN"])
elif match(["T_READLINE"]):
return match(["T_LPAREN"]) and match(["T_RPAREN"])
elif match(["T_NEW"]):
return match(["T_IDENT"])
elif match(["T_NEWARRAY"]):
return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
self.Type() and match(["T_RPAREN"])
def LC(self):
print("LC()", tokens[current_index])
if match(["T_ASSIGN"]):
return self.Expr0()
elif match(["T_DOT"]):
return match(["T_IDENT"]) and self.GC()
elif match(["T_LSB"]):
return self.Expr() and match(["T_RSB"]) and match(["T_ASSIGN"]) and self.Expr0()
elif match(["T_LPAREN"]):
return self.Actuals() and match(["T_RPAREN"])
return False
def GC(self):
print("GC()", tokens[current_index])
if peek().type in ["T_ASSIGN"]:
return self.Expr0()
elif match(["L_PAREN"]):
return self.Actuals() and match(["R_PAREN"])
return False
def Expr0(self):
print("Expr0()", tokens[current_index])
if peek().type in FirstSet["Actuals"]:
return self.Expr1() and self.Expr0()
return False
def Expr0_Prime(self):
print("Expr0`()", tokens[current_index])
if match(["T_OR"]):
return self.Expr1() and self.Expr0_Prime()
else:
return True
def Expr1(self):
print("Expr1()", tokens[current_index])
if peek().type in FirstSet["Actuals"]:
return self.Expr2() and self.Expr1_Prime()
return False
def Expr1_Prime(self):
print("Expr1`()", tokens[current_index])
if match(["T_AND"]):
return self.Expr2() and self.Expr1_Prime()
else:
return True
def Expr2(self):
print("Expr2()", tokens[current_index])
if peek().type in FirstSet["Actuals"]:
return self.Expr3() and self.Expr2_Prime()
return False
def Expr2_Prime(self):
print("Expr2`()", tokens[current_index])
if match(["T_NEQ"]):
return self.Expr3() and self.Expr2_Prime()
else:
return True
def Expr3(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr4() and self.Expr3_Prime()
return False
def Expr3_Prime(self):
if match(["T_EQ"]):
return self.Expr4() and self.Expr3_Prime()
else:
return True
def Expr4(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr5() and self.Expr4_Prime()
return False
def Expr4_Prime(self):
if match(["T_GEQ"]):
return self.Expr5() and self.Expr4_Prime()
else:
return True
def Expr5(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr6() and self.Expr5_Prime()
return False
def Expr5_Prime(self):
if match(["T_GT"]):
return self.Expr6() and self.Expr5_Prime()
else:
return True
def Expr6(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr7() and self.Expr6_Prime()
return False
def Expr6_Prime(self):
if match(["T_LEQ"]):
return self.Expr7() and self.Expr6_Prime()
else:
return True
def Expr7(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr8() and self.Expr7_Prime()
return False
def Expr7_Prime(self):
if match(["T_LT"]):
return self.Expr8() and self.Expr7_Prime()
else:
return True
def Expr8(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr9() and self.Expr8_Prime()
return False
def Expr8_Prime(self):
if match(["T_MINUS"]):
return self.Expr9() and self.Expr8_Prime()
else:
return True
def Expr9(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr10() and self.Expr9_Prime()
return False
def Expr9_Prime(self):
if match(["T_PLUS"]):
return self.Expr10() and self.Expr9_Prime()
else:
return True
def Expr10(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr11() and self.Expr10_Prime()
return False
def Expr10_Prime(self):
if match(["T_MOD"]):
return self.Expr11() and self.Expr10_Prime()
else:
return True
def Expr11(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr12() and self.Expr11_Prime()
return False
def Expr11_Prime(self):
if match(["T_DIV"]):
return self.Expr12() and self.Expr11_Prime()
else:
return True
def Expr12(self):
if peek().type in FirstSet["Actuals"]:
return self.Expr13() and self.Expr12_Prime()
return False
def Expr12_Prime(self):
if match(["T_MULT"]):
return self.Expr13() and self.Expr12_Prime()
else:
return True
def Expr13(self):
if match(["T_IDENT"]):
return self.FC()
elif peek().type in FirstSet["Constant"]:
return self.Constant()
elif match(["T_THIS"]):
return True
elif match(["T_LPAREN"]):
return self.Expr() and match(["T_RPAREN"])
elif match(["T_NOT"]):
return self.Expr()
elif match(["T_READINTEGER"]):
return match(["T_LPAREN"]) and match(["T_RPAREN"])
elif match(["T_READLINE"]):
return match(["T_LPAREN"]) and match(["T_RPAREN"])
elif match(["T_NEW"]):
return match(["T_IDENT"])
elif match(["T_NEWARRAY"]):
return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
self.Type() and match(["T_RPAREN"])
def FC(self):
if match(["T_DOT"]):
return match(["T_IDENT"]) and self.FC_Prime()
elif match(["T_LSB"]):
return self.Expr() and match(["T_RSB"])
elif match(["T_LPAREN"]):
return self.Actuals() and match(["T_RPAREN"])
else:
return True
def FC_Prime(self):
if match(["T_LPAREN"]):
return self.Actuals() and match(["T_RPAREN"])
else:
return True
def Actuals(self):
if peek().type in FirstSet["Actuals"]:
return self.Pr()
else:
return True
def Constant(self):
return match(FirstSet["Constant"])
def main():
g = Grammar()
print(g.Program())
print(current_index)
main()
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import string
import sys
import logging
import pickle
logging.basicConfig(level=logging.CRITICAL)
filename = ""
code = ""
LINE_NO = 1
if len(sys.argv) == 2:
filename = sys.argv[1]
f = open(filename, "r")
code = f.read()
else:
print("Unknown # of args")
sys.exit(-1)
def preprocess(_code):
output = _code
while "/*" in output:
output = output[:output.index("/*")] + output[output.index("*/") + 2:]
while "//" in output:
output = output[:output.index(
"//")] + output[output.index("\n", output.index("//")):]
return output
code = preprocess(code)
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
"extends", "implements", "for", "while", "if", "else", "return", "break", "new",
"NewArray", "Print", "ReadInteger", "ReadLine"]
# In[3]:
class Token:
def __init__(self, _type, value=None, priority=0):
self.type = _type
self.value = value
self.priority = priority
global LINE_NO
self.line_no = LINE_NO
def __repr__(self):
if not self.value:
return self.type + " AT Line#" + str(self.line_no)
return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
# In[4]:
def is_reserve(s):
return s in RESERVED_WORDS
def is_identifier(s):
if s and s[0] in string.ascii_letters and len(s) < 32:
for c in s[1:]:
if not (c.isalnum() or c == '_'):
return False
return True
else:
return False
def is_non_resv_non_ident(s):
return not is_reserve(s) and not is_identifier(s)
def is_white_space(s):
return s.isspace()
def is_integer(s):
if s[:2] in ["0x", "0X"] and all(c in string.hexdigits for c in s[2:]):
return True
if s.isdigit():
return True
return False
def is_string(s):
if s.count('"') == 2 and s[0] == '"' and s[-1] == '"' and '"' not in s[1:-1] and '\n' not in s:
return True
return False
def is_double(s):
if s[0].isdigit() and s.count(".") == 1:
if s.count("E") == 1 and s.index("E") > s.index(".") and s[s.index("E")+1:]:
if all(c in string.digits for c in s[s.index("E") + 1:]):
return True
elif s[s.index("E")+1] in ["+", "-"] and all(c in string.digits for c in s[s.index("E")+2:]):
return True
elif "E" not in s and all(c in string.digits for c in s[s.index(".")+1:]):
return True
return False
# In[5]:
def all_tokens():
tokens = []
for resr in RESERVED_WORDS:
tokens.append(Token("T_" + resr.upper()))
tokens.append(Token("T_IDENT"))
tokens.append(Token("T_BOOL"))
tokens.append(Token("T_INTCONSTANT"))
tokens.append(Token("T_STRINGCONSTANT"))
tokens.append(Token("T_DOUBLECONSTANT"))
tokens.append(Token("T_PLUS"))
tokens.append(Token("T_MINUS"))
tokens.append(Token("T_MULT"))
tokens.append(Token("T_DIV"))
tokens.append(Token("T_MOD"))
tokens.append(Token("T_LT"))
tokens.append(Token("T_LEQ"))
tokens.append(Token("T_GT"))
tokens.append(Token("T_GEQ"))
tokens.append(Token("T_ASSIGN"))
tokens.append(Token("T_EQ"))
tokens.append(Token("T_NEQ"))
tokens.append(Token("T_AND"))
tokens.append(Token("T_OR"))
tokens.append(Token("T_NOT"))
tokens.append(Token("T_SEMICOLON"))
tokens.append(Token("T_COMMA"))
tokens.append(Token("T_DOT"))
tokens.append(Token("T_ARRDECL"))
tokens.append(Token("T_LSB"))
tokens.append(Token("T_RSB"))
tokens.append(Token("T_LPAREN"))
tokens.append(Token("T_RPAREN"))
tokens.append(Token("T_LCB"))
tokens.append(Token("T_RCB"))
return tokens
def ret_token(s):
tokens = []
if is_reserve(s):
tokens.append(Token("T_" + s.upper(), value=None, priority=999))
if is_identifier(s):
tokens.append(Token("T_IDENT", s))
if s in ["true", "false"]:
tokens.append(Token("T_BOOL", s, 998))
if is_integer(s):
tokens.append(Token("T_INTCONSTANT", s))
if is_string(s):
tokens.append(Token("T_STRINGCONSTANT", s))
if is_double(s):
tokens.append(Token("T_DOUBLECONSTANT", s))
if s == "+":
tokens.append(Token("T_PLUS"))
if s == "-":
tokens.append(Token("T_MINUS"))
if s == "*":
tokens.append(Token("T_MULT"))
if s == "/":
tokens.append(Token("T_DIV"))
if s == "%":
tokens.append(Token("T_MOD"))
if s == "<":
tokens.append(Token("T_LT"))
if s == "<=":
tokens.append(Token("T_LEQ"))
if s == ">":
tokens.append(Token("T_GT"))
if s == ">=":
tokens.append(Token("T_GEQ"))
if s == "=":
tokens.append(Token("T_ASSIGN"))
if s == "==":
tokens.append(Token("T_EQ"))
if s == "!=":
tokens.append(Token("T_NEQ"))
if s == "&&":
tokens.append(Token("T_AND"))
if s == "||":
tokens.append(Token("T_OR"))
if s == "!":
tokens.append(Token("T_NOT"))
if s == ";":
tokens.append(Token("T_SEMICOLON"))
if s == ",":
tokens.append(Token("T_COMMA"))
if s == ".":
tokens.append(Token("T_DOT"))
if s == "[]":
tokens.append(Token("T_ARRDECL"))
if s == "[":
tokens.append(Token("T_LSB"))
if s == "]":
tokens.append(Token("T_RSB"))
if s == "(":
tokens.append(Token("T_LPAREN"))
if s == ")":
tokens.append(Token("T_RPAREN"))
if s == "{":
tokens.append(Token("T_LCB"))
if s == "}":
tokens.append(Token("T_RCB"))
return tokens
# In[ ]:
# In[6]:
OUTPUT = []
identified = {}
stack = ""
for _cindex, char in enumerate(code):
if char in string.whitespace and '"' not in stack:
if char == "\n":
LINE_NO += 1
logging.debug("--------- Whitespace occur ----------")
if identified:
OUTPUT.append(
identified[max(identified, key=lambda k: identified[k].priority)])
logging.debug("Remaining {}".format(code[_cindex:]))
identified.clear()
stack = ""
logging.debug("Stack = {}".format(stack))
logging.debug("--------- Whitespace end ----------")
continue
stack += char
logging.debug("\nSTACK {}".format(stack))
identified_tokens = ret_token(stack)
logging.debug("LATEST_TOKENS {}".format(identified_tokens))
if not identified_tokens and identified:
OUTPUT.append(
identified[max(identified, key=lambda k: identified[k].priority)])
logging.debug("OUTPUT {}".format(OUTPUT))
logging.debug("Remaining: {}".format(code[_cindex:]))
identified.clear()
stack = stack[-1]
logging.debug("Stack {}".format(stack))
identified_tokens = ret_token(stack)
logging.debug("LATEST_TOKENS = {}".format(identified_tokens))
identified.clear()
for identified_token in identified_tokens:
if identified_token.type in identified.keys():
identified[identified_token.type] = identified_token
else:
identified[identified_token.type] = identified_token
logging.debug("IDENTIFIED = {}".format(identified))
if _cindex == len(code) - 1:
OUTPUT.append(
identified[max(identified, key=lambda k: identified[k].priority)])
logging.debug("STACK {}".format(stack))
print("OUTPUT {}".format(OUTPUT))
output_file = open(sys.argv[1] + ".tokens", "wb")
pickle.dump(OUTPUT, output_file)
# (GitHub gist comment footer removed.)