-
-
Save clsk/22c386695dd1ddb7ca75 to your computer and use it in GitHub Desktop.
An attempt at a VBA parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ply.lex as lex | |
import ply.yacc as yacc | |
# Names of every token kind the lexer can produce; the grammar rules refer to
# these names.
tokens = (
    "DOUBLE",
    "IDENTIFIER",
    "INT",
    "TYPE",
    "BOOLEAN",
    "DIM",
    "AS",
    "ASSIGN",
)

# Simple (string) token rules. Patterns containing backslashes are raw
# strings so escapes such as \d reach the regex engine untouched.
t_INT = r"\d+"
# The exponent marker is the character class [eE]. Writing it as [e|E] would
# also accept a literal '|' (e.g. "1.5|10") as part of a number.
t_DOUBLE = r"-?\d+\.\d+([eE][+-]?\d+)?"
t_IDENTIFIER = r"[a-z]+"
t_TYPE = "Double|Integer|Boolean|Byte"
t_BOOLEAN = "True|False"
t_DIM = "DIM"
t_AS = "AS"
t_ASSIGN = "="

# Characters skipped between tokens: spaces and tabs.
t_ignore = ' \t'
# Ignore comments: a comment starts with an apostrophe or the REM keyword and
# runs to the end of the line. The two openers are written as an alternation,
# (?:'|REM\b), because a character class such as ['|REM] matches any ONE of
# the characters ' | R E M and would swallow unrelated text.
# NOTE: the docstring is the token's regular expression, not documentation,
# and must stay the first statement of the body. Nothing is returned, so the
# matched text is discarded.
def t_comment(t):
    r"(?:'|REM\b)[^\n]*"
    pass
# Line tracking: each run of newlines advances the lexer's line counter by
# the number of newline characters matched. Nothing is returned, so newlines
# are not passed on as tokens.
# NOTE: the docstring is the token's regular expression and must stay the
# first statement of the body.
def t_newline(t):
    r"\n+"
    t.lexer.lineno += len(t.value)
# Lexing error handler: raise TypeError("<line>:<column>: Unknown text '<c>'")
# for input that no token rule matches.
#
# The column is computed here from the lexer's input buffer (t.lexer.lexdata)
# and the token's absolute offset (t.lexpos); it is 1-based and counted from
# the character after the previous newline. Only the first offending
# character is reported, because t.value holds all of the remaining input.
def t_error(t):
    line_start = t.lexer.lexdata.rfind("\n", 0, t.lexpos) + 1
    column = t.lexpos - line_start + 1
    raise TypeError("%d:%d: Unknown text '%s'" % (t.lineno, column, t.value[0]))
# Build the lexer.
# NOTE(review): ply.lex presumably collects `tokens` and the t_* rules from
# this module by introspection, so this call has to stay below them -- confirm
# against the PLY documentation before moving it.
lex.lex()
class Identifier(object):
    """A declared variable: its type name, its name and its current value.

    Attributes:
        t: Type name as written in the declaration (e.g. "Integer").
        name: Variable name.
        value: Current value as source text; starts at the type's default.
    """

    def __init__(self, t, name):
        """Create the variable and give it its type's default value.

        Booleans start as "False"; every other type starts as "0".
        """
        self.t = t  # type
        self.name = name
        # The TYPE token is spelled "Boolean". "BOOLEAN" is still accepted so
        # callers that pass the upper-case spelling keep working.
        if t in ("Boolean", "BOOLEAN"):
            self.value = "False"
        else:
            self.value = "0"

    def __repr__(self):
        """Return "<type> <name> = <repr of value>"."""
        return self.t + " " + self.name + " = " + repr(self.value)
# Symbol table: maps a declared variable's name to its Identifier record.
identifiers = dict()
# NOTE: in the p_* functions the docstring is the grammar rule, not
# documentation, so explanations live in comments.

# Start rule: a program is one or more statements. A grammar whose start
# symbol is `statement` accepts exactly one statement, so multi-statement
# input (one statement per line) would be rejected as a syntax error.
def p_program(p):
    '''program : statement
               | program statement'''


# A statement is a declaration or an assignment. Echo whatever the matched
# rule produced and pass it up the parse tree.
def p_statement(p):
    '''statement : declaration
                 | assignment'''
    print(p[1])
    p[0] = p[1]
# Declaration: create the variable's Identifier record, store it in the
# symbol table under the variable's name, and return it as the rule's value.
# (The docstring is the grammar rule and must stay the first statement.)
def p_declaration(p):
    "declaration : DIM IDENTIFIER AS TYPE"
    name, type_name = p[2], p[4]
    declared = Identifier(type_name, name)
    identifiers[name] = declared
    p[0] = declared
# Assignment of a literal to a previously declared variable.
#
# Raises TypeError when the variable has not been declared. A literal whose
# kind does not fit the variable's declared type is reported on stdout, but
# the value is still stored afterwards. The rule's value is the updated
# Identifier record, mirroring what a declaration yields.
# (The docstring is the grammar rule and must stay the first statement.)
def p_assignment(p):
    '''assignment : IDENTIFIER ASSIGN BOOLEAN
                  | IDENTIFIER ASSIGN DOUBLE
                  | IDENTIFIER ASSIGN INT'''
    name, value = p[1], p[3]
    target = identifiers.get(name)
    if target is None:
        raise TypeError("%d: Undeclared IDENTIFIER '%s'." % (p.lineno(1), name))

    # p[3] is the literal's text (a plain str); the token kind is carried by
    # the underlying grammar symbol, reached through p.slice.
    literal_kind = p.slice[3].type

    # Literal kinds each declared type accepts. Double takes both DOUBLE and
    # INT literals; a type missing from the table is never flagged.
    accepted_kinds = {
        "Boolean": ("BOOLEAN",),
        "Integer": ("INT",),
        "Double": ("DOUBLE", "INT"),
        "Byte": ("INT",),
    }
    allowed = accepted_kinds.get(target.t)
    if allowed is not None and literal_kind not in allowed:
        # The final field is the identifier's declared type, not the kind of
        # the literal being assigned.
        print("%d: Type mismatch. Trying to assign (%s) to identifier '%s' which is of type %s" % (p.lineno(3), value, name, target.t))

    target.value = value
    p[0] = target
# Syntax error handler.
#
# p is the offending token, or None when the input ended while the parser
# still expected more. An unexpected token raises TypeError carrying its
# line, text and kind; running out of input is only reported on stdout.
def p_error(p):
    if p is None:
        print("Syntax error: Reached end of input.")
        return
    raise TypeError("line %d: Syntax error near '%s': Unexpected %s found." % (p.lineno, p.value, p.type))
# Build the parser, run it over a small demo program, then dump the symbol
# table that the grammar actions filled in.
yacc.yacc()
yacc.parse("DIM a AS Integer\na = 3")

# Single-argument print(...) and dict.values() behave the same on Python 2
# and Python 3, so the script runs under either interpreter.
print("Identifiers(" + repr(len(identifiers)) + ")")
for declared in identifiers.values():
    print(repr(declared))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment