Created
April 27, 2018 12:34
-
-
Save Fhernd/dda34840e463bb81548fed4716522009 to your computer and use it in GitHub Desktop.
Parser en Python. Beazly y Jones (2013).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# example.py | |
# | |
# An example of writing a simple recursive descent parser | |
import re | |
import collections | |
# Token specification | |
NUM = r'(?P<NUM>\d+)' | |
PLUS = r'(?P<PLUS>\+)' | |
MINUS = r'(?P<MINUS>-)' | |
TIMES = r'(?P<TIMES>\*)' | |
DIVIDE = r'(?P<DIVIDE>/)' | |
LPAREN = r'(?P<LPAREN>\()' | |
RPAREN = r'(?P<RPAREN>\))' | |
WS = r'(?P<WS>\s+)' | |
master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES, | |
DIVIDE, LPAREN, RPAREN, WS])) | |
# Tokenizer | |
Token = collections.namedtuple('Token', ['type','value']) | |
def generate_tokens(text): | |
scanner = master_pat.scanner(text) | |
for m in iter(scanner.match, None): | |
tok = Token(m.lastgroup, m.group()) | |
if tok.type != 'WS': | |
yield tok | |
# Parser | |
class ExpressionEvaluator: | |
''' | |
Implementation of a recursive descent parser. Each method | |
implements a single grammar rule. Use the ._accept() method | |
to test and accept the current lookahead token. Use the ._expect() | |
method to exactly match and discard the next token on on the input | |
(or raise a SyntaxError if it doesn't match). | |
''' | |
def parse(self,text): | |
self.tokens = generate_tokens(text) | |
self.tok = None # Last symbol consumed | |
self.nexttok = None # Next symbol tokenized | |
self._advance() # Load first lookahead token | |
return self.expr() | |
def _advance(self): | |
'Advance one token ahead' | |
self.tok, self.nexttok = self.nexttok, next(self.tokens, None) | |
def _accept(self,toktype): | |
'Test and consume the next token if it matches toktype' | |
if self.nexttok and self.nexttok.type == toktype: | |
self._advance() | |
return True | |
else: | |
return False | |
def _expect(self,toktype): | |
'Consume next token if it matches toktype or raise SyntaxError' | |
if not self._accept(toktype): | |
raise SyntaxError('Expected ' + toktype) | |
# Grammar rules follow | |
def expr(self): | |
"expression ::= term { ('+'|'-') term }*" | |
exprval = self.term() | |
while self._accept('PLUS') or self._accept('MINUS'): | |
op = self.tok.type | |
right = self.term() | |
if op == 'PLUS': | |
exprval += right | |
elif op == 'MINUS': | |
exprval -= right | |
return exprval | |
def term(self): | |
"term ::= factor { ('*'|'/') factor }*" | |
termval = self.factor() | |
while self._accept('TIMES') or self._accept('DIVIDE'): | |
op = self.tok.type | |
right = self.factor() | |
if op == 'TIMES': | |
termval *= right | |
elif op == 'DIVIDE': | |
termval /= right | |
return termval | |
def factor(self): | |
"factor ::= NUM | ( expr )" | |
if self._accept('NUM'): | |
return int(self.tok.value) | |
elif self._accept('LPAREN'): | |
exprval = self.expr() | |
self._expect('RPAREN') | |
return exprval | |
else: | |
raise SyntaxError('Expected NUMBER or LPAREN') | |
if __name__ == '__main__': | |
e = ExpressionEvaluator() | |
print(e.parse('2')) | |
print(e.parse('2 + 3')) | |
print(e.parse('2 + 3 * 4')) | |
print(e.parse('2 + (3 + 4) * 5')) | |
# Example of building trees | |
class ExpressionTreeBuilder(ExpressionEvaluator): | |
def expr(self): | |
"expression ::= term { ('+'|'-') term }" | |
exprval = self.term() | |
while self._accept('PLUS') or self._accept('MINUS'): | |
op = self.tok.type | |
right = self.term() | |
if op == 'PLUS': | |
exprval = ('+', exprval, right) | |
elif op == 'MINUS': | |
exprval = ('-', exprval, right) | |
return exprval | |
def term(self): | |
"term ::= factor { ('*'|'/') factor }" | |
termval = self.factor() | |
while self._accept('TIMES') or self._accept('DIVIDE'): | |
op = self.tok.type | |
right = self.factor() | |
if op == 'TIMES': | |
termval = ('*', termval, right) | |
elif op == 'DIVIDE': | |
termval = ('/', termval, right) | |
return termval | |
def factor(self): | |
'factor ::= NUM | ( expr )' | |
if self._accept('NUM'): | |
return int(self.tok.value) | |
elif self._accept('LPAREN'): | |
exprval = self.expr() | |
self._expect('RPAREN') | |
return exprval | |
else: | |
raise SyntaxError('Expected NUMBER or LPAREN') | |
if __name__ == '__main__': | |
e = ExpressionTreeBuilder() | |
print(e.parse('2 + 3')) | |
print(e.parse('2 + 3 * 4')) | |
print(e.parse('2 + (3 + 4) * 5')) | |
print(e.parse('2 + 3 + 4')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment