Created
January 10, 2016 21:30
-
-
Save alexaleluia12/fc4457b1e4cf0119b25e to your computer and use it in GitHub Desktop.
compiler / compilador / sintatico / in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# font | |
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_writing_a_simple_recursive_descent_parser | |
""" | |
grammar | |
expr ::= expr + term | |
| expr - term | |
| term | |
term ::= term * factor | |
| term / factor | |
| factor | |
factor ::= ( expr ) | |
| NUM | |
""" | |
import re | |
import collections | |
# Token specification | |
NUM = r'(?P<NUM>\d+)' | |
PLUS = r'(?P<PLUS>\+)' | |
MINUS = r'(?P<MINUS>-)' | |
TIMES = r'(?P<TIMES>\*)' | |
DIVIDE = r'(?P<DIVIDE>/)' | |
LPAREN = r'(?P<LPAREN>\()' | |
RPAREN = r'(?P<RPAREN>\))' | |
WS = r'(?P<WS>\s+)' | |
master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES, | |
DIVIDE, LPAREN, RPAREN, WS])) | |
# Tokenizer | |
Token = collections.namedtuple('Token', ['type','value']) | |
def generate_tokens(text): | |
scanner = master_pat.scanner(text) | |
# iter(callable, sentinel) callable is called until it returns the sentinel. | |
for m in iter(scanner.match, None): | |
tok = Token(m.lastgroup, m.group()) | |
if tok.type != 'WS': | |
yield tok | |
# Parser | |
class ExpressionEvaluator: | |
''' | |
Implementation of a recursive descent parser. Each method | |
implements a single grammar rule. Use the ._accept() method | |
to test and accept the current lookahead token. Use the ._expect() | |
method to exactly match and discard the next token on on the input | |
(or raise a SyntaxError if it doesn't match). | |
''' | |
def parse(self,text): | |
self.tokens = generate_tokens(text) | |
self.tok = None # Last symbol consumed | |
self.nexttok = None # Next symbol tokenized | |
self._advance() # Load first lookahead token | |
return self.expr() | |
def _advance(self): | |
'Advance one token ahead' | |
self.tok, self.nexttok = self.nexttok, next(self.tokens, None) | |
def _accept(self,toktype): | |
'Test and consume the next token if it matches toktype' | |
if self.nexttok and self.nexttok.type == toktype: | |
self._advance() | |
return True | |
else: | |
return False | |
def _expect(self,toktype): | |
'Consume next token if it matches toktype or raise SyntaxError' | |
if not self._accept(toktype): | |
raise SyntaxError('Expected ' + toktype) | |
# Grammar rules follow | |
def expr(self): | |
"expression ::= term { ('+'|'-') term }*" | |
exprval = self.term() | |
while self._accept('PLUS') or self._accept('MINUS'): | |
op = self.tok.type | |
right = self.term() | |
if op == 'PLUS': | |
exprval += right | |
elif op == 'MINUS': | |
exprval -= right | |
return exprval | |
def term(self): | |
"term ::= factor { ('*'|'/') factor }*" | |
termval = self.factor() | |
while self._accept('TIMES') or self._accept('DIVIDE'): | |
op = self.tok.type | |
right = self.factor() | |
if op == 'TIMES': | |
termval *= right | |
elif op == 'DIVIDE': | |
termval /= right | |
return termval | |
def factor(self): | |
"factor ::= NUM | ( expr )" | |
if self._accept('NUM'): | |
return int(self.tok.value) | |
elif self._accept('LPAREN'): | |
exprval = self.expr() | |
self._expect('RPAREN') | |
return exprval | |
else: | |
raise SyntaxError('Expected NUMBER or LPAREN') | |
# use | |
""" | |
>>> e = ExpressionEvaluator() | |
>>> e.parse('2') | |
2 | |
>>> e.parse('2 + 3') | |
5 | |
>>> e.parse('2 + 3 * 4') | |
14 | |
>>> e.parse('2 + (3 + 4) * 5') | |
37 | |
>>> e.parse('2 + (3 + * 4)') | |
""" | |
# to build a parse tree | |
class ExpressionTreeBuilder(ExpressionEvaluator): | |
def expr(self): | |
"expression ::= term { ('+'|'-') term }" | |
exprval = self.term() | |
while self._accept('PLUS') or self._accept('MINUS'): | |
op = self.tok.type | |
right = self.term() | |
if op == 'PLUS': | |
exprval = ('+', exprval, right) | |
elif op == 'MINUS': | |
exprval = ('-', exprval, right) | |
return exprval | |
def term(self): | |
"term ::= factor { ('*'|'/') factor }" | |
termval = self.factor() | |
while self._accept('TIMES') or self._accept('DIVIDE'): | |
op = self.tok.type | |
right = self.factor() | |
if op == 'TIMES': | |
termval = ('*', termval, right) | |
elif op == 'DIVIDE': | |
termval = ('/', termval, right) | |
return termval | |
def factor(self): | |
'factor ::= NUM | ( expr )' | |
if self._accept('NUM'): | |
return int(self.tok.value) | |
elif self._accept('LPAREN'): | |
exprval = self.expr() | |
self._expect('RPAREN') | |
return exprval | |
else: | |
raise SyntaxError('Expected NUMBER or LPAREN') | |
#use | |
""" | |
>>> e = ExpressionTreeBuilder() | |
>>> e.parse('2 + 3') | |
('+', 2, 3) | |
>>> e.parse('2 + 3 * 4') | |
('+', 2, ('*', 3, 4)) | |
>>> e.parse('2 + (3 + 4) * 5') | |
('+', 2, ('*', ('+', 3, 4), 5)) | |
>>> e.parse('2 + 3 + 4') | |
('+', ('+', 2, 3), 4) | |
>>> | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment