Created
August 16, 2020 06:59
-
-
Save telescreen/614413bdea8d7115b223b9c49a8b2cc6 to your computer and use it in GitHub Desktop.
Pratt Combinator Parsing using python3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
## Study Pratt parsing for python3 | |
## Ref: http://effbot.org/zone/simple-top-down-parsing.htm | |
import re | |
token_pattern = re.compile(r"\s*(?:(\d+)|(\*\*|.))\s*") | |
def tokenize(program): | |
for number, operator in token_pattern.findall(program): | |
if number: | |
symbol = symbol_table["(literal)"] | |
s = symbol() | |
s.value = number | |
yield s | |
else: | |
symbol = symbol_table.get(operator) | |
if not symbol: | |
raise SyntaxError("Unknown operator") | |
yield symbol() | |
symbol = symbol_table["(end)"] | |
yield symbol() | |
class symbol_base: | |
id = None | |
value = None | |
first = second = third = None | |
def nud(self): | |
raise SyntaxError("Unknown operator ({}}).".format(self.id)) | |
def led(self, left): | |
raise SyntaxError("Unknown operator ({}}).".format(self.id)) | |
def __repr__(self): | |
if self.id == "(name)" or self.id == "(literal)": | |
return "(%s %s)" % (self.id[1:-1], self.value) | |
out = [self.id, self.first, self.second, self.third] | |
out = map(str, filter(None, out)) | |
return "(" + " ".join(out) + ")" | |
symbol_table = {} | |
def symbol(id, bp=0): | |
try: | |
s = symbol_table[id] | |
except KeyError: | |
class s(symbol_base): | |
pass | |
s.__name__ = "symbol-" + id | |
s.id = id | |
s.lbp = bp | |
symbol_table[id] = s | |
else: | |
s.lbp = max(bp, s.lbp) | |
return s | |
def parse(program): | |
global token, token_stream | |
token_stream = tokenize(program) | |
token = next(token_stream) | |
return expression() | |
def expression(rbp=0): | |
global token | |
t = token | |
token = next(token_stream) | |
left = t.nud() | |
while rbp < token.lbp: | |
t = token | |
token = next(token_stream) | |
left = t.led(left) | |
return left | |
def infix(id, bp): | |
def led(self, left): | |
self.first = left | |
self.second = expression(bp) | |
return self | |
symbol(id, bp).led = led | |
def prefix(id, bp): | |
def nud(self): | |
self.first = expression(bp) | |
self.second = None | |
return self | |
symbol(id, bp).nud = nud | |
def infix_r(id, bp): | |
def led(self, left): | |
self.first = left | |
self.second = expression(bp-1) | |
return self | |
symbol(id, bp).led = led | |
infix("+", 10); infix("-", 10) | |
infix("*", 20); infix("/", 20) | |
infix_r("**", 30) | |
prefix("+", 100); prefix("-", 100) | |
symbol("(literal)").nud = lambda self: self | |
symbol("(end)") | |
if __name__ == "__main__": | |
print(parse("1")) | |
print(parse("-1")) | |
print(parse("-1 * 2")) | |
print(parse("1 + 2")) | |
print(parse("1 + 2 * 3")) | |
print(parse("1 + 2 - 3 * 4 / 5")) | |
print(parse("2**2**3")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment