Skip to content

Instantly share code, notes, and snippets.

@telescreen
Created August 16, 2020 06:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save telescreen/614413bdea8d7115b223b9c49a8b2cc6 to your computer and use it in GitHub Desktop.
Save telescreen/614413bdea8d7115b223b9c49a8b2cc6 to your computer and use it in GitHub Desktop.
Pratt Combinator Parsing using python3
#!/usr/bin/env python3
## Study Pratt parsing for python3
## Ref: http://effbot.org/zone/simple-top-down-parsing.htm
import re
token_pattern = re.compile(r"\s*(?:(\d+)|(\*\*|.))\s*")
def tokenize(program):
for number, operator in token_pattern.findall(program):
if number:
symbol = symbol_table["(literal)"]
s = symbol()
s.value = number
yield s
else:
symbol = symbol_table.get(operator)
if not symbol:
raise SyntaxError("Unknown operator")
yield symbol()
symbol = symbol_table["(end)"]
yield symbol()
class symbol_base:
id = None
value = None
first = second = third = None
def nud(self):
raise SyntaxError("Unknown operator ({}}).".format(self.id))
def led(self, left):
raise SyntaxError("Unknown operator ({}}).".format(self.id))
def __repr__(self):
if self.id == "(name)" or self.id == "(literal)":
return "(%s %s)" % (self.id[1:-1], self.value)
out = [self.id, self.first, self.second, self.third]
out = map(str, filter(None, out))
return "(" + " ".join(out) + ")"
symbol_table = {}
def symbol(id, bp=0):
try:
s = symbol_table[id]
except KeyError:
class s(symbol_base):
pass
s.__name__ = "symbol-" + id
s.id = id
s.lbp = bp
symbol_table[id] = s
else:
s.lbp = max(bp, s.lbp)
return s
def parse(program):
global token, token_stream
token_stream = tokenize(program)
token = next(token_stream)
return expression()
def expression(rbp=0):
global token
t = token
token = next(token_stream)
left = t.nud()
while rbp < token.lbp:
t = token
token = next(token_stream)
left = t.led(left)
return left
def infix(id, bp):
def led(self, left):
self.first = left
self.second = expression(bp)
return self
symbol(id, bp).led = led
def prefix(id, bp):
def nud(self):
self.first = expression(bp)
self.second = None
return self
symbol(id, bp).nud = nud
def infix_r(id, bp):
def led(self, left):
self.first = left
self.second = expression(bp-1)
return self
symbol(id, bp).led = led
infix("+", 10); infix("-", 10)
infix("*", 20); infix("/", 20)
infix_r("**", 30)
prefix("+", 100); prefix("-", 100)
symbol("(literal)").nud = lambda self: self
symbol("(end)")
if __name__ == "__main__":
print(parse("1"))
print(parse("-1"))
print(parse("-1 * 2"))
print(parse("1 + 2"))
print(parse("1 + 2 * 3"))
print(parse("1 + 2 - 3 * 4 / 5"))
print(parse("2**2**3"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment