Skip to content

Instantly share code, notes, and snippets.

@millerdev
Last active April 3, 2024 14:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save millerdev/03d881620dd0879d4347b0e971ff0be5 to your computer and use it in GitHub Desktop.
Save millerdev/03d881620dd0879d4347b0e971ff0be5 to your computer and use it in GitHub Desktop.
Pratt Parsers: Expression Parsing Made Easy
# Pratt Parser in Python
# https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#
# Works with input like `a + b`.
# The lexer does not support numbers (yet).
#
# https://gist.github.com/millerdev/03d881620dd0879d4347b0e971ff0be5
from dataclasses import dataclass
def main():
while True:
print("Type an expression:")
try:
expr = input("> ")
if not expr:
raise EOFError
except EOFError:
print()
break
print("AST:", parse(lex(expr)))
print()
def parse(tokens):
parser = Parser(tokens)
return parse_expression(parser, 0)
def parse_expression(parser, precedence):
token = parser.consume()
parse_prefix = PREFIX_PARSERS.get(token.type)
if parse_prefix is None:
raise ParseError(f"Could not parse: {token}")
left = parse_prefix(parser, token)
while precedence < get_precedence(parser):
token = parser.consume()
parse_infix = INFIX_PARSERS[token.type]
left = parse_infix(parser, left, token)
return left
def get_precedence(parser):
token = parser.look_ahead()
parse = INFIX_PARSERS.get(token.type)
return 0 if parse is None else parse.precedence
def parse_name(parser, token):
return NameExpression(token.value)
def parse_prefix_expression(parser, token, *, precedence):
operand = parse_expression(parser, precedence)
return PrefixExpression(token.value, operand)
def parse_binary_expression(parser, left, token, *, precedence):
right = parse_expression(parser, precedence)
return OperatorExpression(left, token.type, right)
def parse_postfix_expression(parser, left, token, *, precedence):
return PostfixExpression(left, token.type)
def parse_conditional_expression(parser, left, token, *, precedence):
then_arm = parse_expression(parser, precedence)
token = parser.consume()
assert token.type == ':', token
else_arm = parse_expression(parser, precedence)
return ConditionalExpression(left, then_arm, else_arm)
class Expression:
...
@dataclass
class ConditionalExpression(Expression):
condition: Expression
then_arm: Expression
else_arm: Expression
@dataclass
class NameExpression(Expression):
name: str
@dataclass
class PrefixExpression(Expression):
operator: str
operand: str
@dataclass
class OperatorExpression(Expression):
left: Expression
operator: str
right: Expression
@dataclass
class PostfixExpression(Expression):
left: Expression
operator: str
class Parser:
def __init__(self, tokens):
self.tokens = tokens
self.peeked = None
def consume(self):
if self.peeked is None:
return next(self.tokens)
token = self.peeked
self.peeked = None
return token
def look_ahead(self):
if self.peeked is None:
self.peeked = next(self.tokens)
return self.peeked
def lex(text):
i = 0
while i < len(text):
char = text[i]
i += 1
if char in PUNCTUATORS:
yield Token(char, char)
elif char.isidentifier():
start = i - 1
while i < len(text):
if not text[i].isalnum():
break
i += 1
name = text[start:i]
assert name.isidentifier(), name
yield Token(NAME, name)
yield Token(EOF, "")
PRECEDENCES = {
'=': 1,
'?': 2,
'+': 3,
'-': 3,
'*': 4,
'/': 4,
'^': 5,
'pre': 6,
'!': 7,
'call': 8,
}
def add_precedence(parse_expr, op):
def parse(*args):
return parse_expr(*args, precedence=value)
parse.precedence = value = PRECEDENCES[op]
return parse
NAME = 'NAME'
EOF = 'EOF'
PUNCTUATORS = '(),=+-*/^~!?:'
PREFIX_OPS = list('+-~!')
BINARY_OPS = list('^*/+-=')
def make_prefix_parsers():
parsers = {}
parsers[NAME] = parse_name
for op in PREFIX_OPS:
parsers[op] = add_precedence(parse_prefix_expression, 'pre')
return parsers
def make_infix_parsers():
parsers = {}
for op in BINARY_OPS:
parsers[op] = add_precedence(parse_binary_expression, op)
parsers['!'] = add_precedence(parse_postfix_expression, '!')
parsers['?'] = add_precedence(parse_conditional_expression, '?')
return parsers
PREFIX_PARSERS = make_prefix_parsers()
INFIX_PARSERS = make_infix_parsers()
@dataclass
class Token:
type: str
value: str
class ParseError(Exception):
...
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment