@jg-rp
Created January 4, 2022 17:32
Arithmetic expression evaluation using a stack
"""Evaluate arithmetic expressions using a stack by fist converting
an expression from infix notation to postfix notation.
Expressions are tokenized using regular expressions, as described at
https://docs.python.org/3/library/re.html#writing-a-tokenizer
"""
import operator
import re
from collections import deque
from typing import Deque
from typing import Iterable
from typing import Iterator
from typing import List
from typing import NamedTuple
from typing import Union
TOKEN_MUL = "MUL"
TOKEN_ADD = "ADD"
TOKEN_SUB = "SUB"
TOKEN_DIV = "DIV"
TOKEN_POW = "POW"
TOKEN_LP = "LP"
TOKEN_RP = "RP"
TOKEN_NEG = "NEG"
TOKEN_POS = "POS"
TOKEN_INT = "INT"
TOKEN_FLOAT = "FLOAT"
TOKEN_SKIP = "SKIP"
TOKEN_ILLEGAL = "ILLEGAL"
TOKEN_EMPTY = "EMPTY"

class Token(NamedTuple):
    type: str
    value: str


Operand = Union[int, float]

OPERANDS = {
    TOKEN_INT: int,
    TOKEN_FLOAT: float,
}

BINARY_OPERATORS = {
    TOKEN_ADD: operator.add,
    TOKEN_SUB: operator.sub,
    TOKEN_MUL: operator.mul,
    TOKEN_DIV: operator.truediv,
    TOKEN_POW: operator.pow,
}

UNARY_OPERATORS = {
    TOKEN_NEG: operator.neg,
    TOKEN_POS: operator.pos,
}

# A map of binary operators to unary operators that use the same
# symbol. If a token with an overloaded operator appears after a
# token with a type in UNARY_LOOK_BEHIND, it will be considered a
# unary operator.
OVERLOADED_OPERATORS = {
    TOKEN_SUB: TOKEN_NEG,
    TOKEN_ADD: TOKEN_POS,
}
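
# For example, in "3 - -4" the first "-" follows an INT token, so it is
# binary subtraction; the second follows a SUB token, so it is unary
# negation.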

EMPTY = Token(TOKEN_EMPTY, "")

UNARY_LOOK_BEHIND = frozenset(
    [
        *BINARY_OPERATORS,
        TOKEN_EMPTY,
        TOKEN_LP,
    ]
)

PRECEDENCE = {
    **{op: 1 for op in BINARY_OPERATORS},
    TOKEN_MUL: 2,
    TOKEN_DIV: 2,
    TOKEN_POW: 5,
    **{op: 3 for op in UNARY_OPERATORS},
}

# "**" is right associative. Prefix operators bind to the operand that
# follows them, so they must never pop other operators off the stack;
# treating them as right associative achieves that and keeps inputs
# like "2 ** -3" and "- -2" working.
RIGHT_ASSOCIATIVE = frozenset(
    [
        TOKEN_POW,
        *UNARY_OPERATORS,
    ]
)
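
# Note that the pattern for "**" is listed before the pattern for "*" so
# the longer operator wins; re tries alternatives left to right.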
TOKEN_RULES = (
    (TOKEN_LP, r"\("),
    (TOKEN_RP, r"\)"),
    (TOKEN_ADD, r"\+"),
    (TOKEN_SUB, r"\-"),
    (TOKEN_POW, r"\*\*"),
    (TOKEN_MUL, r"\*"),
    (TOKEN_DIV, r"\/"),
    (TOKEN_FLOAT, r"\d+\.\d*"),
    (TOKEN_INT, r"\d+"),
    (TOKEN_SKIP, r"\s+"),
    (TOKEN_ILLEGAL, r"."),
)

RE_TOKENS = re.compile(
    "|".join(f"(?P<{name}>{pattern})" for name, pattern in TOKEN_RULES),
    re.DOTALL,
)


def tokenize(expression: str) -> Iterator[Token]:
    """Yield tokens from the given arithmetic expression."""
    for match in RE_TOKENS.finditer(expression):
        kind = match.lastgroup
        value = match.group()

        if kind == TOKEN_SKIP:
            continue
        if kind == TOKEN_ILLEGAL:
            raise Exception(f"unexpected {value!r}")

        assert kind is not None
        yield Token(type=kind, value=value)
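
# For example, list(tokenize("1 + 2")) gives
# [Token("INT", "1"), Token("ADD", "+"), Token("INT", "2")].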


def postfix(tokens: Iterable[Token]) -> List[Token]:
    """Convert a stream of tokens in infix notation into a list of
    tokens in postfix notation."""
    post: List[Token] = []
    stack: Deque[Token] = deque()

    # Previous token. Used for determining if an overloaded operator
    # is binary or unary. A hyphen, for example, could be infix
    # subtraction or prefix negation.
    prev = EMPTY

    for token in tokens:
        if token.type in OPERANDS:
            post.append(token)
        elif token.type == TOKEN_LP:
            stack.append(token)
        elif token.type == TOKEN_RP:
            while stack and stack[-1].type != TOKEN_LP:
                post.append(stack.pop())
            assert (
                stack and stack.pop().type == TOKEN_LP
            ), "unbalanced parentheses"
        elif token.type in BINARY_OPERATORS:
            if (
                token.type in OVERLOADED_OPERATORS
                and prev.type in UNARY_LOOK_BEHIND
            ):
                # Unary operator
                token = token._replace(type=OVERLOADED_OPERATORS[token.type])

            if not stack or stack[-1].type == TOKEN_LP:
                stack.append(token)
            else:
                while (
                    stack
                    and stack[-1].type != TOKEN_LP
                    and token.type not in RIGHT_ASSOCIATIVE
                    and PRECEDENCE[token.type] <= PRECEDENCE[stack[-1].type]
                ):
                    post.append(stack.pop())
                stack.append(token)

        prev = token

    while stack:
        token = stack.pop()
        assert token.type != TOKEN_LP, "unbalanced parentheses"
        post.append(token)

    return post
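
# For example, postfix(tokenize("1 + 2 * 3")) gives tokens equivalent to
# the postfix expression "1 2 3 * +".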


def eval_postfix(tokens: Iterable[Token]) -> Operand:
    """Evaluate an expression in postfix notation given as a
    sequence of tokens."""
    stack: List[Operand] = []

    for token in tokens:
        if token.type in OPERANDS:
            stack.append(OPERANDS[token.type](token.value))
        elif token.type in BINARY_OPERATORS:
            right = stack.pop()
            left = stack.pop()
            res = BINARY_OPERATORS[token.type](left, right)
            stack.append(res)
        elif token.type in UNARY_OPERATORS:
            val = stack.pop()
            res = UNARY_OPERATORS[token.type](val)
            stack.append(res)

    assert len(stack) == 1
    return stack.pop()
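
# Evaluating "1 2 3 * +" proceeds: push 1, push 2, push 3, then "*" pops
# 3 and 2 and pushes 6, then "+" pops 6 and 1 and pushes 7.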


def eval_infix(expression: str) -> Operand:
    """Evaluate an expression string given in infix notation."""
    return eval_postfix(postfix(tokenize(expression)))


if __name__ == "__main__":
    # Example usage
    result = eval_infix("6 * (2.5 + 42) / (7 - 5.0)")
    print(result)  # 133.5
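
    # A couple more examples. These rely on unary operators being right
    # associative (see RIGHT_ASSOCIATIVE above), so "-2 ** 2" parses as
    # -(2 ** 2), as it does in Python.
    print(eval_infix("-2 ** 2"))  # -4
    print(eval_infix("2 ** -3"))  # 0.125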