Last active
June 27, 2022 05:49
-
-
Save tomdaley92/507c3a99c56b779144d9c79c0a3900be to your computer and use it in GitHub Desktop.
Abstract Syntax Trees and the Shunting Yard Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
Abstract Syntax Trees | |
01/04/2018 | |
You may override the constants OPERATORS and PRECEDENCE | |
to define your own grammar. The is_operand() function is | |
intended to be redefined as well and is used to determine | |
if a given token within an expression string is a valid | |
operand. | |
''' | |
from sys import stdout, modules | |
from math import inf | |
# Override these constants | |
OPERATORS = ['^', '*', '/', '%', '+', '-'] | |
PRECEDENCE = { | |
'^' : 2, # raise | |
'*' : 1, # multiply | |
'/' : 1, # divide | |
'%' : 1, # modulo | |
'+' : 0, # add | |
'-' : 0 # subtract | |
} | |
# TODO: Implement Operator Associativity | |
# https://en.wikipedia.org/wiki/Operator_associativity | |
# Override this function | |
def is_operand(token): | |
'''Returns True if token is a legal operand''' | |
try: | |
float(token) | |
return True | |
except (TypeError, ValueError): | |
pass | |
try: | |
import unicodedata | |
unicodedata.numeric(token) | |
return True | |
except (TypeError, ValueError): | |
pass | |
return False | |
class AST: | |
'''Data structure for an Abstract Syntax Tree''' | |
def __init__(self, value, left=None, right=None): | |
self.value = value | |
if left != None or right != None: | |
assert left != None and right != None, \ | |
'Arguments left and right must both be either present or absent.' | |
self.left = None | |
if left != None: | |
if type(left) == AST: | |
self.left = left | |
else: | |
self.left = AST(left) | |
self.right = None | |
if right != None: | |
if type(right) == AST: | |
self.right = right | |
else: | |
self.right = AST(right) | |
def preOrder(self): | |
# Value, Left, Right | |
result = [] | |
result.append(self.value) | |
if self.left: | |
result += self.left.preOrder() | |
if self.right: | |
result += self.right.preOrder() | |
return result | |
def inOrder(self): | |
# Left, Value, Right | |
result = [] | |
if self.left: | |
result += self.left.inOrder() | |
result.append(self.value) | |
if self.right: | |
result += self.right.inOrder() | |
return result | |
def postOrder(self): | |
# Left, Right, Value | |
result = [] | |
if self.left: | |
result += self.left.postOrder() | |
if self.right: | |
result += self.right.postOrder() | |
result.append(self.value) | |
return result | |
def _format(self, _padding=''): | |
'''Formats the AST to a string''' | |
line = '('+str(self.value)+')\n' | |
if self.left: | |
line += _padding | |
line += ' \u251C' | |
# push | |
_padding += ' |' | |
line += self.left._format(_padding) | |
# pop | |
_padding = _padding[:-2] | |
if self.right: | |
line += _padding | |
line += ' \u2514' | |
# push | |
_padding += ' ' | |
line += self.right._format(_padding) | |
# pop | |
_padding = _padding[:-2] | |
return line | |
def format(self): | |
return self._format() | |
def display(self): | |
'''Pretty-prints the AST to the terminal''' | |
stdout.write(self.format()) | |
def printPreOrder(self): | |
stdout.write(' '.join(map(str, self.preOrder()))+'\n') | |
def printInOrder(self): | |
stdout.write(' '.join(map(str, self.inOrder()))+'\n') | |
def printPostOrder(self): | |
stdout.write(' '.join(map(str, self.postOrder()))+'\n') | |
def parse(expression): | |
''' | |
Parses an infix notation expression into an AST | |
using Edsger Dijkstra's Shunting-Yard algorithm. | |
There must be whitespace between operands and | |
operators. | |
''' | |
class Stack: | |
'''Data structure for a Stack (LIFO)''' | |
def __init__(self, another=None): | |
self.list = [] | |
if another: | |
self.list = another.list | |
def push(self, val): | |
self.list.append(val) | |
def pop(self): | |
return self.list.pop() if self.list else None | |
def peek(self): | |
return self.list[-1] if self.list else None | |
def isEmpty(self): | |
return False if self.list else True | |
def size(self): | |
return len(self.list) | |
def display(self): | |
print(self.list) | |
def next_token(expression): | |
''' | |
Returns a tuple (expression, token). | |
Tokens are either some string or an | |
open/closed parenthesis. | |
The expression returned is updated to reflect | |
the new expression without the token that was | |
just found. | |
''' | |
exp = expression.strip() | |
end = 0 | |
previous = '' | |
for c in exp: | |
if c == ' ': | |
break | |
elif c == '(' or c == ')': | |
if end == 0: | |
end = 1 | |
break | |
else: | |
break | |
else: | |
end += 1 | |
return exp[end:], exp[0:end] | |
ERROR_MESSAGE = 'Unable to parse \''+expression+'\'' | |
global PRECEDENCE | |
PARENTHESIS = { '(' : inf, ')' : inf } | |
PRECEDENCE = { **PRECEDENCE, **PARENTHESIS } | |
op_stack = Stack() | |
exp_stack = Stack() | |
exp, token = next_token('('+expression+')') | |
while token: | |
if token == '(': | |
op_stack.push(token) | |
elif is_operand(token): | |
exp_stack.push(AST(token)) | |
elif token in OPERATORS: | |
while op_stack.size(): | |
if op_stack.peek() == '(': | |
break | |
if PRECEDENCE[op_stack.peek()] < PRECEDENCE[token]: | |
break | |
op = op_stack.pop() | |
e2 = exp_stack.pop() | |
e1 = exp_stack.pop() | |
exp_stack.push(AST(op, e1, e2)) | |
op_stack.push(token) | |
elif token == ')': | |
while op_stack.size(): | |
if op_stack.peek() == '(': | |
break | |
op = op_stack.pop() | |
e2 = exp_stack.pop() | |
e1 = exp_stack.pop() | |
exp_stack.push(AST(op, e1, e2)) | |
# Pop the '(' off the operator stack. | |
op_stack.pop() | |
else: | |
raise RuntimeError(ERROR_MESSAGE) | |
# Grab the next token | |
exp, token = next_token(exp) | |
# Only one item should be left on the expression stack | |
assert exp_stack.size() == 1, \ | |
('The expression stack is expected to be of size 1 ' | |
'after applying the Shunting-Yard algorithm. ' + ERROR_MESSAGE) | |
# Return the root node | |
return exp_stack.pop() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast | |
t1 = ast.parse('(1 + 2)') | |
t1.printPreOrder() | |
t1.printInOrder() | |
t1.printPostOrder() | |
t1.display() | |
t2 = ast.parse('(3 * (1 + 2))') | |
t2.printPreOrder() | |
t2.printInOrder() | |
t2.printPostOrder() | |
t2.display() | |
t3 = ast.parse('5.1 / 6') | |
t3.printPreOrder() | |
t3.printInOrder() | |
t3.printPostOrder() | |
t3.display() | |
t4 = ast.parse('(4 ^ 2) * (8 + 9)') | |
t4.printPreOrder() | |
t4.printInOrder() | |
t4.printPostOrder() | |
t4.display() | |
t5 = ast.parse('4 ^ 2 * 8 + 9') | |
t5.printPreOrder() | |
t5.printInOrder() | |
t5.printPostOrder() | |
t5.display() | |
t6 = ast.parse('(10 ^ 20) * (99.99 + 0.01)') | |
t6.printPreOrder() | |
t6.printInOrder() | |
t6.printPostOrder() | |
t6.display() | |
t7 = ast.parse('5 * 3 + (4 + 2 % 2 * 8)') | |
t7.printPreOrder() | |
t7.printInOrder() | |
t7.printPostOrder() | |
t7.display() | |
t8 = ast.parse('0.85 * .15 + 3.14 - 2') | |
t8.printPreOrder() | |
t8.printInOrder() | |
t8.printPostOrder() | |
t8.display() | |
try: | |
t9 = parse('') | |
except: | |
# This is expected behavior | |
pass | |
t10 = ast.parse('1 + 2 * 3 - 4 / 5 + 6 * 7 - 8 / 9 + 10') | |
t10.printPreOrder() | |
t10.printInOrder() | |
t10.printPostOrder() | |
t10.display() | |
# AST class Seems to work okay with generic objects as | |
# well although more testing is needed... | |
t11 = ast.AST('&&', { 'a' : 'apples'}, [1, 2, 3, 4]) | |
t11.printPreOrder() | |
t11.printInOrder() | |
t11.printPostOrder() | |
t11.display() | |
preOrderList = t11.preOrder() | |
inOrderList = t11.inOrder() | |
postOrderList = t11.postOrder() | |
print(preOrderList) | |
print(inOrderList) | |
print(postOrderList) | |
# The ZERO test | |
t12 = ast.AST(0, 0, 0) | |
t12.printPreOrder() | |
t12.printInOrder() | |
t12.printPostOrder() | |
t12.display() | |
# Overriding tests | |
OPERATORS = ['&', '|'] | |
PRECEDENCE = { | |
'&' : 0, # logical AND | |
'|' : 0, # logical OR | |
} | |
def is_operand(token): | |
# Any single letter in the alphabet | |
if len(token) == 1: | |
if token.lower() >= 'a' and token.lower() <= 'z': | |
return True | |
return False | |
ast.OPERATORS = OPERATORS | |
ast.PRECEDENCE = PRECEDENCE | |
ast.is_operand = is_operand | |
t13 = ast.parse('(A & B) | C') | |
t13.printPreOrder() | |
t13.printInOrder() | |
t13.printPostOrder() | |
t13.display() | |
# Format test | |
print(t8.format()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment