Skip to content
Create a gist now

Instantly share code, notes, and snippets.

@breuleux /
Last active

Operator precedence parsing
class Operator:
def __init__(self, name, leftp, rightp, merge, fixity, label = None): = name
self.leftp = leftp
self.rightp = rightp
self.merge = merge
self.fixity = fixity
self.label = label or name
optable = {}
def reg(name, priority, associativity, merge = [], nodename = None):
if associativity == 'left':
lp, rp, f = priority, priority + 1, 'i'
elif associativity == 'right':
lp, rp, f = priority + 1, priority, 'i'
elif associativity == 'prefix':
lp, rp, f = 1e10, priority, 'p'
elif associativity == 'suffix':
lp, rp, f = priority, 1e10, 's'
optable[name] = Operator(name, lp, rp, merge, f, nodename)
reg("", 2000, 'left', [], 'juxt')
reg("**", 1000, 'right')
reg("^", 1000, 'right')
reg("*", 900, 'left')
reg("/", 900, 'left')
reg("%", 900, 'left')
reg("+", 500, 'left')
reg("-", 500, 'left')
reg("u+", 901, 'prefix')
optable["+"].unary = optable['u+']
reg("u-", 901, 'prefix')
optable["-"].unary = optable['u-']
reg("&", 300, 'left')
reg("^", 300, 'left')
reg("|", 300, 'left')
reg("<", 200, 'left')
reg("<=", 200, 'left')
reg(">", 200, 'left')
reg(">=", 200, 'left')
reg("!=", 200, 'left')
reg("==", 200, 'left')
reg("in", 200, 'left')
reg("is", 200, 'left')
reg("not in", 200, 'left')
reg("is not", 200, 'left')
reg("not", 120, 'prefix')
reg("and", 110, 'left')
reg("or", 100, 'left')
reg("if", 50, 'right', ["else"], 'ifelse')
reg("else", 50, 'right', [], 'ifelse')
reg(",", 10, 'left', [","], 'arguments')
optable['('] = Operator('(', 2000, 0, [')'], 'i', 'fcall')
reg(")", 0, 'suffix', [])
def alt(tokens):
# Make operator tokens alternate with non-operator tokens
# non-operator tokens are guaranteed to be the first and last tokens
# <juxt> is placeholder operator
# <void> is placeholder non-operator
# x y -> x <juxt> y
# a + - x -> a + <void> - x
# a + ( b ) -> a + <void> ( b )
# etc.
# Operators marked as suffix (s) or prefix (p) may introduce extra
# placeholders to ensure, respectively, that their rhs and lhs are
# <void>
juxt = optable['']
void = None
last = 'i'
for token in tokens:
# i = infix, p = prefix, s = suffix, - = symbol or number
f = (token.fixity if isinstance(token, Operator) else '-')
succ = last + f
if succ == '--':
yield juxt
elif succ == 'ii':
yield void
token = getattr(token, 'unary', token)
elif succ in 'pi pp ps ip is si ss'.split(' '):
yield void
elif succ == '-p':
yield juxt
yield void
elif succ == 's-':
yield void
yield juxt
elif succ == 'sp':
yield void
yield juxt
yield void
yield token
last = f
if last in 'ips':
yield void
def operator_parse(tokenizer, order, finalize):
# Note: requires tokens to alternate between non-operators and operators
# starting and ending with a non-operator; all operators are considered to
# be infix, but prefix/suffix can easily be emulated by infix operators that
# take a dummy argument on their left or right.
between = next(tokenizer)
right_op = next(tokenizer)
except StopIteration:
return between
stack = []
left_op = None
current = None
while True:
if left_op is None:
if right_op is None:
return between
o = 'r'
elif right_op is None:
o = 'l'
o = order(left_op, right_op)
if o == 'l':
# Left operator takes the cake. Its expression is closed,
# and the result becomes the new "between". We will then
# compare right_op to the op to the left of left_op.
between = finalize(current)
left_op, current = stack.pop()
elif o == 'r':
# Right operator takes the cake. Left operator and its
# partial expression (stored in current) are pushed on the
# stack for when its right operand will be available. We
# build a partial expression for the right operator, which
# becomes the left, and we fetch the next operator.
stack.append((left_op, current))
left_op = right_op
current = [[right_op], between]
between = next(tokenizer)
right_op = next(tokenizer)
except StopIteration:
right_op = None
elif o == 'a':
# Left operator and right operator are aggregated. This
# means the partial expression for the left operator is
# reused. This can be used to parse brackets or ternary
# operators, since left and right operators will "close"
# on the middle operand.
left_op = right_op
between = next(tokenizer)
right_op = next(tokenizer)
except StopIteration:
right_op = None
raise SyntaxError("Cannot order", left_op, right_op, o)
def order(l, r):
if in l.merge:
return 'a'
lp = l.rightp
rp = r.leftp
if lp == rp:
raise Exception("inconsistent associativity")
elif rp > lp:
return 'r'
return 'l'
def finalize(x):
ops, *arguments = x
ops = [op.label for op in ops]
op = ops[0]
if op == 'fcall':
# f(...) is treated as a ternary operator with a dummy
# third argument
assert arguments[2] is None
if arguments[0] is None:
return ['round', arguments[1]]
return ['fcall', arguments[0], arguments[1]]
return [op] + arguments
def mkop(seq):
return (optable.get(x, x) for x in seq)
def parse(seq):
return operator_parse(alt(mkop(seq)), order, finalize)
for expr in ['a+b+c*d',
'a ** b ** c * d'.split(),
'a if a < b else a if a < c else c'.split(),
print(expr if isinstance(expr, str) else ' '.join(expr))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.