Last active
December 20, 2015 14:29
-
-
Save breuleux/6147321 to your computer and use it in GitHub Desktop.
Operator precedence parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Operator: | |
def __init__(self, name, leftp, rightp, merge, fixity, label = None): | |
self.name = name | |
self.leftp = leftp | |
self.rightp = rightp | |
self.merge = merge | |
self.fixity = fixity | |
self.label = label or name | |
optable = {} | |
def reg(name, priority, associativity, merge = [], nodename = None): | |
if associativity == 'left': | |
lp, rp, f = priority, priority + 1, 'i' | |
elif associativity == 'right': | |
lp, rp, f = priority + 1, priority, 'i' | |
elif associativity == 'prefix': | |
lp, rp, f = 1e10, priority, 'p' | |
elif associativity == 'suffix': | |
lp, rp, f = priority, 1e10, 's' | |
optable[name] = Operator(name, lp, rp, merge, f, nodename) | |
reg("", 2000, 'left', [], 'juxt') | |
reg("**", 1000, 'right') | |
reg("^", 1000, 'right') | |
reg("*", 900, 'left') | |
reg("/", 900, 'left') | |
reg("%", 900, 'left') | |
reg("+", 500, 'left') | |
reg("-", 500, 'left') | |
reg("u+", 901, 'prefix') | |
optable["+"].unary = optable['u+'] | |
reg("u-", 901, 'prefix') | |
optable["-"].unary = optable['u-'] | |
reg("&", 300, 'left') | |
reg("^", 300, 'left') | |
reg("|", 300, 'left') | |
reg("<", 200, 'left') | |
reg("<=", 200, 'left') | |
reg(">", 200, 'left') | |
reg(">=", 200, 'left') | |
reg("!=", 200, 'left') | |
reg("==", 200, 'left') | |
reg("in", 200, 'left') | |
reg("is", 200, 'left') | |
reg("not in", 200, 'left') | |
reg("is not", 200, 'left') | |
reg("not", 120, 'prefix') | |
reg("and", 110, 'left') | |
reg("or", 100, 'left') | |
reg("if", 50, 'right', ["else"], 'ifelse') | |
reg("else", 50, 'right', [], 'ifelse') | |
reg(",", 10, 'left', [","], 'arguments') | |
optable['('] = Operator('(', 2000, 0, [')'], 'i', 'fcall') | |
reg(")", 0, 'suffix', []) | |
def alt(tokens): | |
# Make operator tokens alternate with non-operator tokens | |
# non-operator tokens are guaranteed to be the first and last tokens | |
# <juxt> is placeholder operator | |
# <void> is placeholder non-operator | |
# x y -> x <juxt> y | |
# a + - x -> a + <void> - x | |
# a + ( b ) -> a + <void> ( b ) | |
# etc. | |
# Operators marked as suffix (s) or prefix (p) may introduce extra | |
# placeholders to ensure, respectively, that their rhs and lhs are | |
# <void> | |
juxt = optable[''] | |
void = None | |
last = 'i' | |
for token in tokens: | |
# i = infix, p = prefix, s = suffix, - = symbol or number | |
f = (token.fixity if isinstance(token, Operator) else '-') | |
succ = last + f | |
if succ == '--': | |
yield juxt | |
elif succ == 'ii': | |
yield void | |
token = getattr(token, 'unary', token) | |
elif succ in 'pi pp ps ip is si ss'.split(' '): | |
yield void | |
elif succ == '-p': | |
yield juxt | |
yield void | |
elif succ == 's-': | |
yield void | |
yield juxt | |
elif succ == 'sp': | |
yield void | |
yield juxt | |
yield void | |
yield token | |
last = f | |
if last in 'ips': | |
yield void | |
def operator_parse(tokenizer, order, finalize): | |
# Note: requires tokens to alternate between non-operators and operators | |
# starting and ending with a non-operator; all operators are considered to | |
# be infix, but prefix/suffix can easily be emulated by infix operators that | |
# take a dummy argument on their left or right. | |
between = next(tokenizer) | |
try: | |
right_op = next(tokenizer) | |
except StopIteration: | |
return between | |
stack = [] | |
left_op = None | |
current = None | |
while True: | |
if left_op is None: | |
if right_op is None: | |
return between | |
else: | |
o = 'r' | |
elif right_op is None: | |
o = 'l' | |
else: | |
o = order(left_op, right_op) | |
if o == 'l': | |
# Left operator takes the cake. Its expression is closed, | |
# and the result becomes the new "between". We will then | |
# compare right_op to the op to the left of left_op. | |
current.append(between) | |
between = finalize(current) | |
left_op, current = stack.pop() | |
elif o == 'r': | |
# Right operator takes the cake. Left operator and its | |
# partial expression (stored in current) are pushed on the | |
# stack for when its right operand will be available. We | |
# build a partial expression for the right operator, which | |
# becomes the left, and we fetch the next operator. | |
stack.append((left_op, current)) | |
left_op = right_op | |
current = [[right_op], between] | |
between = next(tokenizer) | |
try: | |
right_op = next(tokenizer) | |
except StopIteration: | |
right_op = None | |
elif o == 'a': | |
# Left operator and right operator are aggregated. This | |
# means the partial expression for the left operator is | |
# reused. This can be used to parse brackets or ternary | |
# operators, since left and right operators will "close" | |
# on the middle operand. | |
current[0].append(right_op) | |
current.append(between) | |
left_op = right_op | |
between = next(tokenizer) | |
try: | |
right_op = next(tokenizer) | |
except StopIteration: | |
right_op = None | |
else: | |
raise SyntaxError("Cannot order", left_op, right_op, o) | |
def order(l, r): | |
if r.name in l.merge: | |
return 'a' | |
lp = l.rightp | |
rp = r.leftp | |
if lp == rp: | |
raise Exception("inconsistent associativity") | |
elif rp > lp: | |
return 'r' | |
else: | |
return 'l' | |
def finalize(x): | |
ops, *arguments = x | |
ops = [op.label for op in ops] | |
op = ops[0] | |
if op == 'fcall': | |
# f(...) is treated as a ternary operator with a dummy | |
# third argument | |
assert arguments[2] is None | |
if arguments[0] is None: | |
return ['round', arguments[1]] | |
else: | |
return ['fcall', arguments[0], arguments[1]] | |
return [op] + arguments | |
def mkop(seq): | |
return (optable.get(x, x) for x in seq) | |
def parse(seq): | |
return operator_parse(alt(mkop(seq)), order, finalize) | |
for expr in ['a+b+c*d', | |
'a ** b ** c * d'.split(), | |
'f(a+b,c,d)', | |
'a if a < b else a if a < c else c'.split(), | |
'a((+b)+c*d)', | |
'a+-b*c', | |
'a(b)(c)(d)', | |
'(a+b)(c+d)']: | |
print(expr if isinstance(expr, str) else ' '.join(expr)) | |
print(parse(expr)) | |
print() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment