public
Last active

Operator precedence parsing

  • Download Gist
op.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
class Operator:
 
def __init__(self, name, leftp, rightp, merge, fixity, label = None):
self.name = name
self.leftp = leftp
self.rightp = rightp
self.merge = merge
self.fixity = fixity
self.label = label or name
 
optable = {}
 
def reg(name, priority, associativity, merge = [], nodename = None):
if associativity == 'left':
lp, rp, f = priority, priority + 1, 'i'
elif associativity == 'right':
lp, rp, f = priority + 1, priority, 'i'
elif associativity == 'prefix':
lp, rp, f = 1e10, priority, 'p'
elif associativity == 'suffix':
lp, rp, f = priority, 1e10, 's'
 
optable[name] = Operator(name, lp, rp, merge, f, nodename)
 
 
reg("", 2000, 'left', [], 'juxt')
 
reg("**", 1000, 'right')
reg("^", 1000, 'right')
reg("*", 900, 'left')
reg("/", 900, 'left')
reg("%", 900, 'left')
 
reg("+", 500, 'left')
reg("-", 500, 'left')
 
reg("u+", 901, 'prefix')
optable["+"].unary = optable['u+']
reg("u-", 901, 'prefix')
optable["-"].unary = optable['u-']
 
reg("&", 300, 'left')
reg("^", 300, 'left')
reg("|", 300, 'left')
 
reg("<", 200, 'left')
reg("<=", 200, 'left')
reg(">", 200, 'left')
reg(">=", 200, 'left')
reg("!=", 200, 'left')
reg("==", 200, 'left')
reg("in", 200, 'left')
reg("is", 200, 'left')
reg("not in", 200, 'left')
reg("is not", 200, 'left')
 
reg("not", 120, 'prefix')
 
reg("and", 110, 'left')
reg("or", 100, 'left')
 
reg("if", 50, 'right', ["else"], 'ifelse')
reg("else", 50, 'right', [], 'ifelse')
 
reg(",", 10, 'left', [","], 'arguments')
 
optable['('] = Operator('(', 2000, 0, [')'], 'i', 'fcall')
reg(")", 0, 'suffix', [])
 
 
 
def alt(tokens):
# Make operator tokens alternate with non-operator tokens
# non-operator tokens are guaranteed to be the first and last tokens
# <juxt> is placeholder operator
# <void> is placeholder non-operator
# x y -> x <juxt> y
# a + - x -> a + <void> - x
# a + ( b ) -> a + <void> ( b )
# etc.
# Operators marked as suffix (s) or prefix (p) may introduce extra
# placeholders to ensure, respectively, that their rhs and lhs are
# <void>
juxt = optable['']
void = None
last = 'i'
for token in tokens:
# i = infix, p = prefix, s = suffix, - = symbol or number
f = (token.fixity if isinstance(token, Operator) else '-')
succ = last + f
if succ == '--':
yield juxt
elif succ == 'ii':
yield void
token = getattr(token, 'unary', token)
elif succ in 'pi pp ps ip is si ss'.split(' '):
yield void
elif succ == '-p':
yield juxt
yield void
elif succ == 's-':
yield void
yield juxt
elif succ == 'sp':
yield void
yield juxt
yield void
yield token
last = f
if last in 'ips':
yield void
 
 
 
def operator_parse(tokenizer, order, finalize):
# Note: requires tokens to alternate between non-operators and operators
# starting and ending with a non-operator; all operators are considered to
# be infix, but prefix/suffix can easily be emulated by infix operators that
# take a dummy argument on their left or right.
 
between = next(tokenizer)
try:
right_op = next(tokenizer)
except StopIteration:
return between
 
stack = []
left_op = None
current = None
 
while True:
 
if left_op is None:
if right_op is None:
return between
else:
o = 'r'
elif right_op is None:
o = 'l'
else:
o = order(left_op, right_op)
 
if o == 'l':
# Left operator takes the cake. Its expression is closed,
# and the result becomes the new "between". We will then
# compare right_op to the op to the left of left_op.
current.append(between)
between = finalize(current)
left_op, current = stack.pop()
 
elif o == 'r':
# Right operator takes the cake. Left operator and its
# partial expression (stored in current) are pushed on the
# stack for when its right operand will be available. We
# build a partial expression for the right operator, which
# becomes the left, and we fetch the next operator.
stack.append((left_op, current))
left_op = right_op
current = [[right_op], between]
between = next(tokenizer)
try:
right_op = next(tokenizer)
except StopIteration:
right_op = None
 
elif o == 'a':
# Left operator and right operator are aggregated. This
# means the partial expression for the left operator is
# reused. This can be used to parse brackets or ternary
# operators, since left and right operators will "close"
# on the middle operand.
current[0].append(right_op)
current.append(between)
left_op = right_op
between = next(tokenizer)
try:
right_op = next(tokenizer)
except StopIteration:
right_op = None
 
else:
raise SyntaxError("Cannot order", left_op, right_op, o)
 
 
def order(l, r):
if r.name in l.merge:
return 'a'
lp = l.rightp
rp = r.leftp
if lp == rp:
raise Exception("inconsistent associativity")
elif rp > lp:
return 'r'
else:
return 'l'
 
 
def finalize(x):
ops, *arguments = x
ops = [op.label for op in ops]
op = ops[0]
if op == 'fcall':
# f(...) is treated as a ternary operator with a dummy
# third argument
assert arguments[2] is None
if arguments[0] is None:
return ['round', arguments[1]]
else:
return ['fcall', arguments[0], arguments[1]]
return [op] + arguments
 
 
 
def mkop(seq):
return (optable.get(x, x) for x in seq)
 
def parse(seq):
return operator_parse(alt(mkop(seq)), order, finalize)
 
 
 
for expr in ['a+b+c*d',
'a ** b ** c * d'.split(),
'f(a+b,c,d)',
'a if a < b else a if a < c else c'.split(),
'a((+b)+c*d)',
'a+-b*c',
'a(b)(c)(d)',
'(a+b)(c+d)']:
print(expr if isinstance(expr, str) else ' '.join(expr))
print(parse(expr))
print()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.