Skip to content

Instantly share code, notes, and snippets.

@derrickturk
Last active July 27, 2022 19:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save derrickturk/3c97593399c008a95d76387af28b49a1 to your computer and use it in GitHub Desktop.
Save derrickturk/3c97593399c008a95d76387af28b49a1 to your computer and use it in GitHub Desktop.
fun with lark and PHDwin
// Lark grammar for (a subset of) the PHDwin formula language.
%import common.SIGNED_NUMBER
%import common.CNAME
%import common.WS
// Whitespace between tokens is skipped.
%ignore WS
?start: expr
// Precedence ladder, loosest-binding first: comparison, then add/subtract,
// then multiply/divide, then unary sign, then atoms.
?expr: comp_expr
// At most one comparison operator at this level (no unparenthesised
// "a < b < c").
?comp_expr: addsub_expr [comp_op addsub_expr]
?addsub_expr: muldiv_expr (addsub_op muldiv_expr)*
?muldiv_expr: factor (muldiv_op factor)*
?factor: sign_op factor | atom_expr
// TODO: not equal?
!comp_op: "<" | ">" | "=" | "<=" | ">="
!addsub_op: "+" | "-"
// NOTE: multiplication is written with the letter "x", not "*".
!muldiv_op: "x" | "/"
!sign_op: "+" | "-"
// A call is an identifier followed by a parenthesised, possibly empty,
// comma-separated argument list; the node is aliased to "funcall".
?atom_expr: identifier "(" [args] ")" -> funcall
| atom
// TODO: I see some function calls(?) inside { } - what are these?
?atom: "(" expr ")"
| identifier
| number
?args: expr ("," expr)*
// TODO: we need to know the escaping rules here...
// An identifier is either a bare CNAME or any run of non-"}" characters
// wrapped in braces; both forms are aliased to "var".
?identifier: (CNAME | "{" /[^}]+/ "}") -> var
// Numeric literals (optionally signed) are aliased to "const".
?number: SIGNED_NUMBER -> const
import sys
import lark
from enum import Enum, auto
from collections import namedtuple
class UnaryOp(Enum):
    """Prefix (one-operand) operators that can appear in a formula."""

    # Values are written out explicitly; they are the same numbers that
    # auto() hands out in declaration order (1, 2, ...).
    Pos = 1  # unary "+"
    Neg = 2  # unary "-"
class BinaryOp(Enum):
    """Infix (two-operand) operators: four arithmetic, five comparison."""

    # Values are written out explicitly; they are the same numbers that
    # auto() hands out in declaration order (1, 2, ...).

    # arithmetic
    Add = 1
    Sub = 2
    Mul = 3
    Div = 4

    # comparison
    Lt = 5
    Gt = 6
    LtEq = 7
    GtEq = 8
    Eq = 9
class Constant(namedtuple('Constant', ['val'])):
    """Leaf node holding a literal value in ``val``."""

    __slots__ = ()

    def eval(self, var_dict):
        """Return the stored value; ``var_dict`` is accepted but not consulted."""
        value = self.val
        return value

    def pprint(self):
        """Return the stored value formatted as text."""
        return format(self.val)
class Var(namedtuple('Var', ['name'])):
    """Leaf node referring to a variable by ``name``."""

    __slots__ = ()

    def eval(self, var_dict):
        """Look ``name`` up in ``var_dict`` and return what is stored there.

        A missing name propagates the mapping's own lookup error
        (``KeyError`` for a plain dict).
        """
        key = self.name
        return var_dict[key]

    def pprint(self):
        """Return the name wrapped in curly braces, e.g. ``{Oil}``."""
        return '{' + format(self.name) + '}'
class BinaryOpApply(namedtuple('BinaryOpApply', ['op', 'lhs', 'rhs'])):
    """Application of a two-operand operator: ``lhs op rhs``.

    ``op`` is a ``BinaryOp`` member; ``lhs`` and ``rhs`` are nodes that
    provide ``eval(var_dict)`` and ``pprint()``.
    """

    __slots__ = ()

    def eval(self, var_dict):
        """Evaluate both operands, then combine them according to ``op``.

        Both sides are always evaluated before the operator is inspected
        (there is no short-circuiting).  The result is whatever the
        corresponding Python operator returns for the operand values.

        Raises:
            ValueError: if ``op`` is not one of the ``BinaryOp`` members.
        """
        lhs_val = self.lhs.eval(var_dict)
        rhs_val = self.rhs.eval(var_dict)
        op = self.op
        if op is BinaryOp.Add:
            return lhs_val + rhs_val
        if op is BinaryOp.Sub:
            return lhs_val - rhs_val
        if op is BinaryOp.Mul:
            return lhs_val * rhs_val
        if op is BinaryOp.Div:
            return lhs_val / rhs_val
        if op is BinaryOp.Lt:
            return lhs_val < rhs_val
        if op is BinaryOp.Gt:
            return lhs_val > rhs_val
        if op is BinaryOp.LtEq:
            return lhs_val <= rhs_val
        if op is BinaryOp.GtEq:
            return lhs_val >= rhs_val
        if op is BinaryOp.Eq:
            return lhs_val == rhs_val
        # The previous code raised the undefined name ArgumentError here,
        # which surfaced as a NameError instead of a meaningful error.
        raise ValueError('invalid binary operator')

    def pprint(self):
        """Return a fully parenthesised rendering, e.g. ``({A} + 1.0)``.

        Operators are printed in formula syntax: multiplication as ``x``
        and equality as a single ``=``.

        Raises:
            ValueError: if ``op`` is not one of the ``BinaryOp`` members.
        """
        lhs_pp = self.lhs.pprint()
        rhs_pp = self.rhs.pprint()
        symbols = {
            BinaryOp.Add: '+',
            BinaryOp.Sub: '-',
            BinaryOp.Mul: 'x',
            BinaryOp.Div: '/',
            BinaryOp.Lt: '<',
            BinaryOp.Gt: '>',
            BinaryOp.LtEq: '<=',
            BinaryOp.GtEq: '>=',
            BinaryOp.Eq: '=',
        }
        try:
            symbol = symbols[self.op]
        except (KeyError, TypeError):
            # TypeError covers an unhashable ``op``; either way it is not
            # a valid operator.
            raise ValueError('invalid binary operator') from None
        return f'({lhs_pp} {symbol} {rhs_pp})'
class UnaryOpApply(namedtuple('UnaryOpApply', ['op', 'expr'])):
    """Application of a prefix operator to one operand: ``op expr``.

    ``op`` is a ``UnaryOp`` member; ``expr`` is a node that provides
    ``eval(var_dict)`` and ``pprint()``.
    """

    __slots__ = ()

    def eval(self, var_dict):
        """Evaluate the operand and apply the sign operator to it.

        ``Pos`` returns the operand value unchanged; ``Neg`` returns its
        negation.

        Raises:
            ValueError: if ``op`` is not one of the ``UnaryOp`` members.
        """
        expr_val = self.expr.eval(var_dict)
        if self.op is UnaryOp.Pos:
            return expr_val
        if self.op is UnaryOp.Neg:
            return -expr_val
        # The previous code raised the undefined name ArgumentError here,
        # which surfaced as a NameError instead of a meaningful error.
        raise ValueError('invalid unary operator')

    def pprint(self):
        """Return the operand's rendering prefixed with ``+`` or ``-``.

        Raises:
            ValueError: if ``op`` is not one of the ``UnaryOp`` members.
        """
        expr_pp = self.expr.pprint()
        if self.op is UnaryOp.Pos:
            return f'+{expr_pp}'
        if self.op is UnaryOp.Neg:
            return f'-{expr_pp}'
        raise ValueError('invalid unary operator')
class FunCall(namedtuple('FunCall', ['fn', 'args'])):
    """Call of a built-in formula function.

    ``fn`` is the function name as a string; ``args`` is an iterable of
    argument nodes, each providing ``eval(var_dict)`` and ``pprint()``.
    """

    __slots__ = ()

    def eval(self, var_dict):
        """Evaluate the call; only ``If`` and ``Abs`` are implemented.

        ``If(cond, a, b)`` evaluates ``cond`` and then exactly one of
        ``a`` / ``b``.  ``Abs(x)`` returns ``abs`` of the evaluated
        argument.

        Raises:
            ValueError: if ``If`` / ``Abs`` receive the wrong number of
                arguments (raised by the tuple unpacking).
            NotImplementedError: for any other function name.
        """
        if self.fn == 'If':
            # if has special rules - it only evaluates one or the
            # other of its arguments!
            cond, do_if, do_else = self.args
            if cond.eval(var_dict):
                return do_if.eval(var_dict)
            return do_else.eval(var_dict)
        if self.fn == 'Abs':
            val, = self.args
            return abs(val.eval(var_dict))
        # The previous code raised the undefined name ArgumentError here,
        # which surfaced as a NameError instead of a meaningful error.
        raise NotImplementedError(f'function {self.fn} not yet implemented!')

    def pprint(self):
        """Return ``fn(arg1, arg2, ...)`` using each argument's ``pprint``."""
        args = ', '.join(a.pprint() for a in self.args)
        return f'{self.fn}({args})'
def _lookup_operator(table, token, message):
    """Return ``table[str(token)]``, raising ``ValueError(message)`` if absent."""
    try:
        return table[str(token)]
    except KeyError:
        raise ValueError(message) from None


def _fold_left(items):
    """Fold ``[e0, op1, e1, op2, e2, ...]`` into left-nested BinaryOpApply nodes.

    A single-element list is returned as its sole element, so
    ``x + y + z`` becomes ``(x + y) + z`` and a lone ``x`` stays ``x``.
    """
    node, *rest = items
    if len(rest) % 2:
        raise ValueError('operator without a right-hand operand')
    for op, operand in zip(rest[0::2], rest[1::2]):
        node = BinaryOpApply(op=op, lhs=node, rhs=operand)
    return node


class ASTBuilder(lark.Transformer):
    """Lark transformer that rewrites parse-tree nodes into formula AST nodes.

    Each method is named after a grammar rule (or rule alias) and receives
    that node's already-transformed children as ``args``.
    """

    def var(self, args):
        """Build a ``Var`` from the identifier token's text."""
        return Var(name=args[0].value)

    def const(self, args):
        """Build a ``Constant`` holding the number token converted to float."""
        return Constant(float(args[0]))

    def sign_op(self, args):
        """Map a ``+`` / ``-`` token to the matching ``UnaryOp``."""
        return _lookup_operator(
            {'-': UnaryOp.Neg, '+': UnaryOp.Pos},
            args[0],
            'invalid unary operator',
        )

    def addsub_op(self, args):
        """Map a ``+`` / ``-`` token to ``BinaryOp.Add`` / ``BinaryOp.Sub``."""
        return _lookup_operator(
            {'+': BinaryOp.Add, '-': BinaryOp.Sub},
            args[0],
            'invalid add/sub operator',
        )

    def muldiv_op(self, args):
        """Map an ``x`` / ``/`` token to ``BinaryOp.Mul`` / ``BinaryOp.Div``."""
        return _lookup_operator(
            {'x': BinaryOp.Mul, '/': BinaryOp.Div},
            args[0],
            'invalid mul/div operator',
        )

    def comp_op(self, args):
        """Map a comparison token to the matching ``BinaryOp``."""
        return _lookup_operator(
            {
                '<': BinaryOp.Lt,
                '>': BinaryOp.Gt,
                '<=': BinaryOp.LtEq,
                '>=': BinaryOp.GtEq,
                '=': BinaryOp.Eq,
            },
            args[0],
            'invalid comparison operator',
        )

    def factor(self, args):
        """Build a ``UnaryOpApply`` for ``sign_op factor``.

        Without this callback a signed factor such as ``-{X}`` would be
        left as a raw lark ``Tree`` that has no ``eval`` / ``pprint``.
        """
        if len(args) == 1:
            # plain atom_expr alternative: nothing to wrap
            return args[0]
        op, operand = args
        return UnaryOpApply(op=op, expr=operand)

    def comp_expr(self, args):
        """Return the lone expression, or a comparison ``BinaryOpApply``.

        The optional ``[comp_op addsub_expr]`` part may arrive as ``None``
        placeholders or be absent altogether, so placeholders are dropped
        before folding.
        """
        present = [a for a in args if a is not None]
        return _fold_left(present)

    def addsub_expr(self, args):
        """Fold a run of ``+`` / ``-`` operations left-associatively."""
        return _fold_left(args)

    def muldiv_expr(self, args):
        """Fold a run of ``x`` / ``/`` operations left-associatively."""
        return _fold_left(args)

    def funcall(self, args):
        """Build a ``FunCall`` whose ``args`` is always a list."""
        fn, *rest = args
        fnargs = rest[0] if rest else None
        # we have to account for the way Lark handles [args], and ensure
        # that we always end up with a list: no arguments gives None (or
        # nothing), a single argument arrives as the bare node
        if fnargs is None:
            fnargs = []
        elif not isinstance(fnargs, list):
            fnargs = [fnargs]
        # fn will be a Var, having been already transformed;
        # however, functions can only have certain hard-coded names,
        # so we just want the string
        return FunCall(fn=fn.name, args=fnargs)

    def args(self, args):
        """Pass the argument list through unchanged.

        Function args should just be in-lined into the FunCall tuple.
        """
        return args
def main(argv):
    """Parse each formula read from standard input and print it three ways.

    The grammar is loaded from ``formula.lark`` in the current working
    directory.  For every non-blank input line this prints the AST, the
    result of evaluating it against an empty variable dictionary, and the
    pretty-printed formula.  ``argv`` is accepted but not used.

    Returns:
        0, for use as the process exit status.
    """
    with open('formula.lark') as f:
        p = lark.Lark(f, parser='lalr', transformer=ASTBuilder())
    for line in sys.stdin:
        # A blank line (e.g. a trailing newline) is not a formula; passing
        # it to the parser would abort the whole run with a parse error.
        if not line.strip():
            continue
        tree = p.parse(line)
        print(tree)
        # No variables are supplied, so formulas that reference a variable
        # fail at this step with the lookup error from Var.eval.
        print(tree.eval({}))
        print(tree.pprint())
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {
"sphinx.png": {
"image/png": ""
}
},
"cell_type": "markdown",
"id": "d0329b4e",
"metadata": {},
"source": [
"# Parsing & evaluating PHDwin formulas\n",
"\n",
"PHDwin implements a small formula language (similar to Excel, Spotfire, or a bunch of other tools) which can be used to perform custom calculations for economic arrays and product streams.\n",
"\n",
"While the PHDwin data format is tabular, this embedded \"little language\" doesn't lend itself to table-based (e.g. Pandas) handling - it's much more like a programming language, and we can make use of tools and ideas (lexers, parsers, interpreters, and compilers) commonly used in the implementation of programming languages to handle this formula language and other embedded \"little languages\" (such as ARIES economic or overlay lines, perhaps).\n",
"\n",
"There are many approaches to parsing, from hand-written [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) to functional [parser combinators](https://usethe.computer/posts/18-irregular-expressions.html). For applications like this, though, we can save a lot of time by using a parser generator - a tool which can read a simple representation of a language's \"grammar\" and automatically produce or execute code to parse strings matching that language.\n",
"\n",
"We'll use a parser generator for Python, called [Lark](https://github.com/lark-parser/lark), to build a parser for (a subset of) the PHDwin formula language. Then, we'll implement a simple interpreter which can access per-well variable values and evaluate these formulas to produce new values.\n",
"\n",
"Lark requires us to write down the grammar of our language - the rules governing what \"atomic\" parts it consists of, and how they can be assembled into larger structures - in [Extended Backus-Naur form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form). Don't worry, it sounds more complicated than it is. Parts of the syntax should be familiar from regular expressions; the rest is mostly concerned with naming rules which build pieces of the grammar from other pieces.\n",
"\n",
"Lark parsers will read strings of text and either fail with a useful error message (if the string doesn't match the grammar) or produce an _abstract syntax tree_: a structured representation of the data, conforming to the rules of the grammar. If you remember diagramming sentences in school, it's the exact same idea:\n",
"![sphinx.png](attachment:sphinx.png)"
]
},
{
"attachments": {
"ratio.png": {
"image/png": ""
}
},
"cell_type": "markdown",
"id": "c60bb6d1",
"metadata": {},
"source": [
"Here's how an ARIES line might break down into a similar syntax tree (sorry, ARIES happens to be the example I had sitting around - but we'll be parsing PHDwin formulas shortly):\n",
"![ratio.png](attachment:ratio.png)\n",
"\n",
"In Python, each node of this tree might correspond to an object - e.g. (with appropriate enum and named tuple types defined):\n",
"```\n",
"RatioLine(\n",
" keyword=Ratio(Phase.Gas, Phase.Oil),\n",
" initial_ratio=1.283,\n",
" final_ratio=1.706,\n",
" units=RatioUnits(Unit.Mcf, Unit.Bbl),\n",
" second_point=RatioSecondPoint(\n",
" duration=3,\n",
" units=Unit.Month\n",
" ),\n",
" yaxis=YAxis.Log,\n",
" interpolate=Interpolate.Time\n",
")\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "fe13af5a",
"metadata": {},
"source": [
"## Lark example: address book\n",
"\n",
"Let's look at a simple grammar to get a feel for how Lark operates as well as the EBNF syntax. We'll parse a phone book whose lines look like:\n",
"\n",
"```\n",
"John:555-123-4567\n",
"Sally : 555 321 7654\n",
"```\n",
"\n",
"Each entry consists of a single name, a colon, and then a U.S. phone number with an area code. Components of the phone number can be separated by either spaces or hyphens.\n",
"\n",
"We can begin our grammar by importing some helpful built-ins from Lark, for whitespace, newlines, \"words\" (one or more letters), and digits. We're also going to tell Lark that we want to ignore whitespace in our entries.\n",
"```\n",
"%import common.WS_INLINE\n",
"%import common.NEWLINE\n",
"%import common.WORD\n",
"%import common.DIGIT\n",
"\n",
"%ignore WS_INLINE\n",
"```\n",
"\n",
"Every grammar needs a \"start\" rule, indicating the top-level structure of the thing we want to parse (in a programming language, this would typically be the rule corresponding to an entire source file). We'll parse a phone book of at least one entry, with entries separated by newlines. (The `*` zero-or-more operator is the same one familiar from regular expressions.) The newline after the final entry is optional.\n",
"```\n",
"start: phonebook\n",
"\n",
"?phonebook: entry (NEWLINE entry)* NEWLINE?\n",
"```\n",
"\n",
"A rule reads left-to-right: `thing: recipe` means \"when you see `recipe`, assemble a `thing` out of it\".\n",
"Lark uses prefixes on rules (like `?` and `!`) to control aspects of the generated syntax tree, which we won't go into here.\n",
"For details of the rule format, the [Lark cheatsheet](https://lark-parser.readthedocs.io/en/latest/_static/lark_cheatsheet.pdf) and [documentation](https://lark-parser.readthedocs.io/en/latest/) are helpful.\n",
"\n",
"We now need to write rules for how a phonebook entry is constructed from parts:\n",
"```\n",
"?entry: name \":\" area_code \"-\"? number_part1 \"-\"? number_part2\n",
"?name: WORD\n",
"?area_code: DIGIT~3\n",
"?number_part1: DIGIT~3\n",
"?number_part2: DIGIT~4\n",
"```\n",
"\n",
"That completes our grammar! For this small language, we'll load the grammar into Lark directly as a string, and apply the resulting parser to some example data."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5a3411aa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'phonebook'), [Tree(Token('RULE', 'entry'), [Token('WORD', 'John'), Tree(Token('RULE', 'area_code'), [Token('DIGIT', '5'), Token('DIGIT', '5'), Token('DIGIT', '5')]), Tree(Token('RULE', 'number_part1'), [Token('DIGIT', '1'), Token('DIGIT', '2'), Token('DIGIT', '3')]), Tree(Token('RULE', 'number_part2'), [Token('DIGIT', '4'), Token('DIGIT', '2'), Token('DIGIT', '3'), Token('DIGIT', '4')])]), Token('NEWLINE', '\\n'), Tree(Token('RULE', 'entry'), [Token('WORD', 'Sally'), Tree(Token('RULE', 'area_code'), [Token('DIGIT', '5'), Token('DIGIT', '5'), Token('DIGIT', '5')]), Tree(Token('RULE', 'number_part1'), [Token('DIGIT', '1'), Token('DIGIT', '7'), Token('DIGIT', '7')]), Tree(Token('RULE', 'number_part2'), [Token('DIGIT', '2'), Token('DIGIT', '8'), Token('DIGIT', '3'), Token('DIGIT', '8')])]), Token('NEWLINE', '\\n'), Tree(Token('RULE', 'entry'), [Token('WORD', 'Bob'), Tree(Token('RULE', 'area_code'), [Token('DIGIT', '5'), Token('DIGIT', '5'), Token('DIGIT', '5')]), Tree(Token('RULE', 'number_part1'), [Token('DIGIT', '1'), Token('DIGIT', '1'), Token('DIGIT', '1')]), Tree(Token('RULE', 'number_part2'), [Token('DIGIT', '2'), Token('DIGIT', '2'), Token('DIGIT', '2'), Token('DIGIT', '2')])]), Token('NEWLINE', '\\n')])])\n"
]
}
],
"source": [
"import lark\n",
"\n",
"phonebook_parser = lark.Lark('''\n",
"%import common.WS_INLINE\n",
"%import common.NEWLINE\n",
"%import common.WORD\n",
"%import common.DIGIT\n",
"\n",
"%ignore WS_INLINE\n",
"\n",
"start: phonebook\n",
"\n",
"?phonebook: entry (NEWLINE entry)* NEWLINE?\n",
"\n",
"?entry: name \":\" area_code \"-\"? number_part1 \"-\"? number_part2\n",
"?name: WORD\n",
"?area_code: DIGIT~3\n",
"?number_part1: DIGIT~3\n",
"?number_part2: DIGIT~4\n",
"''')\n",
"\n",
"phonebook = '''John: 555-123-4234\n",
"Sally : 555 177 2838\n",
"Bob : 555 111-2222\n",
"'''\n",
"\n",
"phonebook_ast = phonebook_parser.parse(phonebook)\n",
"print(phonebook_ast)"
]
},
{
"cell_type": "markdown",
"id": "9a740c3f",
"metadata": {},
"source": [
"That's a little hard to read! Lark has successfully parsed our phonebook according to the grammar and produced a structured abstract syntax tree. Let's have Lark pretty-print it for us, using indentation to show the nested structure."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "04265578",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start\n",
" phonebook\n",
" entry\n",
" John\n",
" area_code\n",
" 5\n",
" 5\n",
" 5\n",
" number_part1\n",
" 1\n",
" 2\n",
" 3\n",
" number_part2\n",
" 4\n",
" 2\n",
" 3\n",
" 4\n",
" \n",
"\n",
" entry\n",
" Sally\n",
" area_code\n",
" 5\n",
" 5\n",
" 5\n",
" number_part1\n",
" 1\n",
" 7\n",
" 7\n",
" number_part2\n",
" 2\n",
" 8\n",
" 3\n",
" 8\n",
" \n",
"\n",
" entry\n",
" Bob\n",
" area_code\n",
" 5\n",
" 5\n",
" 5\n",
" number_part1\n",
" 1\n",
" 1\n",
" 1\n",
" number_part2\n",
" 2\n",
" 2\n",
" 2\n",
" 2\n",
" \n",
"\n",
"\n"
]
}
],
"source": [
"print(phonebook_ast.pretty())"
]
},
{
"cell_type": "markdown",
"id": "16a3c73b",
"metadata": {},
"source": [
"## Parsing PHDwin formulas\n",
"\n",
"The grammar for PHDwin formulas is, obviously, a little more complex than our phonebook.\n",
"From sifting through an example model file's \"OTPF\" table, I found formulas which look like:\n",
"```\n",
"{Oil} / (AddMonthsToDate({CurrentDate},1) - {CurrentDate}) / 1000\n",
"```\n",
"Or:\n",
"```\n",
"If({MB} < {CumThreshold1},{CumTranche1},If({MB} < {CumThreshold2},{CumTranche2},{CumTranche3}))\n",
"```\n",
"Or:\n",
"```\n",
"If({StartHist} <= 0,If({MajSeg1} <= 0,{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3}))),If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3})))\n",
"```\n",
"\n",
"What I've inferred (and I may be wrong, but that's easy to fix) is that these formulas support: \n",
"- function calls `F(x, y, z)`\n",
"- reference to variables, which sometimes (?) need \"quotes\" `{Some Variable Name}`\n",
"- two-argument math operators `+`, `-`, `x` (multiplication), `/` following PEMDAS order-of-operations\n",
"- one-argument negation or (no-op) unary plus, like `-3` or `+5`\n",
"- boolean comparison operators `<`, `>`, `<=`, `>=`, `=`\n",
"\n",
"We'll write a grammar for these formulas. It's large enough we'll write it into a separate `formula.lark` file, shown below:"
]
},
{
"cell_type": "markdown",
"id": "d93ba9df",
"metadata": {},
"source": [
"```\n",
"%import common.SIGNED_NUMBER\n",
"%import common.CNAME\n",
"%import common.WS\n",
"\n",
"%ignore WS\n",
"\n",
"?start: expr\n",
"\n",
"?expr: comp_expr\n",
"?comp_expr: addsub_expr [comp_op addsub_expr]\n",
"?addsub_expr: muldiv_expr (addsub_op muldiv_expr)*\n",
"?muldiv_expr: factor (muldiv_op factor)*\n",
"?factor: sign_op factor | atom_expr\n",
"\n",
"// TODO: not equal?\n",
"!comp_op: \"<\" | \">\" | \"=\" | \"<=\" | \">=\"\n",
"!addsub_op: \"+\" | \"-\"\n",
"!muldiv_op: \"x\" | \"/\"\n",
"!sign_op: \"+\" | \"-\"\n",
"\n",
"?atom_expr: identifier \"(\" [args] \")\" -> funcall\n",
" | atom\n",
"\n",
"// TODO: I see some function calls(?) inside { } - what are these?\n",
"?atom: \"(\" expr \")\"\n",
" | identifier\n",
" | number\n",
"\n",
"?args: expr (\",\" expr)*\n",
"\n",
"// TODO: we need to know the escaping rules here...\n",
"?identifier: (CNAME | \"{\" /[^}]+/ \"}\") -> var\n",
"\n",
"?number: SIGNED_NUMBER -> const\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "395259e1",
"metadata": {},
"source": [
"The trickiest part here is handling the combination of \"left recursion\" (e.g. an expression can be **\"some expression\"** `+` \"some expression\") and operator precedence (i.e. PEMDAS rules); the approach here (using nested expression rules like `comp_expr`, `addsub_expr`...) is fairly standard and well-supported by Lark.\n",
"\n",
"We can now use our parser to read formulas!"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "078ed143",
"metadata": {},
"outputs": [],
"source": [
"import lark\n",
"\n",
"with open('formula.lark') as f:\n",
" formula_parser = lark.Lark(f)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fdf9ffe4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"comp_expr\n",
" muldiv_expr\n",
" var\tOil\n",
" muldiv_op\t/\n",
" comp_expr\n",
" addsub_expr\n",
" funcall\n",
" var\tAddMonthsToDate\n",
" args\n",
" comp_expr\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" comp_expr\n",
" const\t1\n",
" None\n",
" None\n",
" addsub_op\t-\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" muldiv_op\t/\n",
" const\t1000\n",
" None\n",
" None\n",
"\n"
]
}
],
"source": [
"example = '{Oil} / (AddMonthsToDate({CurrentDate},1) - {CurrentDate}) / 1000'\n",
"tree = formula_parser.parse(example)\n",
"print(tree.pretty())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b64a584a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" var\tMB\n",
" comp_op\t<\n",
" var\tCumThreshold1\n",
" comp_expr\n",
" var\tCumTranche1\n",
" None\n",
" None\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" var\tMB\n",
" comp_op\t<\n",
" var\tCumThreshold2\n",
" comp_expr\n",
" var\tCumTranche2\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tCumTranche3\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
"\n"
]
}
],
"source": [
"example = 'If({MB} < {CumThreshold1},{CumTranche1},If({MB} < {CumThreshold2},{CumTranche2},{CumTranche3}))'\n",
"tree = formula_parser.parse(example)\n",
"print(tree.pretty())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "57fa43b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" var\tStartHist\n",
" comp_op\t<=\n",
" const\t0\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" var\tMajSeg1\n",
" comp_op\t<=\n",
" const\t0\n",
" comp_expr\n",
" var\tDateTranche1\n",
" None\n",
" None\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" funcall\n",
" var\tMonthsBetweenDates\n",
" args\n",
" comp_expr\n",
" var\tMajSeg1\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" comp_op\t<=\n",
" var\tMonthThreshold1\n",
" comp_expr\n",
" var\tDateTranche1\n",
" None\n",
" None\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" funcall\n",
" var\tMonthsBetweenDates\n",
" args\n",
" comp_expr\n",
" var\tMajSeg1\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" comp_op\t<=\n",
" var\tMonthThreshold2\n",
" comp_expr\n",
" var\tDateTranche2\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tDateTranche3\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" funcall\n",
" var\tMonthsBetweenDates\n",
" args\n",
" comp_expr\n",
" var\tStartHist\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" comp_op\t<=\n",
" var\tMonthThreshold1\n",
" comp_expr\n",
" var\tDateTranche1\n",
" None\n",
" None\n",
" comp_expr\n",
" funcall\n",
" var\tIf\n",
" args\n",
" comp_expr\n",
" funcall\n",
" var\tMonthsBetweenDates\n",
" args\n",
" comp_expr\n",
" var\tStartHist\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tCurrentDate\n",
" None\n",
" None\n",
" comp_op\t<=\n",
" var\tMonthThreshold2\n",
" comp_expr\n",
" var\tDateTranche2\n",
" None\n",
" None\n",
" comp_expr\n",
" var\tDateTranche3\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
" None\n",
"\n"
]
}
],
"source": [
"example = 'If({StartHist} <= 0,If({MajSeg1} <= 0,{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3}))),If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3})))'\n",
"tree = formula_parser.parse(example)\n",
"print(tree.pretty())"
]
},
{
"cell_type": "markdown",
"id": "df29b5fb",
"metadata": {},
"source": [
"Let's go back to the first (simple) example and take a look at the structure of the syntax tree, so that we can begin thinking about how to evaluate these formulas."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0d85c3ba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(Token('RULE', 'comp_expr'), [Tree(Token('RULE', 'muldiv_expr'), [Tree('var', [Token('__ANON_2', 'Oil')]), Tree(Token('RULE', 'muldiv_op'), [Token('SLASH', '/')]), Tree(Token('RULE', 'comp_expr'), [Tree(Token('RULE', 'addsub_expr'), [Tree('funcall', [Tree('var', [Token('CNAME', 'AddMonthsToDate')]), Tree(Token('RULE', 'args'), [Tree(Token('RULE', 'comp_expr'), [Tree('var', [Token('__ANON_2', 'CurrentDate')]), None, None]), Tree(Token('RULE', 'comp_expr'), [Tree('const', [Token('SIGNED_NUMBER', '1')]), None, None])])]), Tree(Token('RULE', 'addsub_op'), [Token('MINUS', '-')]), Tree('var', [Token('__ANON_2', 'CurrentDate')])]), None, None]), Tree(Token('RULE', 'muldiv_op'), [Token('SLASH', '/')]), Tree('const', [Token('SIGNED_NUMBER', '1000')])]), None, None])\n"
]
}
],
"source": [
"example = '{Oil} / (AddMonthsToDate({CurrentDate},1) - {CurrentDate}) / 1000'\n",
"tree = formula_parser.parse(example)\n",
"print(tree)"
]
},
{
"cell_type": "markdown",
"id": "1007c7a2",
"metadata": {},
"source": [
"The Lark AST is fine, but it'd be nicer to \"lock down\" the representation a little and make it more suitable for our particular task. There are many approaches we could take here, ranging from making use of the pattern matching features in newer versions of Python to a highly \"object-oriented\" approach. We'll split the difference here, defining `Enum` and `namedtuple` types to represent the nodes in our syntax tree.\n",
"\n",
"We can use the Lark \"transformers\" feature (an implementation of the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern)) to have Lark recursively turn its AST into our own representation.\n",
"\n",
"Let's begin by defining some types for a structured representation of PHDwin formulas. I'll make use of classes which subclass custom `namedtuple` classes. For now the only \"contents\" are `__slots__ = ()` (for performance), but later we'll add interesting methods on these to enable evaluation of formulas.\n",
"\n",
"I'll use custom `Enum` types to represent the different unary (one-argument) and binary (two-argument) operators available in formulas."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "01fce43e",
"metadata": {},
"outputs": [],
"source": [
"from enum import Enum, auto\n",
"from collections import namedtuple\n",
"\n",
"class UnaryOp(Enum):\n",
" Pos = auto()\n",
" Neg = auto()\n",
"\n",
"class BinaryOp(Enum):\n",
" Add = auto()\n",
" Sub = auto()\n",
" Mul = auto()\n",
" Div = auto()\n",
" Lt = auto()\n",
" Gt = auto()\n",
" LtEq = auto()\n",
" GtEq = auto()\n",
" Eq = auto()\n",
"\n",
"# constant numbers\n",
"class Constant(namedtuple('Constant', ['val'])):\n",
" __slots__ = ()\n",
"\n",
"# named variables\n",
"class Var(namedtuple('Var', ['name'])):\n",
" __slots__ = ()\n",
"\n",
"# binary operator applications, like \"2 + {X}\"\n",
"# lhs, rhs = \"left-hand side\", \"right-hand side\"\n",
"class BinaryOpApply(namedtuple('BinaryOpApply', ['op', 'lhs', 'rhs'])):\n",
" __slots__ = ()\n",
"\n",
"# unary operator applications, like \"-{Capex}\"\n",
"class UnaryOpApply(namedtuple('UnaryOpApply', ['op', 'expr'])):\n",
" __slots__ = ()\n",
"\n",
"# built-in function calls, like \"AddMonthsToDate({CurrentDate}, 1)\"\n",
"# fn = function name\n",
"# args = (possibly empty) list of arguments\n",
"class FunCall(namedtuple('FunCall', ['fn', 'args'])):\n",
" __slots__ = ()"
]
},
{
"cell_type": "markdown",
"id": "aa761081",
"metadata": {},
"source": [
"We can now create a custom `Transformer` class which will rewrite Lark ASTs into our own data types, recursively from the \"leaves\" up. Since our grammar is simple enough to be handled by the less-powerful but simpler LALR parser implementation provided by Lark, we can also ask Lark to perform this transformation on-the-fly rather than apply it at the end of parsing to a complete AST.\n",
"\n",
"We'll do this to produce a \"new and improved\" formula parser."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1bb9d1c4",
"metadata": {},
"outputs": [],
"source": [
"class ASTBuilder(lark.Transformer):\n",
" def var(self, args):\n",
" return Var(name=args[0].value)\n",
"\n",
" def const(self, args):\n",
" return Constant(float(args[0]))\n",
"\n",
" def sign_op(self, args):\n",
" if args[0] == '-':\n",
" return UnaryOp.Neg\n",
" elif args[0] == '+':\n",
" return UnaryOp.Pos\n",
" else:\n",
" raise ArgumentError('invalid unary operator')\n",
"\n",
" def addsub_op(self, args):\n",
" if args[0] == '+':\n",
" return BinaryOp.Add\n",
" elif args[0] == '-':\n",
" return BinaryOp.Sub\n",
" else:\n",
" raise ArgumentError('invalid add/sub operator')\n",
"\n",
" def muldiv_op(self, args):\n",
" if args[0] == 'x':\n",
" return BinaryOp.Mul\n",
" elif args[0] == '/':\n",
" return BinaryOp.Div\n",
" else:\n",
" raise ArgumentError('invalid mul/div operator')\n",
"\n",
" def comp_op(self, args):\n",
" if args[0] == '<':\n",
" return BinaryOp.Lt\n",
" elif args[0] == '>':\n",
" return BinaryOp.Gt\n",
" elif args[0] == '<=':\n",
" return BinaryOp.LtEq\n",
" elif args[0] == '>=':\n",
" return BinaryOp.GtEq\n",
" elif args[0] == '=':\n",
" return BinaryOp.Eq\n",
" else:\n",
" raise ArgumentError('invalid comparison operator')\n",
"\n",
" def comp_expr(self, args):\n",
" lhs, op, rhs = args\n",
" # either just an expression\n",
" if op is None and rhs is None:\n",
" return lhs\n",
" # or a comparison between two expressions\n",
" return BinaryOpApply(op=op, lhs=lhs, rhs=rhs)\n",
"\n",
" def addsub_expr(self, args):\n",
" # either just an expression\n",
" if len(args) == 1:\n",
" return args[0]\n",
" # or a sequence of left-associative operations\n",
" # (this looks a little crazy, but it's going to turn\n",
" # x + y + z into (x + y) + z and so on)\n",
" ex, *args = args\n",
" while len(args) > 0:\n",
" op, next_ex, *args = args\n",
" ex = BinaryOpApply(op, lhs=ex, rhs=next_ex)\n",
" return ex\n",
"\n",
" def muldiv_expr(self, args):\n",
" # same logic as addsub_expr\n",
" if len(args) == 1:\n",
" return args[0]\n",
" ex, *args = args\n",
" while len(args) > 0:\n",
" op, next_ex, *args = args\n",
" ex = BinaryOpApply(op=op, lhs=ex, rhs=next_ex)\n",
" return ex\n",
"\n",
" def funcall(self, args):\n",
" fn, fnargs = args\n",
" # we have to account for the way Lark handles [args], and ensure \n",
" # that we always end up with a list\n",
" if fnargs is None:\n",
" fnargs = []\n",
" elif not isinstance(fnargs, list):\n",
" fnargs = [fnargs]\n",
" # fn will be a Var, having been already transformed;\n",
" # however, functions can only have certain hard-coded names,\n",
" # so we just want the string\n",
" return FunCall(fn=fn.name, args=fnargs)\n",
"\n",
" def args(self, args):\n",
" # function args should just be in-lined into the FunCall tuple\n",
" return args\n",
" \n",
"with open('formula.lark') as f:\n",
" formula_parser = lark.Lark(f, parser='lalr', transformer=ASTBuilder())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "621d51b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BinaryOpApply(op=<BinaryOp.Div: 4>, lhs=BinaryOpApply(op=<BinaryOp.Div: 4>, lhs=Var(name='Oil'), rhs=BinaryOpApply(op=<BinaryOp.Sub: 2>, lhs=FunCall(fn='AddMonthsToDate', args=[Var(name='CurrentDate'), Constant(val=1.0)]), rhs=Var(name='CurrentDate'))), rhs=Constant(val=1000.0))\n"
]
}
],
"source": [
"example = '{Oil} / (AddMonthsToDate({CurrentDate},1) - {CurrentDate}) / 1000'\n",
"tree = formula_parser.parse(example)\n",
"print(tree)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "1a10e013",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.Lt: 5>, lhs=Var(name='MB'), rhs=Var(name='CumThreshold1')), Var(name='CumTranche1'), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.Lt: 5>, lhs=Var(name='MB'), rhs=Var(name='CumThreshold2')), Var(name='CumTranche2'), Var(name='CumTranche3')])])\n"
]
}
],
"source": [
"example = 'If({MB} < {CumThreshold1},{CumTranche1},If({MB} < {CumThreshold2},{CumTranche2},{CumTranche3}))'\n",
"tree = formula_parser.parse(example)\n",
"print(tree)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ce7d306f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=Var(name='StartHist'), rhs=Constant(val=0.0)), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=Var(name='MajSeg1'), rhs=Constant(val=0.0)), Var(name='DateTranche1'), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=FunCall(fn='MonthsBetweenDates', args=[Var(name='MajSeg1'), Var(name='CurrentDate')]), rhs=Var(name='MonthThreshold1')), Var(name='DateTranche1'), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=FunCall(fn='MonthsBetweenDates', args=[Var(name='MajSeg1'), Var(name='CurrentDate')]), rhs=Var(name='MonthThreshold2')), Var(name='DateTranche2'), Var(name='DateTranche3')])])]), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=FunCall(fn='MonthsBetweenDates', args=[Var(name='StartHist'), Var(name='CurrentDate')]), rhs=Var(name='MonthThreshold1')), Var(name='DateTranche1'), FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.LtEq: 7>, lhs=FunCall(fn='MonthsBetweenDates', args=[Var(name='StartHist'), Var(name='CurrentDate')]), rhs=Var(name='MonthThreshold2')), Var(name='DateTranche2'), Var(name='DateTranche3')])])])\n"
]
}
],
"source": [
"example = 'If({StartHist} <= 0,If({MajSeg1} <= 0,{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({MajSeg1},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3}))),If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold1},{DateTranche1},If(MonthsBetweenDates({StartHist},{CurrentDate}) <= {MonthThreshold2},{DateTranche2},{DateTranche3})))'\n",
"tree = formula_parser.parse(example)\n",
"print(tree)"
]
},
{
"cell_type": "markdown",
"id": "82c84ea3",
"metadata": {},
"source": [
"While we no longer have the Lark pretty-printer, it's clear that we've now got a much better-structured representation of the formulas! We can now go back and add logic to the named tuple classes to evaluate the formulas or pretty-print them, recursively. Our \"straw-man\" implementation of an evaluator will pass around a dictionary mapping variable names to values; this stands in for a \"well record\", or whatever source of variable values is appropriate to the actual semantics.\n",
"\n",
"We'll re-define our named tuple classes to include these new methods."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f49f69fa",
"metadata": {},
"outputs": [],
"source": [
"class Constant(namedtuple('Constant', ['val'])):\n",
" __slots__ = ()\n",
"\n",
" def eval(self, var_dict):\n",
" return self.val\n",
"\n",
" def pprint(self):\n",
" return f'{self.val}'\n",
"\n",
"class Var(namedtuple('Var', ['name'])):\n",
" __slots__ = ()\n",
"\n",
" def eval(self, var_dict):\n",
" return var_dict[self.name]\n",
"\n",
" def pprint(self):\n",
" return f'{{{self.name}}}'\n",
"\n",
"class BinaryOpApply(namedtuple('BinaryOpApply', ['op', 'lhs', 'rhs'])):\n",
" __slots__ = ()\n",
"\n",
" def eval(self, var_dict):\n",
" lhs_val = self.lhs.eval(var_dict)\n",
" rhs_val = self.rhs.eval(var_dict)\n",
" if self.op == BinaryOp.Add:\n",
" return lhs_val + rhs_val\n",
" if self.op == BinaryOp.Sub:\n",
" return lhs_val - rhs_val\n",
" if self.op == BinaryOp.Mul:\n",
" return lhs_val * rhs_val\n",
" if self.op == BinaryOp.Div:\n",
" return lhs_val / rhs_val\n",
" if self.op == BinaryOp.Lt:\n",
" return lhs_val < rhs_val\n",
" if self.op == BinaryOp.Gt:\n",
" return lhs_val > rhs_val\n",
" if self.op == BinaryOp.LtEq:\n",
" return lhs_val <= rhs_val\n",
" if self.op == BinaryOp.GtEq:\n",
" return lhs_val >= rhs_val\n",
" if self.op == BinaryOp.Eq:\n",
" return lhs_val == rhs_val\n",
" raise ValueError('invalid binary operator')\n",
"\n",
" def pprint(self):\n",
" lhs_pp = self.lhs.pprint()\n",
" rhs_pp = self.rhs.pprint()\n",
" if self.op == BinaryOp.Add:\n",
" return f'({lhs_pp} + {rhs_pp})'\n",
" if self.op == BinaryOp.Sub:\n",
" return f'({lhs_pp} - {rhs_pp})'\n",
" if self.op == BinaryOp.Mul:\n",
" return f'({lhs_pp} x {rhs_pp})'\n",
" if self.op == BinaryOp.Div:\n",
" return f'({lhs_pp} / {rhs_pp})'\n",
" if self.op == BinaryOp.Lt:\n",
" return f'({lhs_pp} < {rhs_pp})'\n",
" if self.op == BinaryOp.Gt:\n",
" return f'({lhs_pp} > {rhs_pp})'\n",
" if self.op == BinaryOp.LtEq:\n",
" return f'({lhs_pp} <= {rhs_pp})'\n",
" if self.op == BinaryOp.GtEq:\n",
" return f'({lhs_pp} >= {rhs_pp})'\n",
" if self.op == BinaryOp.Eq:\n",
" return f'({lhs_pp} = {rhs_pp})'\n",
" raise ValueError('invalid binary operator')\n",
"\n",
"class UnaryOpApply(namedtuple('UnaryOpApply', ['op', 'expr'])):\n",
" __slots__ = ()\n",
"\n",
" def eval(self, var_dict):\n",
" expr_val = self.expr.eval(var_dict)\n",
" if self.op == UnaryOp.Pos:\n",
" return expr_val\n",
" if self.op == UnaryOp.Neg:\n",
" return -expr_val\n",
" raise ValueError('invalid unary operator')\n",
"\n",
" def pprint(self):\n",
" expr_pp = self.expr.pprint()\n",
" if self.op == UnaryOp.Pos:\n",
" return f'+{expr_pp}'\n",
" if self.op == UnaryOp.Neg:\n",
" return f'-{expr_pp}'\n",
" raise ValueError('invalid unary operator')\n",
"\n",
"class FunCall(namedtuple('FunCall', ['fn', 'args'])):\n",
" __slots__ = ()\n",
"\n",
" def eval(self, var_dict):\n",
" if self.fn == 'If':\n",
" # if has special rules - it only evaluates one or the\n",
" # other of its arguments!\n",
" cond, do_if, do_else = self.args\n",
" if cond.eval(var_dict):\n",
" return do_if.eval(var_dict)\n",
" return do_else.eval(var_dict)\n",
" if self.fn == 'Abs':\n",
" val, = self.args\n",
" return abs(val.eval(var_dict))\n",
" raise NotImplementedError(f'function {self.fn} not yet implemented!')\n",
"\n",
" def pprint(self):\n",
" args = ', '.join(a.pprint() for a in self.args)\n",
" return f'{self.fn}({args})'"
]
},
{
"cell_type": "markdown",
"id": "d3d5e884",
"metadata": {},
"source": [
"That means we'll also need to re-create our transformer class and parser, to capture these updated definitions, but we won't make any changes to their source code:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "fcc6ceb5",
"metadata": {},
"outputs": [],
"source": [
"class ASTBuilder(lark.Transformer):\n",
" def var(self, args):\n",
" return Var(name=args[0].value)\n",
"\n",
" def const(self, args):\n",
" return Constant(float(args[0]))\n",
"\n",
" def sign_op(self, args):\n",
" if args[0] == '-':\n",
" return UnaryOp.Neg\n",
" elif args[0] == '+':\n",
" return UnaryOp.Pos\n",
" else:\n",
" raise ValueError('invalid unary operator')\n",
"\n",
" def addsub_op(self, args):\n",
" if args[0] == '+':\n",
" return BinaryOp.Add\n",
" elif args[0] == '-':\n",
" return BinaryOp.Sub\n",
" else:\n",
" raise ValueError('invalid add/sub operator')\n",
"\n",
" def muldiv_op(self, args):\n",
" if args[0] == 'x':\n",
" return BinaryOp.Mul\n",
" elif args[0] == '/':\n",
" return BinaryOp.Div\n",
" else:\n",
" raise ValueError('invalid mul/div operator')\n",
"\n",
" def comp_op(self, args):\n",
" if args[0] == '<':\n",
" return BinaryOp.Lt\n",
" elif args[0] == '>':\n",
" return BinaryOp.Gt\n",
" elif args[0] == '<=':\n",
" return BinaryOp.LtEq\n",
" elif args[0] == '>=':\n",
" return BinaryOp.GtEq\n",
" elif args[0] == '=':\n",
" return BinaryOp.Eq\n",
" else:\n",
" raise ValueError('invalid comparison operator')\n",
"\n",
" def comp_expr(self, args):\n",
" lhs, op, rhs = args\n",
" # either just an expression\n",
" if op is None and rhs is None:\n",
" return lhs\n",
" # or a comparison between two expressions\n",
" return BinaryOpApply(op=op, lhs=lhs, rhs=rhs)\n",
"\n",
" def addsub_expr(self, args):\n",
" # either just an expression\n",
" if len(args) == 1:\n",
" return args[0]\n",
" # or a sequence of left-associative operations\n",
" # (this looks a little crazy, but it's going to turn\n",
" # x + y + z into (x + y) + z and so on)\n",
" ex, *args = args\n",
" while len(args) > 0:\n",
" op, next_ex, *args = args\n",
" ex = BinaryOpApply(op, lhs=ex, rhs=next_ex)\n",
" return ex\n",
"\n",
" def muldiv_expr(self, args):\n",
" # same logic as addsub_expr\n",
" if len(args) == 1:\n",
" return args[0]\n",
" ex, *args = args\n",
" while len(args) > 0:\n",
" op, next_ex, *args = args\n",
" ex = BinaryOpApply(op=op, lhs=ex, rhs=next_ex)\n",
" return ex\n",
"\n",
" def funcall(self, args):\n",
" fn, fnargs = args\n",
" # we have to account for the way Lark handles [args], and ensure \n",
" # that we always end up with a list\n",
" if fnargs is None:\n",
" fnargs = []\n",
" elif not isinstance(fnargs, list):\n",
" fnargs = [fnargs]\n",
" # fn will be a Var, having been already transformed;\n",
" # however, functions can only have certain hard-coded names,\n",
" # so we just want the string\n",
" return FunCall(fn=fn.name, args=fnargs)\n",
"\n",
" def args(self, args):\n",
" # function args should just be in-lined into the FunCall tuple\n",
" return args\n",
" \n",
"with open('formula.lark') as f:\n",
" formula_parser = lark.Lark(f, parser='lalr', transformer=ASTBuilder())"
]
},
{
"cell_type": "markdown",
"id": "5658aa59",
"metadata": {},
"source": [
"We can now pretty-print and evaluate (with access to variable values from a dictionary) our formulas! Our pretty-printer adds redundant parentheses to clarify order of operations."
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "92f6fd57",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BinaryOpApply(op=<BinaryOp.Add: 1>, lhs=BinaryOpApply(op=<BinaryOp.Sub: 2>, lhs=BinaryOpApply(op=<BinaryOp.Add: 1>, lhs=Constant(val=1.0), rhs=BinaryOpApply(op=<BinaryOp.Mul: 3>, lhs=Constant(val=2.0), rhs=Constant(val=3.0))), rhs=BinaryOpApply(op=<BinaryOp.Div: 4>, lhs=Constant(val=7.0), rhs=Constant(val=5.0))), rhs=FunCall(fn='Abs', args=[BinaryOpApply(op=<BinaryOp.Sub: 2>, lhs=Constant(val=777.0), rhs=Constant(val=999.0))]))\n",
"\n",
"(((1.0 + (2.0 x 3.0)) - (7.0 / 5.0)) + Abs((777.0 - 999.0)))\n",
"\n",
"227.6\n"
]
}
],
"source": [
"some_math = formula_parser.parse('1 + 2 x 3 - 7 / 5 + Abs(777 - 999)')\n",
"print(some_math)\n",
"print()\n",
"print(some_math.pprint())\n",
"print()\n",
"print(some_math.eval({})) # empty variable dictionary"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "0e5557bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FunCall(fn='If', args=[BinaryOpApply(op=<BinaryOp.Gt: 6>, lhs=Var(name='X'), rhs=Constant(val=3.0)), BinaryOpApply(op=<BinaryOp.Mul: 3>, lhs=Var(name='X'), rhs=Constant(val=7.0)), BinaryOpApply(op=<BinaryOp.Sub: 2>, lhs=Var(name='X'), rhs=Constant(val=9.0))])\n",
"\n",
"If(({X} > 3.0), ({X} x 7.0), ({X} - 9.0))\n",
"\n",
"35.0\n",
"\n",
"-7.0\n"
]
}
],
"source": [
"use_a_variable = formula_parser.parse('If({X} > 3, {X} x 7, {X} - 9)')\n",
"print(use_a_variable)\n",
"print()\n",
"print(use_a_variable.pprint())\n",
"print()\n",
"print(use_a_variable.eval({'X': 5})) # provide a value for 'X'\n",
"print()\n",
"print(use_a_variable.eval({'X': 2})) # provide a value for 'X'"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "94819860",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BinaryOpApply(op=<BinaryOp.Add: 1>, lhs=BinaryOpApply(op=<BinaryOp.Add: 1>, lhs=Var(name='Royalty'), rhs=Var(name='GovProfOil')), rhs=Var(name='IncomeTax'))\n",
"\n",
"(({Royalty} + {GovProfOil}) + {IncomeTax})\n",
"\n",
"2340000.0\n"
]
}
],
"source": [
"multiple_vars = formula_parser.parse('{Royalty} + {GovProfOil} + {IncomeTax}')\n",
"print(multiple_vars)\n",
"print()\n",
"print(multiple_vars.pprint())\n",
"print()\n",
"print(multiple_vars.eval({\n",
" 'Royalty': 1.2e6,\n",
" 'GovProfOil': 2.3e5,\n",
" 'IncomeTax': 9.1e5\n",
"}))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f6eea00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment