Skip to content

Instantly share code, notes, and snippets.

@RezSat
Created April 6, 2024 21:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RezSat/bc135ba632120600215ef897ebe9f43c to your computer and use it in GitHub Desktop.
Save RezSat/bc135ba632120600215ef897ebe9f43c to your computer and use it in GitHub Desktop.
Python implementation of the mathjs expression parser
from abc import ABC, abstractmethod
from fractions import Fraction
from decimal import Decimal
"""
THIS IS A DIRECT IMPLEMENTATION OF THE MATHJS PARSER.
NOTE: THERE ARE SO MANY NOTABLE BUGS IN HERE.
IMPLEMENT PURELY FOR THE PURPOSE OF UNDERSTANDING THE MATHJS PARSER.
PLEASE TAKE A VISIT TO MATHJS FOR MORE INFORMATION: https://github.com/josdejong/mathjs
"""
# Define valid input types
valid_input_types = {
'string': True,
'number': True,
'Decimal': True,
'Fraction': True
}
DEFAULT_CONFIG = {
# minimum relative difference between two compared values,
# used by all comparison functions
'epsilon': 1e-12,
# type of default matrix output. Choose 'matrix' (default) or 'array'
'matrix': 'Matrix',
# type of default number output. Choose 'number' (default), 'BigNumber', or 'Fraction'
'number': 'number',
# number of significant digits in BigNumbers
'precision': 64,
# predictable output type of functions. When true, output type depends only
# on the input types. When false (default), output type can vary depending
# on input values. For example `math.sqrt(-4)` returns `complex('2i')` when
# predictable is false, and returns `NaN` when true.
'predictable': False,
# random seed for seeded pseudo random number generation
# None = randomly seed
'randomSeed': None
}
MATRIX_OPTIONS = ['Matrix', 'Array'] # valid values for option matrix
NUMBER_OPTIONS = ['number', 'BigNumber', 'Fraction'] # valid values for option number
def config(options=None):
if options:
raise ValueError('The global config is readonly. Please create a math module instance if you want to change the default configuration.')
return DEFAULT_CONFIG.copy()
config.MATRIX_OPTIONS = MATRIX_OPTIONS
config.NUMBER_OPTIONS = NUMBER_OPTIONS
config.number = DEFAULT_CONFIG['number']
# Define functions for converting to each output type
def number(x):
return float(x)
def bignumber(x):
return Decimal(x)
def fraction(x):
return Fraction(x)
def no_bignumber(x):
raise NotImplementedError("BigNumber conversion not supported")
def no_fraction(x):
raise NotImplementedError("Fraction conversion not supported")
valid_output_types = {
'number': number,
'Decimal': bignumber,
'Fraction': fraction
}
# Define numeric conversion function
def numeric(value, output_type='number', check=None):
if check is not None:
raise SyntaxError('numeric() takes one or two arguments')
input_type = type(value).__name__
if input_type not in valid_input_types:
pass#raise TypeError('Cannot convert {} of type "{}"; valid input types are {}'.format(value, input_type, ', '.join(valid_input_types.keys())))
if output_type not in valid_output_types:
pass#raise TypeError('Cannot convert {} to type "{}"; valid output types are {}'.format(value, output_type, ', '.join(valid_output_types.keys())))
if output_type == input_type:
return value
else:
return valid_output_types[output_type](value)
# NODES
# class Node:
# @property
# def type(self):
# return 'Node'
# @property
# def is_node(self):
# return True
def is_accessor_node(x):
return getattr(x, 'isAccessorNode', False) and getattr(x.__class__.__bases__[0], 'isNode', False)
def is_constant_node(x):
return getattr(x, 'isConstantNode', False) and getattr(x.__class__.__bases__[0], 'isNode', False)
def is_function_node(x):
return getattr(x, 'isFunctionNode', False) and getattr(x.__class__.__bases__[0], 'isNode', False)
def is_operator_node(x):
return getattr(x, 'isOperatorNode', False) and getattr(x.__class__.__bases__[0], 'isNode', False)
def is_symbol_node(x):
return getattr(x, 'isSymbolNode', False) and getattr(x.__class__.__bases__[0], 'isNode', False)
def rule2_node(node):
return is_constant_node(node) or (
is_operator_node(node) and
len(node.args) == 1 and
is_constant_node(node.args[0]) and
node.op in '-+~'
)
def parse_string(expression):
return parse_start(expression, {})
def parse_array_or_matrix(expressions):
return parse_multiple(expressions, {})
def parse_string_with_object(expression, options):
extra_nodes = options.get('nodes', {})
return parse_start(expression, extra_nodes)
def parse_multiple_with_object(expressions, options):
return parse_multiple(expressions, options)
parse = {
str: parse_string,
list: parse_array_or_matrix,
tuple: parse_array_or_matrix,
(str, dict): parse_string_with_object,
(list, dict): parse_multiple_with_object
}
def parse_multiple(expressions, options={}):
extra_nodes = options.get('nodes', {})
# Define a recursive function to handle nested structures
def recursive_parse(elem):
if not isinstance(elem, str):
raise TypeError('String expected')
return parse_start(elem, extra_nodes)
# Apply the recursive_parse function to each element in the expressions
return [recursive_parse(elem) for elem in expressions]
class Node(ABC):
@property
@abstractmethod
def type(self):
pass
@property
def is_node(self):
return True
class IndexNode(Node):
def __init__(self, dimensions, dot_notation=False):
super().__init__()
self.dimensions = dimensions
self.dot_notation = dot_notation
# Validate input
if not isinstance(dimensions, list) or not all(isinstance(dim, Node) for dim in dimensions):
raise TypeError('List containing Nodes expected for parameter "dimensions"')
if self.dot_notation and not self.is_object_property():
raise ValueError('dotNotation only applicable for object properties')
@property
def type(self):
return self.name
@property
def is_index_node(self):
return True
def __repr__(self) -> str:
return str(
{
"dimensions": self.dimensions,
"dotNotation": self.dot_notation
}
)
class AccessorNode(Node):
def __init__(self, object, index):
if not isinstance(object, Node):
raise TypeError('Node expected for parameter "object"')
if not isinstance(index, IndexNode):
raise TypeError('IndexNode expected for parameter "index"')
self.object = object
self.index = index
@property
def name(self):
if self.index:
return self.index.get_object_property() if self.index.is_object_property() else ''
else:
return self.object.name if hasattr(self.object, 'name') else ''
@property
def type(self):
return self.name
@property
def is_accessor_node(self):
return True
def __repr__(self) -> str:
return str(
{
"object": self.object,
"index": self.index
}
)
class ArrayNode(Node):
def __init__(self, items=None):
super().__init__()
self.items = items or []
# Validate input
if not isinstance(self.items, list) or not all(isinstance(item, Node) for item in self.items):
raise TypeError('List containing Nodes expected')
@property
def type(self):
return self.name
@property
def is_array_node(self):
return True
def __repr__(self) -> str:
return str(
{
"items": self.items
}
)
class AssignmentNode(Node):
"""
Define a symbol, like `a=3.2`, update a property like `a.b=3.2`, or replace a subset of a matrix like `A[2,2]=42`.
"""
def __init__(self, object, index=None, value=None):
super().__init__()
self.object = object
self.index = index
self.value = value or index
# Validate input
if not isinstance(object, (SymbolNode, AccessorNode)):
raise TypeError('SymbolNode or AccessorNode expected as "object"')
if isinstance(object, SymbolNode) and object.name == 'end':
raise ValueError('Cannot assign to symbol "end"')
if self.index and not isinstance(self.index, IndexNode):
raise TypeError('IndexNode expected as "index"')
if not isinstance(self.value, Node):
raise TypeError('Node expected as "value"')
@property
def name(self):
if self.index:
return self.index.object_property if self.index.is_object_property else ''
else:
return self.object.name or ''
@property
def type(self):
return name
@property
def is_assignment_node(self):
return True
def __repr__(self) -> str:
return str(
{
"object": self.object,
"index": self.index,
"value": self.value
}
)
class BlockNode(Node):
"""
Holds a set with blocks
"""
def __init__(self, blocks):
super().__init__()
# Validate input, copy blocks
if not isinstance(blocks, list):
raise TypeError('List expected')
self.blocks = []
for block in blocks:
node = block.get('node')
visible = block.get('visible', True)
if not isinstance(node, Node):
raise TypeError('Property "node" must be a Node')
if not isinstance(visible, bool):
raise TypeError('Property "visible" must be a boolean')
self.blocks.append({'node': node, 'visible': visible})
@property
def type(self):
return name
@property
def is_block_node(self):
return True
def __repr__(self) -> str:
return str(
{
"blocks": self.blocks
}
)
class ConditionalNode(Node):
"""
A lazy evaluating conditional operator: 'condition ? trueExpr : falseExpr'
"""
def __init__(self, condition, true_expr, false_expr):
super().__init__()
if not isinstance(condition, Node):
raise TypeError('Parameter condition must be a Node')
if not isinstance(true_expr, Node):
raise TypeError('Parameter trueExpr must be a Node')
if not isinstance(false_expr, Node):
raise TypeError('Parameter falseExpr must be a Node')
self.condition = condition
self.true_expr = true_expr
self.false_expr = false_expr
@property
def type(self):
return name
@property
def is_conditional_node(self):
return True
def __repr__(self) -> str:
return str(
{
"condition": self.condition,
"trueExpr": self.true_expr,
"falseExpr": self.false_expr
}
)
class ConstantNode(Node):
"""
A ConstantNode holds a constant value like a number or string.
"""
def __init__(self, value):
super().__init__()
self.value = value
@property
def type(self):
return name
@property
def is_constant_node(self):
return True
def is_percentage(self):
return False
def __repr__(self) -> str:
return str(
{
"value": self.value
}
)
keywords = {'end'}
class FunctionAssignmentNode(Node):
"""
Function assignment
"""
def __init__(self, name, params, expr):
super().__init__()
# Validate input
if not isinstance(name, str):
raise TypeError('String expected for parameter "name"')
if not isinstance(params, list):
raise TypeError('List expected for parameter "params"')
if not isinstance(expr, Node):
raise TypeError('Node expected for parameter "expr"')
if name in keywords:
raise ValueError('Illegal function name, "' + name + '" is a reserved keyword')
param_names = set()
self.params = []
self.types = []
for param in params:
if isinstance(param, str):
param_name = param
param_type = 'any'
elif isinstance(param, dict) and 'name' in param:
param_name = param['name']
param_type = param.get('type', 'any')
else:
raise TypeError('Invalid parameter format')
if param_name in param_names:
raise ValueError(f'Duplicate parameter name "{param_name}"')
else:
param_names.add(param_name)
self.params.append(param_name)
self.types.append(param_type)
self.name = name
self.expr = expr
@property
def type(self):
return name
@property
def is_function_assignment_node(self):
return True
def __repr__(self) -> str:
return str(
{
"name": self.name,
"params": self.params,
"expr": self.expr
}
)
class FunctionNode(Node):
"""
Invoke a list with arguments on a node
"""
def __init__(self, fn, args):
super().__init__()
if isinstance(fn, str):
fn = SymbolNode(fn)
# Validate input
if not isinstance(fn, Node):
raise TypeError('Node expected as parameter "fn"')
if not isinstance(args, list) or not all(isinstance(arg, Node) for arg in args):
raise TypeError('List of nodes expected for parameter "args"')
self.fn = fn
self.args = args
@property
def name(self):
return self.fn.name if hasattr(self.fn, 'name') else ''
@property
def type(self):
return 'FunctionNode'
@property
def is_function_node(self):
return True
def __repr__(self) -> str:
return str(
{
"fn": self.fn,
"args": self.args
}
)
class ObjectNode(Node):
"""
Holds an object with keys/values
"""
def __init__(self, properties=None):
super().__init__()
self.properties = properties or {}
# Validate input
if properties:
if not isinstance(properties, dict) or not all(isinstance(value, Node) for value in properties.values()):
raise TypeError('Dictionary containing Nodes expected')
@property
def type(self):
return 'ObjectNode'
@property
def is_object_node(self):
return True
def __repr__(self) -> str:
return str(
{
"properties": self.properties
}
)
class OperatorNode(Node):
"""
An operator with two arguments, like 2+3
"""
def __init__(self, op, fn, args, implicit=False, is_percentage=False):
super().__init__()
# Validate input
if not isinstance(op, str):
raise TypeError('String expected for parameter "op"')
if not isinstance(fn, str):
raise TypeError('String expected for parameter "fn"')
if not isinstance(args, list) or not all(isinstance(arg, Node) for arg in args):
raise TypeError('List containing Nodes expected for parameter "args"')
self.implicit = implicit
self.is_percentage = is_percentage
self.op = op
self.fn = fn
self.args = args or []
@property
def type(self):
return 'OperatorNode'
@property
def is_operator_node(self):
return True
def __repr__(self) -> str:
return str(
{
"op": self.op,
"fn": self.fn,
"args": self.args,
"implicit": self.implicit,
"isPercentage": self.is_percentage
}
)
class ParenthesisNode(Node):
"""
A parenthesis node describes manual parenthesis from the user input
"""
def __init__(self, content):
super().__init__()
# Validate input
if not isinstance(content, Node):
raise TypeError('Node expected for parameter "content"')
self.content = content
@property
def type(self):
return 'ParenthesisNode'
@property
def is_parenthesis_node(self):
return True
def __repr__(self) -> str:
return str(
{
"content": self.content
}
)
class RangeNode(Node):
"""
Create a range
"""
def __init__(self, start, end, step=None):
super().__init__()
# Validate inputs
if not isinstance(start, Node):
raise TypeError('Node expected for start')
if not isinstance(end, Node):
raise TypeError('Node expected for end')
if step is not None and not isinstance(step, Node):
raise TypeError('Node expected for step')
if len(locals()) > 4:
raise ValueError('Too many arguments')
self.start = start # Included lower-bound
self.end = end # Included upper-bound
self.step = step # Optional step
@property
def type(self):
return 'RangeNode'
@property
def is_range_node(self):
return True
def __repr__(self) -> str:
return str(
{
"start": self.start,
"end": self.end,
"step": self.step
}
)
class RelationalNode(Node):
"""
A node representing a chained conditional expression, such as 'x > y > z'
"""
def __init__(self, conditionals, params):
super().__init__()
if not isinstance(conditionals, list):
raise TypeError('Parameter conditionals must be a list')
if not isinstance(params, list):
raise TypeError('Parameter params must be a list')
if len(conditionals) != len(params) - 1:
raise TypeError('Parameter params must contain exactly one more element than parameter conditionals')
self.conditionals = conditionals
self.params = params
@property
def type(self):
return 'RelationalNode'
@property
def is_relational_node(self):
return True
def __repr__(self) -> str:
return str(
{
"conditionals": self.conditionals,
"params": self.params
}
)
class SymbolNode(Node):
"""
A symbol node can hold and resolve a symbol
"""
def __init__(self, name):
super().__init__()
if not isinstance(name, str):
raise TypeError('String expected for parameter "name"')
self.name = name
@property
def type(self):
return 'SymbolNode'
@property
def is_symbol_node(self):
return True
def __repr__(self) -> str:
return str(
{
"name": self.name
}
)
TOKENTYPE = {
'NULL': 0,
'DELIMITER': 1,
'NUMBER': 2,
'SYMBOL': 3,
'UNKNOWN': 4
}
# map with all delimiters
DELIMITERS = {
',': True,
'(': True,
')': True,
'[': True,
']': True,
'{': True,
'}': True,
'"': True,
"'": True,
';': True,
'+': True,
'-': True,
'*': True,
'.*': True,
'/': True,
'./': True,
'%': True,
'^': True,
'.^': True,
'~': True,
'!': True,
'&': True,
'|': True,
'^|': True,
'=': True,
':': True,
'?': True,
'==': True,
'!=': True,
'<': True,
'>': True,
'<=': True,
'>=': True,
'<<': True,
'>>': True,
'>>>': True
}
# map with all named delimiters
NAMED_DELIMITERS = {
'mod': True,
'to': True,
'in': True,
'and': True,
'xor': True,
'or': True,
'not': True
}
CONSTANTS = {
'true': True,
'false': False,
'null': None,
'undefined': None
}
NUMERIC_CONSTANTS = [
'NaN',
'Infinity'
]
ESCAPE_CHARACTERS = {
'"': '"',
"'": "'",
'\\': '\\',
'/': '/',
'b': '\b',
'f': '\f',
'n': '\n',
'r': '\r',
't': '\t'
# note that \u is handled separately in parseStringToken()
}
def initial_state():
return {
'extraNodes': {}, # current extra nodes, must be careful not to mutate
'expression': '', # current expression
'comment': '', # last parsed comment
'index': 0, # current index in expr
'token': '', # current token
'tokenType': TOKENTYPE['NULL'], # type of the token
'nestingLevel': 0, # level of nesting inside parameters, used to ignore newline characters
'conditionalLevel': None # when a conditional is being parsed, the level of the conditional is stored here
}
def current_string(state, length=1):
return state['expression'][state['index']:state['index']+length]
def current_character(state):
return current_string(state, 1)
def next(state):
state['index'] += 1
def prev_character(state):
return state['expression'][state['index'] - 1]
def next_character(state):
try:
return state['expression'][state['index'] + 1]
except IndexError:
return None
def get_token(state):
state["tokenType"] = TOKENTYPE['NULL']
state["token"] = ""
state["comment"] = ""
# skip over ignored characters
while True:
# comments
if current_character(state) == '#':
while current_character(state) != '\n' and current_character(state) != '':
state["comment"] += current_character(state)
next(state)
# whitespace: space, tab, and newline when inside parameters
if is_whitespace(current_character(state), state["nestingLevel"]):
next(state)
else:
break
# check for end of expression
if current_character(state) == '':
# token is still empty
state["tokenType"] = TOKENTYPE['DELIMITER']
return
# check for new line character
if current_character(state) == '\n' and not state["nestingLevel"]:
state["tokenType"] = TOKENTYPE['DELIMITER']
state["token"] = current_character(state)
next(state)
return
c1 = current_character(state)
c2 = current_string(state, 2)
c3 = current_string(state, 3)
if len(c3) == 3 and DELIMITERS.get(c3):
state["tokenType"] = TOKENTYPE['DELIMITER']
state["token"] = c3
next(state)
next(state)
next(state)
return
# check for delimiters consisting of 2 characters
if len(c2) == 2 and DELIMITERS.get(c2):
state["tokenType"] = TOKENTYPE['DELIMITER']
state["token"] = c2
next(state)
next(state)
return
# check for delimiters consisting of 1 character
if DELIMITERS.get(c1):
state["tokenType"] = TOKENTYPE['DELIMITER']
state["token"] = c1
next(state)
return
# check for a number
if is_digit_dot(c1):
state["tokenType"] = TOKENTYPE['NUMBER']
# check for binary, octal, or hex
c2 = current_string(state, 2)
if c2 == '0b' or c2 == '0o' or c2 == '0x':
state["token"] += current_character(state)
next(state)
state["token"] += current_character(state)
next(state)
while is_hex_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
if current_character(state) == '.':
# this number has a radix point
state["token"] += '.'
next(state)
# get the digits after the radix
while is_hex_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
elif current_character(state) == 'i':
# this number has a word size suffix
state["token"] += 'i'
next(state)
# get the word size
while is_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
return
# get number, can have a single dot
if current_character(state) == '.':
state["token"] += current_character(state)
next(state)
if not is_digit(current_character(state)):
# this is no number, it is just a dot (can be dot notation)
state["tokenType"] = TOKENTYPE['DELIMITER']
return
else:
while is_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
if is_decimal_mark(current_character(state), next_character(state)):
state["token"] += current_character(state)
next(state)
while is_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
# check for exponential notation like "2.3e-4", "1.23e50" or "2e+4"
if current_character(state) == 'E' or current_character(state) == 'e':
if is_digit(next_character(state)) or next_character(state) == '-' or next_character(state) == '+':
state["token"] += current_character(state)
next(state)
if current_character(state) == '+' or current_character(state) == '-':
state["token"] += current_character(state)
next(state)
# Scientific notation MUST be followed by an exponent
if not is_digit(current_character(state)):
raise create_syntax_error(state, 'Digit expected, got "' + current_character(state) + '"')
while is_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
if is_decimal_mark(current_character(state), next_character(state)):
raise create_syntax_error(state, 'Digit expected, got "' + current_character(state) + '"')
elif next_character(state) == '.':
next(state)
raise create_syntax_error(state, 'Digit expected, got "' + current_character(state) + '"')
return
# check for variables, functions, named operators
if is_alpha(current_character(state), prev_character(state), next_character(state)):
while is_alpha(current_character(state), prev_character(state), next_character(state)) or is_digit(current_character(state)):
state["token"] += current_character(state)
next(state)
if state["token"] in NAMED_DELIMITERS:
state["tokenType"] = TOKENTYPE['DELIMITER']
else:
state["tokenType"] = TOKENTYPE['SYMBOL']
return
# something unknown is found, wrong characters -> a syntax error
state["tokenType"] = TOKENTYPE['UNKNOWN']
while current_character(state) != '':
state["token"] += current_character(state)
next(state)
raise create_syntax_error(state, 'Syntax error in part "' + state["token"] + '"')
def get_token_skip_newline(state):
while True:
get_token(state)
if state["token"] != '\n':
break
def open_params(state):
state["nestingLevel"] += 1
def close_params(state):
state["nestingLevel"] -= 1
def is_alpha(c, c_prev, c_next):
return is_valid_latin_or_greek(c) or is_valid_math_symbol(c, c_next) or is_valid_math_symbol(c_prev, c)
def is_valid_latin_or_greek(c):
return c.isalpha() or c in ['_', '$'] or '\u00C0' <= c <= '\u02AF' or '\u0370' <= c <= '\u03FF' or '\u2100' <= c <= '\u214F'
def is_valid_math_symbol(high, low):
return high == '\uD835' and '\uDC00' <= low <= '\uDFFF' and low not in ['\uDC55', '\uDC9D', '\uDCA0', '\uDCA1', '\uDCA3', '\uDCA4', '\uDCA7', '\uDCA8', '\uDCAD', '\uDCBA', '\uDCBC', '\uDCC4', '\uDD06', '\uDD0B', '\uDD0C', '\uDD15', '\uDD1D', '\uDD3A', '\uDD3F', '\uDD45', '\uDD47', '\uDD48', '\uDD49', '\uDD51', '\uDEA6', '\uDEA7', '\uDFCC', '\uDFCD']
def is_whitespace(c, nesting_level):
return c in [' ', '\t'] or (c == '\n' and nesting_level > 0)
def is_decimal_mark(c, c_next):
return c == '.' and c_next not in ['/', '*', '^']
def is_digit_dot(c):
return c.isdigit() or c == '.'
def is_digit(c):
return c.isdigit()
def is_hex_digit(c):
return c.isdigit() or ('a' <= c <= 'f') or ('A' <= c <= 'F')
def parse_start(expression, extra_nodes):
state = initial_state()
state.update({"expression": expression, "extraNodes": extra_nodes})
get_token(state)
node = parse_block(state)
# check for garbage at the end of the expression
# an expression ends with an empty character '' and tokenType DELIMITER
if state["token"] != '':
if state["tokenType"] == TOKENTYPE['DELIMITER']:
# user entered a non-existing operator like "//"
# TODO: give hints for aliases, for example with "<>" give as hint "did you mean !== ?"
raise create_error(state, 'Unexpected operator ' + state["token"])
else:
raise create_syntax_error(state, 'Unexpected part "' + state["token"] + '"')
return node
def parse_block(state):
node = None
blocks = []
visible = None
if state["token"] != '' and state["token"] != '\n' and state["token"] != ';':
node = parse_assignment(state)
if state["comment"]:
node.comment = state["comment"]
# TODO: simplify this loop
while state["token"] == '\n' or state["token"] == ';':
if len(blocks) == 0 and node:
visible = (state["token"] != ';')
blocks.append({"node": node, "visible": visible})
get_token(state)
if state["token"] != '\n' and state["token"] != ';' and state["token"] != '':
node = parse_assignment(state)
if state["comment"]:
node.comment = state["comment"]
visible = (state["token"] != ';')
blocks.append({"node": node, "visible": visible})
if len(blocks) > 0:
return BlockNode(blocks)
else:
if not node:
node = ConstantNode(None)
if state["comment"]:
node.comment = state["comment"]
return node
def parse_assignment(state):
name = None
args = None
value = None
valid = None
node = parse_conditional(state)
if state["token"] == '=':
if is_symbol_node(node):
# parse a variable assignment like 'a = 2/3'
name = node.name
get_token_skip_newline(state)
value = parse_assignment(state)
return AssignmentNode(SymbolNode(name), value)
elif is_accessor_node(node):
# parse a matrix subset assignment like 'A[1,2] = 4'
get_token_skip_newline(state)
value = parse_assignment(state)
return AssignmentNode(node.object, node.index, value)
elif is_function_node(node) and is_symbol_node(node.fn):
# parse function assignment like 'f(x) = x^2'
valid = True
args = []
name = node.name
for arg in node.args:
if is_symbol_node(arg):
args.append(arg.name)
else:
valid = False
if valid:
get_token_skip_newline(state)
value = parse_assignment(state)
return FunctionAssignmentNode(name, args, value)
raise create_syntax_error(state, 'Invalid left hand side of assignment operator =')
return node
def parse_conditional(state):
node = parse_logical_or(state)
while state["token"] == '?':
# set a conditional level, the range operator will be ignored as long
# as conditionalLevel === state.nestingLevel.
prev = state["conditionalLevel"]
state["conditionalLevel"] = state["nestingLevel"]
get_token_skip_newline(state)
condition = node
true_expr = parse_assignment(state)
if state["token"] != ':':
raise create_syntax_error(state, 'False part of conditional expression expected')
state["conditionalLevel"] = None
get_token_skip_newline(state)
false_expr = parse_assignment(state) # Note: check for conditional operator again, right associativity
node = ConditionalNode(condition, true_expr, false_expr)
# restore the previous conditional level
state["conditionalLevel"] = prev
return node
def parse_logical_or(state):
node = parse_logical_xor(state)
while state["token"] == 'or':
get_token_skip_newline(state)
node = OperatorNode('or', 'or', [node, parse_logical_xor(state)])
return node
def parse_logical_xor(state):
node = parse_logical_and(state)
while state["token"] == 'xor':
get_token_skip_newline(state)
node = OperatorNode('xor', 'xor', [node, parse_logical_and(state)])
return node
def parse_logical_and(state):
node = parse_bitwise_or(state)
while state["token"] == 'and':
get_token_skip_newline(state)
node = OperatorNode('and', 'and', [node, parse_bitwise_or(state)])
return node
def parse_bitwise_or(state):
node = parse_bitwise_xor(state)
while state["token"] == '|':
get_token_skip_newline(state)
node = OperatorNode('|', 'bitOr', [node, parse_bitwise_xor(state)])
return node
def parse_bitwise_xor(state):
node = parse_bitwise_and(state)
while state["token"] == '^|':
get_token_skip_newline(state)
node = OperatorNode('^|', 'bitXor', [node, parse_bitwise_and(state)])
return node
def parse_bitwise_and(state):
node = parse_relational(state)
while state["token"] == '&':
get_token_skip_newline(state)
node = OperatorNode('&', 'bitAnd', [node, parse_relational(state)])
return node
def parse_relational(state):
params = [parse_shift(state)]
conditionals = []
operators = {
'==': 'equal',
'!=': 'unequal',
'<': 'smaller',
'>': 'larger',
'<=': 'smallerEq',
'>=': 'largerEq'
}
while state["token"] in operators:
cond = {"name": state["token"], "fn": operators[state["token"]]}
conditionals.append(cond)
get_token_skip_newline(state)
params.append(parse_shift(state))
if len(params) == 1:
return params[0]
elif len(params) == 2:
return OperatorNode(conditionals[0]["name"], conditionals[0]["fn"], params)
else:
return RelationalNode([c["fn"] for c in conditionals], params)
def parse_shift(state):
node = parse_conversion(state)
operators = {
'<<': 'leftShift',
'>>': 'rightArithShift',
'>>>': 'rightLogShift'
}
while state["token"] in operators:
name = state["token"]
fn = operators[name]
get_token_skip_newline(state)
params = [node, parse_conversion(state)]
node = OperatorNode(name, fn, params)
return node
def parse_conversion(state):
node = parse_range(state)
operators = {
'to': 'to',
'in': 'to' # alias of 'to'
}
while state["token"] in operators:
name = state["token"]
fn = operators[name]
get_token_skip_newline(state)
if name == 'in' and state["token"] == '':
# end of expression -> this is the unit 'in' ('inch')
node = OperatorNode('*', 'multiply', [node, SymbolNode('in')], True)
else:
# operator 'a to b' or 'a in b'
params = [node, parse_range(state)]
node = OperatorNode(name, fn, params)
return node
def parse_range(state):
if state["token"] == ':':
# implicit start=1 (one-based)
node = ConstantNode(1)
else:
# explicit start
node = parse_add_subtract(state)
params = []
if state["token"] == ':' and (state["conditionalLevel"] != state["nestingLevel"]):
# we ignore the range operator when a conditional operator is being processed on the same level
params.append(node)
# parse step and end
while state["token"] == ':' and len(params) < 3:
get_token_skip_newline(state)
if state["token"] in [')', ']', ',', '']:
# implicit end
params.append(SymbolNode('end'))
else:
# explicit end
params.append(parse_add_subtract(state))
if len(params) == 3:
# params = [start, step, end]
node = RangeNode(params[0], params[2], params[1]) # start, end, step
else:
# params = [start, end]
node = RangeNode(params[0], params[1]) # start, end
return node
def parse_add_subtract(state):
node = parse_multiply_divide(state)
operators = {
'+': 'add',
'-': 'subtract'
}
while state["token"] in operators:
name = state["token"]
fn = operators[name]
get_token_skip_newline(state)
right_node = parse_multiply_divide(state)
params = [node, right_node] if not right_node.is_percentage else [node, OperatorNode('*', 'multiply', [node, right_node])]
node = OperatorNode(name, fn, params)
return node
def parse_multiply_divide(state):
node = parse_implicit_multiplication(state)
last = node
operators = {
'*': 'multiply',
'.*': 'dotMultiply',
'/': 'divide',
'./': 'dotDivide'
}
while True:
if state["token"] in operators:
# explicit operators
name = state["token"]
fn = operators[name]
get_token_skip_newline(state)
last = parse_implicit_multiplication(state)
node = OperatorNode(name, fn, [node, last])
else:
break
return node
def parse_implicit_multiplication(state):
node = parse_rule2(state)
last = node
while True:
if (state["tokenType"] == TOKENTYPE['SYMBOL'] or
(state["token"] == 'in' and is_constant_node(node)) or
(state["tokenType"] == TOKENTYPE['NUMBER'] and
not is_constant_node(last) and
(not is_operator_node(last) or last.op == '!')) or
(state["token"] == '(')):
# parse implicit multiplication
last = parse_rule2(state)
node = OperatorNode('*', 'multiply', [node, last], True) # implicit
else:
break
return node
def parse_rule2(state):
node = parse_percentage(state)
last = node
token_states = []
while True:
# Match the "number /" part of the pattern "number / number symbol"
if state["token"] == '/' and rule2_node(last):
# Look ahead to see if the next token is a number
token_states.append(state.copy())
get_token_skip_newline(state)
# Match the "number / number" part of the pattern
if state["tokenType"] == TOKENTYPE['NUMBER']:
# Look ahead again
token_states.append(state.copy())
get_token_skip_newline(state)
# Match the "symbol" part of the pattern, or a left parenthesis
if state["tokenType"] == TOKENTYPE['SYMBOL'] or state["token"] == '(':
# We've matched the pattern "number / number symbol".
# Rewind once and build the "number / number" node; the symbol will be consumed later
state = token_states.pop()
token_states.pop()
last = parse_percentage(state)
node = OperatorNode('/', 'divide', [node, last])
else:
# Not a match, so rewind
token_states.pop()
state = token_states.pop()
break
else:
# Not a match, so rewind
state = token_states.pop()
break
else:
break
return node
def parse_percentage(state):
node = parse_unary(state)
operators = {'%': 'mod', 'mod': 'mod'}
while state["token"] in operators:
name = state["token"]
fn = operators[name]
get_token_skip_newline(state)
if name == '%' and state["tokenType"] == TOKENTYPE['DELIMITER'] and state["token"] != '(':
# If the expression contains only %, then treat that as /100
node = OperatorNode('/', 'divide', [node, ConstantNode(100)], False, True)
else:
params = [node, parse_unary(state)]
node = OperatorNode(name, fn, params)
return node
def parse_unary(state):
operators = {'-': 'unaryMinus', '+': 'unaryPlus', '~': 'bitNot', 'not': 'not'}
if state["token"] in operators:
fn = operators[state["token"]]
name = state["token"]
get_token_skip_newline(state)
params = [parse_unary(state)]
return OperatorNode(name, fn, params)
return parse_pow(state)
def parse_pow(state):
node = parse_left_hand_operators(state)
if state["token"] == '^' or state["token"] == '.^':
name = state["token"]
fn = 'pow' if name == '^' else 'dotPow'
get_token_skip_newline(state)
params = [node, parse_unary(state)] # Go back to unary, we can have '2^-3'
node = OperatorNode(name, fn, params)
return node
def parse_left_hand_operators(state):
node = parse_custom_nodes(state)
operators = {'!': 'factorial', '\'': 'ctranspose'}
while state["token"] in operators:
name = state["token"]
fn = operators[name]
get_token(state)
params = [node]
node = OperatorNode(name, fn, params)
node = parse_accessors(state, node)
return node
def parse_custom_nodes(state):
params = []
if state["tokenType"] == TOKENTYPE['SYMBOL'] and state["token"] in state["extraNodes"]:
CustomNode = state["extraNodes"][state["token"]]
get_token(state)
# parse parameters
if state["token"] == '(':
params = []
open_params(state)
get_token(state)
if state["token"] != ')':
params.append(parse_assignment(state))
# parse a list with parameters
while state["token"] == ',':
get_token(state)
params.append(parse_assignment(state))
if state["token"] != ')':
raise ValueError('Parenthesis ) expected')
close_params(state)
get_token(state)
# create a new custom node
return CustomNode(params)
return parse_symbol(state)
def parse_symbol(state):
name = state["token"]
if state["tokenType"] == TOKENTYPE['SYMBOL'] or (state["tokenType"] == TOKENTYPE['DELIMITER'] and state["token"] in NAMED_DELIMITERS):
get_token(state)
if name in CONSTANTS:
node = ConstantNode(CONSTANTS[name])
elif name in NUMERIC_CONSTANTS:
node = ConstantNode(numeric(name, 'number'))
else:
node = SymbolNode(name)
# parse function parameters and matrix index
node = parse_accessors(state, node)
return node
return parse_string(state)
def parse_accessors(state, node, types=None):
while state["token"] in ('(', '[', '.') and (not types or state["token"] in types):
params = []
if state["token"] == '(':
if is_symbol_node(node) or is_accessor_node(node):
open_params(state)
get_token(state)
if state["token"] != ')':
params.append(parse_assignment(state))
while state["token"] == ',':
get_token(state)
params.append(parse_assignment(state))
if state["token"] != ')':
raise ValueError('Parenthesis ) expected')
close_params(state)
get_token(state)
node = FunctionNode(node, params)
else:
return node
elif state["token"] == '[':
open_params(state)
get_token(state)
if state["token"] != ']':
params.append(parse_assignment(state))
while state["token"] == ',':
get_token(state)
params.append(parse_assignment(state))
if state["token"] != ']':
raise ValueError('Parenthesis ] expected')
close_params(state)
get_token(state)
node = AccessorNode(node, IndexNode(params))
else:
get_token(state)
if state["token"] in NAMED_DELIMITERS or state["tokenType"] == TOKENTYPE['SYMBOL']:
params.append(ConstantNode(state["token"]))
get_token(state)
dot_notation = True
node = AccessorNode(node, IndexNode(params, dot_notation))
return node
def parse_string(state):
if state["token"] in ('"', "'"):
str_value = parse_string_token(state, state["token"])
node = ConstantNode(str_value)
node = parse_accessors(state, node)
return node
return parse_matrix(state)
def parse_string_token(state, quote):
str_value = ''
while current_character(state) != '' and current_character(state) != quote:
if current_character(state) == '\\':
next_character(state)
char = current_character(state)
escape_char = ESCAPE_CHARACTERS.get(char)
if escape_char is not None:
str_value += escape_char
state["index"] += 1
elif char == 'u':
unicode_char = state["expression"][state["index"] + 1: state["index"] + 5]
if len(unicode_char) == 4 and all(c in '0123456789abcdefABCDEF' for c in unicode_char):
str_value += chr(int(unicode_char, 16))
state["index"] += 5
else:
raise ValueError(f'Invalid unicode character \\u{unicode_char}')
else:
raise ValueError(f'Bad escape character \\{char}')
else:
str_value += current_character(state)
next_character(state)
get_token(state)
if state["token"] != quote:
raise ValueError(f'End of string {quote} expected')
get_token(state)
return str_value
def parse_matrix(state):
if state["token"] == '[':
open_params(state)
get_token(state)
if state["token"] != ']':
row = parse_row(state)
if state["token"] == ';':
rows = 1
params = [row]
while state["token"] == ';':
get_token(state)
if state["token"] != ']':
params.append(parse_row(state))
rows += 1
if state["token"] != ']':
raise ValueError('End of matrix ] expected')
close_params(state)
get_token(state)
cols = len(params[0]["items"])
for r in range(1, rows):
if len(params[r]["items"]) != cols:
raise ValueError(f'Column dimensions mismatch ({len(params[r]["items"])} !== {cols})')
array = ArrayNode(params)
else:
if state["token"] != ']':
raise ValueError('End of matrix ] expected')
close_params(state)
get_token(state)
array = row
else:
close_params(state)
get_token(state)
array = ArrayNode([])
return parse_accessors(state, array)
return parse_object(state)
def parse_row(state):
params = [parse_assignment(state)]
length = 1
while state["token"] == ',':
get_token(state)
if state["token"] != ']' and state["token"] != ';':
params.append(parse_assignment(state))
length += 1
return ArrayNode(params)
def parse_object(state):
if state["token"] == '{':
open_params(state)
properties = {}
while True:
get_token(state)
if state["token"] != '}':
if state["token"] == '"' or state["token"] == "'":
key = parse_string_token(state, state["token"])
elif state["tokenType"] == TOKENTYPE['SYMBOL'] or (state["tokenType"] == TOKENTYPE['DELIMITER'] and state["token"] in NAMED_DELIMITERS):
key = state["token"]
get_token(state)
else:
raise ValueError('Symbol or string expected as object key')
if state["token"] != ':':
raise ValueError('Colon : expected after object key')
get_token(state)
properties[key] = parse_assignment(state)
if state["token"] != ',':
break
if state["token"] != '}':
raise ValueError('Comma , or bracket } expected after object value')
close_params(state)
get_token(state)
node = ObjectNode(properties)
node = parse_accessors(state, node)
return node
return parse_number(state)
def parse_number(state):
if state["tokenType"] == TOKENTYPE['NUMBER']:
number_str = state["token"]
get_token(state)
return ConstantNode(numeric(number_str, config.number))
return parse_parentheses(state)
def parse_parentheses(state):
node = None
if state["token"] == '(':
open_params(state)
get_token(state)
node = parse_assignment(state)
if state["token"] != ')':
raise ValueError('Parenthesis ) expected')
close_params(state)
get_token(state)
node = ParenthesisNode(node)
node = parse_accessors(state, node)
return node
return parse_end(state)
def parse_end(state):
if state["token"] == '':
raise ValueError('Unexpected end of expression')
else:
raise ValueError('Value expected')
def col(state):
return state["index"] - len(state["token"]) + 1
def create_syntax_error(state, message):
c = col(state)
error = SyntaxError(message + ' (char ' + str(c) + ')')
error.char = c
return error
def create_error(state, message):
c = col(state)
error = SyntaxError(message + ' (char ' + str(c) + ')')
error.char = c
return error
@RezSat
Copy link
Author

RezSat commented Apr 6, 2024

Example usage: print(parse_start('2x+5', {}))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment