Last active
October 28, 2020 22:34
-
-
Save Tran-Antoine/b4e9f84f00d8f45c0ab654795a4ebb3d to your computer and use it in GitHub Desktop.
A RegexParsers implementation to parse mathematical expressions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.util.parsing.combinator.RegexParsers | |
class ExpressionParser extends RegexParsers { | |
/** | |
* Parses a mathematical expression. | |
* @return any kind of mathematical expression | |
*/ | |
def expression: Parser[Expression] = allExpressions(0) | |
/** | |
* Parses either a sum or subtraction (which is simply considered as a specific kind of addition). | |
* Are also included redundant `+` such as `+7`, and negative members such as `-5`. | |
* @return a sum or a subtraction | |
*/ | |
def sum: Parser[Expression] = explicitSum | implicitSum | |
/** | |
* Parses an explicit sum, defined as a sum or a subtraction where both left and right terms are explicitly | |
* specified. For instance, `x+8` and `6-2` are explicit sums. | |
* @return an explicit sum | |
*/ | |
def explicitSum: Parser[Expression] = | |
allExpressions(1) ~ SUM ~ allExpressions(0) ^^ { | |
case a ~ "+" ~ b => Sum(a, b) | |
case a ~ "-" ~ b => Sub(a, b) | |
} | |
/** | |
* Parses an implicit sum, defined as a sum or a subtraction where only the right term is specified, the left one | |
* being assumed to be 0. For instance, `+5` and `-y` are implicit sums. | |
* | |
* @return an implicit sum. Implicit sums return the expression itself, whereas implicit subtractions return | |
* a number with the negated value found if the expression is a number, otherwise a multiplication | |
* between `-1` and the expression found. | |
*/ | |
def implicitSum: Parser[Expression] = { | |
SUM ~ allExpressions(0) ^^ { | |
case "+" ~ expr => expr | |
case "-" ~ Number(value) => Number(-value) | |
case "-" ~ expr => Mult(Number(-1), expr) | |
} | |
} | |
/** | |
* Parses either a multiplication or a division (which is considered as a specific kind of multiplication). | |
* Are also included so called implicit multiplications, defined as multiplication where the `*` operator is implicit. | |
* @return any type of multiplication | |
*/ | |
def mult: Parser[Expression] = explicitMult | implicitMult | |
/** | |
* Parses an explicit multiplication (including divisions), defined as a multiplication where both factors are connected | |
* with an explicit `*` specified. For instance, `5*x` and `y*12` are explicit multiplications. | |
* @return an explicit multiplication | |
*/ | |
def explicitMult: Parser[Expression] = | |
allExpressions(2) ~ MULT ~ allExpressions(1) ^^ { | |
case a ~ "*" ~ b => Mult(a, b) | |
case a ~ "/" ~ b => Div(a, b) | |
} | |
/** | |
* Parses an implicit multiplication (divisions excluded), defined as a multiplication where both factors are stuck | |
* together with no explicit `*` specified. For instance, `5x` and `xy` are implicit multiplications. | |
* @return an implicit multiplication | |
*/ | |
def implicitMult: Parser[Expression] = allExpressions(2) ~ allExpressions(1) ^^ unpack(Mult) | |
/** | |
* Parses a power expression, syntactically defined as `a^b`. | |
* @return a power expression | |
*/ | |
def pow: Parser[Expression] = (allExpressions(3) <~ POW) ~ allExpressions(2) ^^ unpack(Pow) | |
/** | |
* Parses any pre-defined function, syntactically defined as `func_name(arg0, arg1, ...)`. Functions are allowed to | |
* require no parameter. | |
* @return a function expression | |
*/ | |
def function: Parser[Expression] = FUNC_NAME ~ ("(" ~> functionArgs <~")") ^^ unpack(Function) | |
/** | |
* Parses a potentially empty list of arguments for a function. Arguments are assumed to be separated by commas. | |
* For instance, the empty set and "`5, x`" are valid argument sets. | |
* @return a list of expressions | |
*/ | |
def functionArgs: Parser[List[Expression]] = repsep(allExpressions(0), ",") | |
/** | |
* Parses any mathematical expression that's surrounded by brackets, to enforce operation priority. | |
* @return any mathematical expression | |
*/ | |
def brackets: Parser[Expression] = "(" ~> allExpressions(0) <~ ")" | |
/** | |
* Parses a char. Any char matched is converted to a `Variable` | |
* @return a variable object | |
*/ | |
def char: Parser[Expression] = CHAR ^^ Variable | |
/** | |
* Parses an unsigned numbers. For parsing simplification purposes, signed numbers don't exist and are thus never considered | |
* throughout the parsing process. For instance, in the expression `-4-5`, `-4` is parsed as an implicit subtraction between 0 and 4, and | |
* `5` is nothing but the second term of the subtraction between `-4` and `5`. | |
* @return a parser matching a {@link Number} object | |
*/ | |
def unsignedNumber: Parser[Expression] = UNSIGNED_NUMBER ^^ Number | |
/** | |
* Lexer parsing either the sum or the subtraction operator | |
* @return a parser matching either `+` or `-` | |
*/ | |
def SUM: Parser[String] = "+" | "-" | |
/** | |
* Lexer parsing either the multiplication or the division operator | |
* @return a parser matching `*` or `/` | |
*/ | |
def MULT: Parser[String] = "*" | "/" | |
/** | |
* Lexer parsing the power operator | |
* @return a parser matching `^` | |
*/ | |
def POW: Parser[String] = "^" | |
/** | |
* Lexer parsing pre-defined function names | |
* @return a parser matching any function name that was pre-defined | |
*/ | |
def FUNC_NAME: Parser[String] = | |
"sin" | "cos" | "tan" | // trigonometric functions | |
"fac" | "binomial" | // probability functions | |
"sqrt"| "root" | // root functions | |
"exp" | "ln" | "log" | // exponential functions | |
"sum" | "sub" | "mult" | "div" | "pow" // infix supported functions | |
/** | |
* Lexer parsing an unsigned number | |
* @return a parser matching an unsigned number | |
*/ | |
def UNSIGNED_NUMBER: Parser[Int] = """[0-9]+""".r ^^ {_.toInt} | |
/** | |
* Lexer parsing a char | |
* @return a parser matching a char | |
*/ | |
def CHAR: Parser[String] = """[a-zA-Z]""".r | |
/** | |
* Computes an expression parser defined as the disjunction of all operators that have higher or equal precedence to the minimal | |
* precedence specified. For instance, a multiplication is a binary operator requiring a left expression whose precedence is | |
* higher than itself (a power expression, a function, a bracket expression or a number/variable), and a right expression whose | |
* precedence is higher or equal to itself (all the mentioned above + multiplication expressions). | |
* <p> | |
* Examples: | |
* - `3^x * 12` will be matched as the multiplication between `3^x` and `12` | |
* - `2+3 * 5` will <strong>not</strong> be matched as the multiplication between `2+3` and `5`, because since multiplication | |
* has higher precedence than addition, `2+3` is not a valid left term. | |
* | |
* @param minPrecedence the minimal required preference of the disjoint operators. | |
* @return a parser matching expressions that is the result of the disjunction between other operators | |
*/ | |
def allExpressions(minPrecedence: Int): Parser[Expression] = { | |
// Using suppliers instead of direct references to avoid infinite recursion | |
val allExpressions: Seq[() => Parser[Expression]] = Seq( | |
()=> sum, | |
()=> mult, | |
()=> pow, | |
()=> function, | |
()=> brackets, | |
()=> char, () => unsignedNumber) | |
(minPrecedence until allExpressions.length) map {allExpressions(_)} reduce((p1, p2) => p1() | p2()) | |
} | |
/** | |
* Generates a function that unpacks two elements connected by sequential composition (`~`) into an object. | |
* @param f the function generating the object from the two elements | |
* @tparam T the type of the first element | |
* @tparam U the type of the second element | |
* @tparam V the type of the object created by `f` | |
* @return a function that takes `a~b` in argument and returns `f(a, b)` | |
*/ | |
def unpack[T, U, V](f: (T, U) => V): (T ~ U) => V = {case a ~ b => f(a, b)} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment