Skip to content

Instantly share code, notes, and snippets.

@Tran-Antoine
Last active October 28, 2020 22:34
Show Gist options
  • Save Tran-Antoine/b4e9f84f00d8f45c0ab654795a4ebb3d to your computer and use it in GitHub Desktop.
Save Tran-Antoine/b4e9f84f00d8f45c0ab654795a4ebb3d to your computer and use it in GitHub Desktop.
A RegexParsers implementation to parse mathematical expressions.
import scala.util.parsing.combinator.RegexParsers
class ExpressionParser extends RegexParsers {
/**
* Parses a mathematical expression.
* @return any kind of mathematical expression
*/
def expression: Parser[Expression] = allExpressions(0)
/**
* Parses either a sum or subtraction (which is simply considered as a specific kind of addition).
* Are also included redundant `+` such as `+7`, and negative members such as `-5`.
* @return a sum or a subtraction
*/
def sum: Parser[Expression] = explicitSum | implicitSum
/**
* Parses an explicit sum, defined as a sum or a subtraction where both left and right terms are explicitly
* specified. For instance, `x+8` and `6-2` are explicit sums.
* @return an explicit sum
*/
def explicitSum: Parser[Expression] =
allExpressions(1) ~ SUM ~ allExpressions(0) ^^ {
case a ~ "+" ~ b => Sum(a, b)
case a ~ "-" ~ b => Sub(a, b)
}
/**
* Parses an implicit sum, defined as a sum or a subtraction where only the right term is specified, the left one
* being assumed to be 0. For instance, `+5` and `-y` are implicit sums.
*
* @return an implicit sum. Implicit sums return the expression itself, whereas implicit subtractions return
* a number with the negated value found if the expression is a number, otherwise a multiplication
* between `-1` and the expression found.
*/
def implicitSum: Parser[Expression] = {
SUM ~ allExpressions(0) ^^ {
case "+" ~ expr => expr
case "-" ~ Number(value) => Number(-value)
case "-" ~ expr => Mult(Number(-1), expr)
}
}
/**
* Parses either a multiplication or a division (which is considered as a specific kind of multiplication).
* Are also included so called implicit multiplications, defined as multiplication where the `*` operator is implicit.
* @return any type of multiplication
*/
def mult: Parser[Expression] = explicitMult | implicitMult
/**
* Parses an explicit multiplication (including divisions), defined as a multiplication where both factors are connected
* with an explicit `*` specified. For instance, `5*x` and `y*12` are explicit multiplications.
* @return an explicit multiplication
*/
def explicitMult: Parser[Expression] =
allExpressions(2) ~ MULT ~ allExpressions(1) ^^ {
case a ~ "*" ~ b => Mult(a, b)
case a ~ "/" ~ b => Div(a, b)
}
/**
* Parses an implicit multiplication (divisions excluded), defined as a multiplication where both factors are stuck
* together with no explicit `*` specified. For instance, `5x` and `xy` are implicit multiplications.
* @return an implicit multiplication
*/
def implicitMult: Parser[Expression] = allExpressions(2) ~ allExpressions(1) ^^ unpack(Mult)
/**
* Parses a power expression, syntactically defined as `a^b`.
* @return a power expression
*/
def pow: Parser[Expression] = (allExpressions(3) <~ POW) ~ allExpressions(2) ^^ unpack(Pow)
/**
* Parses any pre-defined function, syntactically defined as `func_name(arg0, arg1, ...)`. Functions are allowed to
* require no parameter.
* @return a function expression
*/
def function: Parser[Expression] = FUNC_NAME ~ ("(" ~> functionArgs <~")") ^^ unpack(Function)
/**
* Parses a potentially empty list of arguments for a function. Arguments are assumed to be separated by commas.
* For instance, the empty set and "`5, x`" are valid argument sets.
* @return a list of expressions
*/
def functionArgs: Parser[List[Expression]] = repsep(allExpressions(0), ",")
/**
* Parses any mathematical expression that's surrounded by brackets, to enforce operation priority.
* @return any mathematical expression
*/
def brackets: Parser[Expression] = "(" ~> allExpressions(0) <~ ")"
/**
* Parses a char. Any char matched is converted to a `Variable`
* @return a variable object
*/
def char: Parser[Expression] = CHAR ^^ Variable
/**
* Parses an unsigned numbers. For parsing simplification purposes, signed numbers don't exist and are thus never considered
* throughout the parsing process. For instance, in the expression `-4-5`, `-4` is parsed as an implicit subtraction between 0 and 4, and
* `5` is nothing but the second term of the subtraction between `-4` and `5`.
* @return a parser matching a {@link Number} object
*/
def unsignedNumber: Parser[Expression] = UNSIGNED_NUMBER ^^ Number
/**
* Lexer parsing either the sum or the subtraction operator
* @return a parser matching either `+` or `-`
*/
def SUM: Parser[String] = "+" | "-"
/**
* Lexer parsing either the multiplication or the division operator
* @return a parser matching `*` or `/`
*/
def MULT: Parser[String] = "*" | "/"
/**
* Lexer parsing the power operator
* @return a parser matching `^`
*/
def POW: Parser[String] = "^"
/**
* Lexer parsing pre-defined function names
* @return a parser matching any function name that was pre-defined
*/
def FUNC_NAME: Parser[String] =
"sin" | "cos" | "tan" | // trigonometric functions
"fac" | "binomial" | // probability functions
"sqrt"| "root" | // root functions
"exp" | "ln" | "log" | // exponential functions
"sum" | "sub" | "mult" | "div" | "pow" // infix supported functions
/**
* Lexer parsing an unsigned number
* @return a parser matching an unsigned number
*/
def UNSIGNED_NUMBER: Parser[Int] = """[0-9]+""".r ^^ {_.toInt}
/**
* Lexer parsing a char
* @return a parser matching a char
*/
def CHAR: Parser[String] = """[a-zA-Z]""".r
/**
* Computes an expression parser defined as the disjunction of all operators that have higher or equal precedence to the minimal
* precedence specified. For instance, a multiplication is a binary operator requiring a left expression whose precedence is
* higher than itself (a power expression, a function, a bracket expression or a number/variable), and a right expression whose
* precedence is higher or equal to itself (all the mentioned above + multiplication expressions).
* <p>
* Examples:
* - `3^x * 12` will be matched as the multiplication between `3^x` and `12`
* - `2+3 * 5` will <strong>not</strong> be matched as the multiplication between `2+3` and `5`, because since multiplication
* has higher precedence than addition, `2+3` is not a valid left term.
*
* @param minPrecedence the minimal required preference of the disjoint operators.
* @return a parser matching expressions that is the result of the disjunction between other operators
*/
def allExpressions(minPrecedence: Int): Parser[Expression] = {
// Using suppliers instead of direct references to avoid infinite recursion
val allExpressions: Seq[() => Parser[Expression]] = Seq(
()=> sum,
()=> mult,
()=> pow,
()=> function,
()=> brackets,
()=> char, () => unsignedNumber)
(minPrecedence until allExpressions.length) map {allExpressions(_)} reduce((p1, p2) => p1() | p2())
}
/**
* Generates a function that unpacks two elements connected by sequential composition (`~`) into an object.
* @param f the function generating the object from the two elements
* @tparam T the type of the first element
* @tparam U the type of the second element
* @tparam V the type of the object created by `f`
* @return a function that takes `a~b` in argument and returns `f(a, b)`
*/
def unpack[T, U, V](f: (T, U) => V): (T ~ U) => V = {case a ~ b => f(a, b)}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment