kokoro-aya/MathExprParser.scala

## MathExprParser.scala
package parser

import lexer.{LeftParenToken, RightParenToken, OperToken, Token, tokenize}
import sig.{Binary, Binop, Exp}

// A simple example to build a recursive descent parser in Scala,
// in a rather imperative coding style

// Mixing with the FP paradigm, this imperative coding style lead to very verbose code with lots of
// nested pattern matching and if-else guards. But at least the code should works well.
// If we can fill out every corner cases.

// This is not what we are looking for to build a larger complex language, but it should give a sufficient
// small working example of how things are done without mutating our list in place.


/*
       expr ::= factor exprSub
       exprSub ::= ε | (+|-) factor exprSub
       factor ::= term factorSub
       factorSub ::= ε | (*|/) term factorSub
       term ::= '(' expr ')' | literal
* */

def parse(tokens: List[Token]): Exp =
  val res = parseExpr(tokens)
  if res._2.isEmpty then
    res._1
  else
    throw IllegalStateException("There are remained tokens: " + res._2.toString)


def parseExpr(tokens: List[Token]): (Exp, List[Token]) =
  val (factorExp, afterFactor) = parseFactor(tokens)
  if afterFactor.isEmpty then
    (factorExp, afterFactor)
  else
    val (subOpt, afterSub) = parseExprSub(afterFactor)
    subOpt match
      case Some(o, right) => (Binop(o, factorExp, right), afterSub)
      case None => (factorExp, afterFactor)

def parseExprSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match
  case OperToken(o: Binary) if List(Binary.ADD, Binary.SUB).contains(o) =>
    val (factorExp, after) = parseFactor(tokens.drop(1))
    if after.isEmpty then
      (Some(o, factorExp), after)
    else
      val (nextSubOpt, nextAfter) = parseExprSub(after)
      nextSubOpt match
        case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter)
        case _ => (Some(o, factorExp), after)
  case _ => (None, tokens)

def parseFactor(tokens: List[Token]): (Exp, List[Token]) =
  val (termExp, afterTerm) = parseTerm(tokens)
  if afterTerm.isEmpty then
    (termExp, afterTerm)
  else
    val (subOpt, afterSub) = parseFactorSub(afterTerm)
    subOpt match
      case Some(o, right) => (Binop(o, termExp, right), afterSub)
      case None => (termExp, afterTerm)

def parseFactorSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match
  case OperToken(o: Binary) if List(Binary.MULT, Binary.DIV).contains(o) =>
    val (termExp, after) = parseTerm(tokens.drop(1))
    if after.isEmpty then
      (Some(o, termExp), after)
    else
      val (nextSubOpt, nextAfter) = parseFactorSub(after)
      nextSubOpt match
        case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, termExp, rightExp)), nextAfter)
        case _ => (Some(o, termExp), after)
  case _ => (None, tokens)

def parseTerm(tokens: List[Token]): (Exp, List[Token]) = tokens.head match
  case LeftParenToken =>
    val (exprExp, afterExpr) = parseExpr(tokens.drop(1))
    if afterExpr.isEmpty then
      throw IllegalStateException("Expect a right parenthesis")
    afterExpr.head match
      case RightParenToken =>
        (exprExp, afterExpr.tail)
      case _ => throw IllegalStateException("Unmatched left parenthesis")
  case _ => parseLiteral(tokens)

def parseLiteral(tokens: List[Token]) = parseInt(tokens)


@main
def main(): Unit =
  val text =
    """
      3 + 4 * 6 + 7 / 6
      """.stripMargin

  // This also works: 3 + 4 * (6 + 7 * 2) / 6
  // This will raise an error: 3 + 4 * ( 6 + 7
  // This will also raise an error: 3 + 4 * (6 + (    or    3 + 4 * ) (
  // And this: 3 + 4 + + + *

  val tokens = tokenize(text)
  val res = parse(tokens)

  println(res)

## ParseAnyLevelExpr.scala
val OPERATORS = Map(
  1 -> List(Binary.ADD, Binary.SUB),
  2 -> List(Binary.MULT, Binary.DIV),
)

// Level indicate the priority of an operator, which is and index to a list of symbols


def parse(tokens: List[Token]): Exp =
  val res = parseExprOfLevel(1, tokens)
  if res._2.isEmpty then
    res._1
  else
    throw IllegalStateException("There are remained tokens: " + res._2.toString)


def parseExprOfLevel(level: Int, tokens: List[Token]): (Exp, List[Token]) =
  val (factorExp, afterFactor) =
    if level < 2 then parseExprOfLevel(level + 1, tokens) else parseTerm(tokens)
  if afterFactor.isEmpty then
    (factorExp, afterFactor)
  else
    val (subOpt, afterSub) = parseExprSubOfLevel(level, afterFactor)
    subOpt match
      case Some(o, right) => (Binop(o, factorExp, right), afterSub)
      case None => (factorExp, afterFactor)

def parseExprSubOfLevel(level: Int, tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) =
  if tokens.isEmpty then
    throw IllegalStateException("expect a complement of expr")
  else tokens.head match
    case OperToken(o: Binary) if OPERATORS(level).contains(o) =>
      val (factorExp, after) = if level < 2 then parseExprOfLevel(level + 1, tokens.drop(1)) else parseTerm(tokens.drop(1))
      if after.isEmpty then
        (Some(o, factorExp), after)
      else
        val (nextSubOpt, nextAfter) = parseExprSubOfLevel(level, after)
        nextSubOpt match
          case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter)
          case _ => (Some(o, factorExp), after)
    case _ => (None, tokens)

## Signatures.scala
package sig

trait Op

enum Binary extends Op {
  case ADD, SUB, MULT, DIV, MOD
}

sealed class Exp

case class I(value: Int) extends Exp
case class Binop(op: Binary, left: Exp, right: Exp) extends Exp
// other case class omitted

## Tokens.scala
package parser

sealed trait Token
case class IntToken(value: Int) extends Token
case class OperToken(op: Op) extends Token
case object LeftParenToken extends Token
case object RightParenToken extends Token
// other case class omitted
	package parser

	import lexer.{LeftParenToken, RightParenToken, OperToken, Token, tokenize}
	import sig.{Binary, Binop, Exp}

	// A simple example to build a recursive descent parser in Scala,
	// in a rather imperative coding style

	// Mixing with the FP paradigm, this imperative coding style lead to very verbose code with lots of
	// nested pattern matching and if-else guards. But at least the code should works well.
	// If we can fill out every corner cases.

	// This is not what we are looking for to build a larger complex language, but it should give a sufficient
	// small working example of how things are done without mutating our list in place.


	/*
	expr ::= factor exprSub
	exprSub ::= ε \| (+\|-) factor exprSub
	factor ::= term factorSub
	factorSub ::= ε \| (*\|/) term factorSub
	term ::= '(' expr ')' \| literal
	* */

	def parse(tokens: List[Token]): Exp =
	val res = parseExpr(tokens)
	if res._2.isEmpty then
	res._1
	else
	throw IllegalStateException("There are remained tokens: " + res._2.toString)


	def parseExpr(tokens: List[Token]): (Exp, List[Token]) =
	val (factorExp, afterFactor) = parseFactor(tokens)
	if afterFactor.isEmpty then
	(factorExp, afterFactor)
	else
	val (subOpt, afterSub) = parseExprSub(afterFactor)
	subOpt match
	case Some(o, right) => (Binop(o, factorExp, right), afterSub)
	case None => (factorExp, afterFactor)

	def parseExprSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match
	case OperToken(o: Binary) if List(Binary.ADD, Binary.SUB).contains(o) =>
	val (factorExp, after) = parseFactor(tokens.drop(1))
	if after.isEmpty then
	(Some(o, factorExp), after)
	else
	val (nextSubOpt, nextAfter) = parseExprSub(after)
	nextSubOpt match
	case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter)
	case _ => (Some(o, factorExp), after)
	case _ => (None, tokens)

	def parseFactor(tokens: List[Token]): (Exp, List[Token]) =
	val (termExp, afterTerm) = parseTerm(tokens)
	if afterTerm.isEmpty then
	(termExp, afterTerm)
	else
	val (subOpt, afterSub) = parseFactorSub(afterTerm)
	subOpt match
	case Some(o, right) => (Binop(o, termExp, right), afterSub)
	case None => (termExp, afterTerm)

	def parseFactorSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match
	case OperToken(o: Binary) if List(Binary.MULT, Binary.DIV).contains(o) =>
	val (termExp, after) = parseTerm(tokens.drop(1))
	if after.isEmpty then
	(Some(o, termExp), after)
	else
	val (nextSubOpt, nextAfter) = parseFactorSub(after)
	nextSubOpt match
	case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, termExp, rightExp)), nextAfter)
	case _ => (Some(o, termExp), after)
	case _ => (None, tokens)

	def parseTerm(tokens: List[Token]): (Exp, List[Token]) = tokens.head match
	case LeftParenToken =>
	val (exprExp, afterExpr) = parseExpr(tokens.drop(1))
	if afterExpr.isEmpty then
	throw IllegalStateException("Expect a right parenthesis")
	afterExpr.head match
	case RightParenToken =>
	(exprExp, afterExpr.tail)
	case _ => throw IllegalStateException("Unmatched left parenthesis")
	case _ => parseLiteral(tokens)

	def parseLiteral(tokens: List[Token]) = parseInt(tokens)


	@main
	def main(): Unit =
	val text =
	"""
	3 + 4 * 6 + 7 / 6
	""".stripMargin

	// This also works: 3 + 4 * (6 + 7 * 2) / 6
	// This will raise an error: 3 + 4 * ( 6 + 7
	// This will also raise an error: 3 + 4 * (6 + ( or 3 + 4 * ) (
	// And this: 3 + 4 + + + *

	val tokens = tokenize(text)
	val res = parse(tokens)

	println(res)
	val OPERATORS = Map(
	1 -> List(Binary.ADD, Binary.SUB),
	2 -> List(Binary.MULT, Binary.DIV),
	)

	// Level indicate the priority of an operator, which is and index to a list of symbols


	def parse(tokens: List[Token]): Exp =
	val res = parseExprOfLevel(1, tokens)
	if res._2.isEmpty then
	res._1
	else
	throw IllegalStateException("There are remained tokens: " + res._2.toString)


	def parseExprOfLevel(level: Int, tokens: List[Token]): (Exp, List[Token]) =
	val (factorExp, afterFactor) =
	if level < 2 then parseExprOfLevel(level + 1, tokens) else parseTerm(tokens)
	if afterFactor.isEmpty then
	(factorExp, afterFactor)
	else
	val (subOpt, afterSub) = parseExprSubOfLevel(level, afterFactor)
	subOpt match
	case Some(o, right) => (Binop(o, factorExp, right), afterSub)
	case None => (factorExp, afterFactor)

	def parseExprSubOfLevel(level: Int, tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) =
	if tokens.isEmpty then
	throw IllegalStateException("expect a complement of expr")
	else tokens.head match
	case OperToken(o: Binary) if OPERATORS(level).contains(o) =>
	val (factorExp, after) = if level < 2 then parseExprOfLevel(level + 1, tokens.drop(1)) else parseTerm(tokens.drop(1))
	if after.isEmpty then
	(Some(o, factorExp), after)
	else
	val (nextSubOpt, nextAfter) = parseExprSubOfLevel(level, after)
	nextSubOpt match
	case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter)
	case _ => (Some(o, factorExp), after)
	case _ => (None, tokens)
	package sig

	trait Op

	enum Binary extends Op {
	case ADD, SUB, MULT, DIV, MOD
	}

	sealed class Exp

	case class I(value: Int) extends Exp
	case class Binop(op: Binary, left: Exp, right: Exp) extends Exp
	// other case class omitted
	package parser

	sealed trait Token
	case class IntToken(value: Int) extends Token
	case class OperToken(op: Op) extends Token
	case object LeftParenToken extends Token
	case object RightParenToken extends Token
	// other case class omitted