Last active
April 15, 2024 20:57
-
-
Save kokoro-aya/218ce6b3ee050fbdeee072730d380b6c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package parser | |
import lexer.{LeftParenToken, RightParenToken, OperToken, Token, tokenize} | |
import sig.{Binary, Binop, Exp} | |
// A simple example to build a recursive descent parser in Scala, | |
// in a rather imperative coding style | |
// Mixing with the FP paradigm, this imperative coding style lead to very verbose code with lots of | |
// nested pattern matching and if-else guards. But at least the code should works well. | |
// If we can fill out every corner cases. | |
// This is not what we are looking for to build a larger complex language, but it should give a sufficient | |
// small working example of how things are done without mutating our list in place. | |
/* | |
expr ::= factor exprSub | |
exprSub ::= ε | (+|-) factor exprSub | |
factor ::= term factorSub | |
factorSub ::= ε | (*|/) term factorSub | |
term ::= '(' expr ')' | literal | |
* */ | |
def parse(tokens: List[Token]): Exp = | |
val res = parseExpr(tokens) | |
if res._2.isEmpty then | |
res._1 | |
else | |
throw IllegalStateException("There are remained tokens: " + res._2.toString) | |
def parseExpr(tokens: List[Token]): (Exp, List[Token]) = | |
val (factorExp, afterFactor) = parseFactor(tokens) | |
if afterFactor.isEmpty then | |
(factorExp, afterFactor) | |
else | |
val (subOpt, afterSub) = parseExprSub(afterFactor) | |
subOpt match | |
case Some(o, right) => (Binop(o, factorExp, right), afterSub) | |
case None => (factorExp, afterFactor) | |
def parseExprSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match | |
case OperToken(o: Binary) if List(Binary.ADD, Binary.SUB).contains(o) => | |
val (factorExp, after) = parseFactor(tokens.drop(1)) | |
if after.isEmpty then | |
(Some(o, factorExp), after) | |
else | |
val (nextSubOpt, nextAfter) = parseExprSub(after) | |
nextSubOpt match | |
case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter) | |
case _ => (Some(o, factorExp), after) | |
case _ => (None, tokens) | |
def parseFactor(tokens: List[Token]): (Exp, List[Token]) = | |
val (termExp, afterTerm) = parseTerm(tokens) | |
if afterTerm.isEmpty then | |
(termExp, afterTerm) | |
else | |
val (subOpt, afterSub) = parseFactorSub(afterTerm) | |
subOpt match | |
case Some(o, right) => (Binop(o, termExp, right), afterSub) | |
case None => (termExp, afterTerm) | |
def parseFactorSub(tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = tokens.head match | |
case OperToken(o: Binary) if List(Binary.MULT, Binary.DIV).contains(o) => | |
val (termExp, after) = parseTerm(tokens.drop(1)) | |
if after.isEmpty then | |
(Some(o, termExp), after) | |
else | |
val (nextSubOpt, nextAfter) = parseFactorSub(after) | |
nextSubOpt match | |
case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, termExp, rightExp)), nextAfter) | |
case _ => (Some(o, termExp), after) | |
case _ => (None, tokens) | |
def parseTerm(tokens: List[Token]): (Exp, List[Token]) = tokens.head match | |
case LeftParenToken => | |
val (exprExp, afterExpr) = parseExpr(tokens.drop(1)) | |
if afterExpr.isEmpty then | |
throw IllegalStateException("Expect a right parenthesis") | |
afterExpr.head match | |
case RightParenToken => | |
(exprExp, afterExpr.tail) | |
case _ => throw IllegalStateException("Unmatched left parenthesis") | |
case _ => parseLiteral(tokens) | |
def parseLiteral(tokens: List[Token]) = parseInt(tokens) | |
@main | |
def main(): Unit = | |
val text = | |
""" | |
3 + 4 * 6 + 7 / 6 | |
""".stripMargin | |
// This also works: 3 + 4 * (6 + 7 * 2) / 6 | |
// This will raise an error: 3 + 4 * ( 6 + 7 | |
// This will also raise an error: 3 + 4 * (6 + ( or 3 + 4 * ) ( | |
// And this: 3 + 4 + + + * | |
val tokens = tokenize(text) | |
val res = parse(tokens) | |
println(res) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
val OPERATORS = Map( | |
1 -> List(Binary.ADD, Binary.SUB), | |
2 -> List(Binary.MULT, Binary.DIV), | |
) | |
// Level indicate the priority of an operator, which is and index to a list of symbols | |
def parse(tokens: List[Token]): Exp = | |
val res = parseExprOfLevel(1, tokens) | |
if res._2.isEmpty then | |
res._1 | |
else | |
throw IllegalStateException("There are remained tokens: " + res._2.toString) | |
def parseExprOfLevel(level: Int, tokens: List[Token]): (Exp, List[Token]) = | |
val (factorExp, afterFactor) = | |
if level < 2 then parseExprOfLevel(level + 1, tokens) else parseTerm(tokens) | |
if afterFactor.isEmpty then | |
(factorExp, afterFactor) | |
else | |
val (subOpt, afterSub) = parseExprSubOfLevel(level, afterFactor) | |
subOpt match | |
case Some(o, right) => (Binop(o, factorExp, right), afterSub) | |
case None => (factorExp, afterFactor) | |
def parseExprSubOfLevel(level: Int, tokens: List[Token]): (Option[(Binary, Exp)], List[Token]) = | |
if tokens.isEmpty then | |
throw IllegalStateException("expect a complement of expr") | |
else tokens.head match | |
case OperToken(o: Binary) if OPERATORS(level).contains(o) => | |
val (factorExp, after) = if level < 2 then parseExprOfLevel(level + 1, tokens.drop(1)) else parseTerm(tokens.drop(1)) | |
if after.isEmpty then | |
(Some(o, factorExp), after) | |
else | |
val (nextSubOpt, nextAfter) = parseExprSubOfLevel(level, after) | |
nextSubOpt match | |
case Some(innerOp, rightExp) => (Some(o, Binop(innerOp, factorExp, rightExp)), nextAfter) | |
case _ => (Some(o, factorExp), after) | |
case _ => (None, tokens) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package sig | |
trait Op | |
enum Binary extends Op { | |
case ADD, SUB, MULT, DIV, MOD | |
} | |
sealed class Exp | |
case class I(value: Int) extends Exp | |
case class Binop(op: Binary, left: Exp, right: Exp) extends Exp | |
// other case class omitted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package parser | |
sealed trait Token | |
case class IntToken(value: Int) extends Token | |
case class OperToken(op: Op) extends Token | |
case object LeftParenToken extends Token | |
case object RightParenToken extends Token | |
// other case class omitted |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment