Last active
December 31, 2017 15:52
-
-
Save yuk1ty/b6ab0859742a0b00aa833925531bba99 to your computer and use it in GitHub Desktop.
言語実装パターン2章 LL(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import exception.ParseException | |
import token._ | |
trait Lexer { | |
protected val input: String | |
protected var p: Int | |
protected var current: Char | |
protected var eof: Boolean = false | |
def nextToken(): Either[Exception, ListToken] | |
protected def consume(): Unit = { | |
p = p + 1 | |
if (p >= input.length) eof = true | |
else current = input.charAt(p) | |
} | |
protected def matching(x: Char): Either[Exception, Unit] = { | |
if (current == x) { | |
consume() | |
Right(()) | |
} else { | |
Left(ParseException()) | |
} | |
} | |
} | |
case class ListLexer(input: String) extends Lexer { | |
override protected var p: Int = 0 | |
override protected var current: Char = input.charAt(p) | |
override def nextToken(): Either[Exception, ListToken] = { | |
while (eof) { | |
current match { | |
case ' ' | '\t' | '\r' | '\n' => whitespace(); | |
case ',' => | |
consume() | |
Right(ListToken(Comma, ",")) | |
case '[' => | |
consume() | |
Right(ListToken(Lbrack, "[")) | |
case ']' => | |
consume() | |
Right(ListToken(Rbrack, "]")) | |
case _ if isLetter => Right(name()) | |
case _ => Left(ParseException()) | |
} | |
} | |
Right(ListToken(EofType, "<EOF>")) | |
} | |
private[this] def whitespace(): Unit = { | |
do { | |
consume() | |
} while (current == ' ' || current == '\t' || current == '\r' || current == '\n') | |
} | |
private[this] def name(): ListToken = { | |
val buf = StringBuilder.newBuilder | |
do { | |
buf.append(current) | |
consume() | |
} while (isLetter) | |
ListToken(Name, buf.toString()) | |
} | |
private[this] def isLetter: Boolean = current.isLetter | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import exception.ParseException | |
import token._ | |
trait Parser { | |
protected val input: Lexer | |
protected var lookahead: Option[ListToken] | |
protected def matching(x: TokenType): Either[Exception, Unit] = { | |
val consumed = for { | |
unwrappedLookahead <- lookahead | |
} yield { | |
if (unwrappedLookahead.tokenType == x) consume() | |
else Left(ParseException()) | |
} | |
consumed match { | |
case Some(inner) => inner | |
case None => Left(ParseException()) | |
} | |
} | |
protected def consume(): Either[Exception, Unit] = { | |
input.nextToken() match { | |
case Right(inner) => | |
lookahead = Some(inner) | |
Right(()) | |
case Left(e) => Left(e) | |
} | |
} | |
} | |
case class ListParser(input: Lexer) extends Parser { | |
override protected var lookahead: Option[ListToken] = None | |
def list(): Either[Exception, Unit] = { | |
matching(Lbrack) | |
.flatMap(_ => elements()) | |
.flatMap(_ => matching(Rbrack)) | |
.getOrElse(Left(_)) | |
} | |
private[this] def elements(): Either[Exception, Unit] = { | |
element() | |
for { | |
unwrappedLookahead <- lookahead | |
} yield { | |
while (unwrappedLookahead.tokenType == Comma) { | |
element() match { | |
case Right(_) => Right(()) | |
case Left(e) => return Left(e) | |
} | |
} | |
} | |
Right(()) | |
} | |
private[this] def element(): Either[Exception, Unit] = { | |
for { | |
unwrappedLookahead <- lookahead | |
} yield { | |
if (unwrappedLookahead.tokenType == Name) Right(matching(Name)) | |
else if (unwrappedLookahead.tokenType == Lbrack) Right(list()) | |
else Left(ParseException()) | |
} | |
Right(()) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package token | |
case class ListToken(`type`: TokenType, text: String) { | |
override def toString: String = s"<'$text', ${`type`.tokenName}>" | |
} | |
trait TokenType { | |
def tokenName: String | |
} | |
case object Name extends TokenType { | |
override def tokenName: String = "NAME" | |
} | |
case object Comma extends TokenType { | |
override def tokenName: String = "COMMA" | |
} | |
case object Lbrack extends TokenType { | |
override def tokenName: String = "LBRACK" | |
} | |
case object Rbrack extends TokenType { | |
override def tokenName: String = "RBRACK" | |
} | |
case object EofType extends TokenType { | |
override def tokenName: String = "EOF" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment