Created
March 9, 2016 01:20
-
-
Save phenan/dca37c0fd0fa575f67d8 to your computer and use it in GitHub Desktop.
Bottom-up packrat parser combinator supporting left recursion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package lldp | |
import scala.collection.immutable.PagedSeq | |
import scala.language.higherKinds | |
import scala.language.implicitConversions | |
import scala.language.reflectiveCalls | |
/** | |
* Created by ichikawa on 2016/03/07. | |
*/ | |
trait Parsers [E, S] { | |
def start: PackratParser[E, S] | |
def parse (in: Reader[E]): ParseResult[E, S] = Parsers.lookup(in, runParsers(in), start) | |
def elem (e: E): Parser[E, E] = new TerminalParser(e.toString, _ == e) | |
def elem (kind: String, f: E => Boolean): Parser[E, E] = new TerminalParser(kind, f) | |
def success [R] (value: R): Parser[E, R] = new PureParser(value) | |
implicit def memoized [R] (parser: => Parser[E, R]): PackratParser[E, R] = new PackratParser[E, R](parser) | |
private lazy val packratParsers: List[PackratParser[E, _]] = collectParsers(List(start), Set.empty, Nil) | |
private def collectParsers (unchecked: List[Parser[E, _]], checked: Set[Parser[E, _]], parsers: List[PackratParser[E, _]]): List[PackratParser[E, _]] = unchecked match { | |
case p :: ps if checked.contains(p) => collectParsers(ps, checked, parsers) | |
case (p: PackratParser[E, _]) :: ps => collectParsers(ps :+ p.parser, checked + p, p :: parsers) | |
case p :: ps => collectParsers(ps ++ p.children, checked + p, parsers) | |
case Nil => parsers | |
} | |
private def runParsers (in: Reader[E]): Parsers.ResultTable[E] = runParsers(in, identity[Parsers.ResultTable[E]]) | |
private def runParsers (in: Reader[E], cnt: Parsers.ResultTable[E] => Parsers.ResultTable[E]): Parsers.ResultTable[E] = { | |
if (in.hasNext) runParsers(in.tail, table => cnt(table + (in -> runParsers(in, table)))) | |
else cnt(Map(in -> failResults)) | |
} | |
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E]): Parsers.ParseResults[E] = { | |
runParsers(in, table + (in -> failResults), failResults) | |
} | |
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E], results: Parsers.ParseResults[E]): Parsers.ParseResults[E] = { | |
val newResults = runParsers(in, table, packratParsers, results) | |
if (newResults != results) runParsers(in, table + (in -> newResults), newResults) | |
else results | |
} | |
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E], parsers: List[PackratParser[E, _]], results: Parsers.ParseResults[E]): Parsers.ParseResults[E] = parsers match { | |
case p :: rest => | |
val result = p.parser.runParser(in, table) | |
if (result.isBetterThan(Parsers.lookup(in, table, p))) runParsers(in, table, rest, results + (p -> result)) | |
else runParsers(in, table, rest, results) | |
case Nil => results | |
} | |
private lazy val failResults: Parsers.ParseResults[E] = packratParsers.foldLeft[Parsers.ParseResults[E]] (Parsers.emptyResults[E]) { | |
(pm, p) => pm + (p -> ParseFailure[E]("parse failure")) | |
} | |
} | |
object Parsers { | |
protected[lldp] type ParseResults[E] = PMap[({ type Key[T] = PackratParser[E, T] })#Key, ({ type Value[T] = ParseResult[E, T] })#Value] | |
protected[lldp] type ResultTable[E] = Map[Reader[E], ParseResults[E]] | |
protected[lldp] def emptyResults[E]: ParseResults[E] = PMap.empty[({ type Key[T] = PackratParser[E, T] })#Key, ({ type Value[T] = ParseResult[E, T] })#Value] | |
protected[lldp] def lookup [E, R] (in: Reader[E], table: Parsers.ResultTable[E], parser: PackratParser[E, R]): ParseResult[E, R] = { | |
table.getOrElse(in, emptyResults[E]).getOrElse(parser, ParseFailure("parse failure")) | |
} | |
} | |
trait Reader [E] { | |
def head: Option[E] | |
def tail: Reader[E] | |
def hasNext: Boolean = head.nonEmpty | |
def >= (that: Reader[E]): Boolean = { | |
if (this == that) true | |
else if (! that.hasNext) false | |
else >= (that.tail) | |
} | |
def <= (that: Reader[E]): Boolean = that >= this | |
def > (that: Reader[E]): Boolean = ! (this <= that) | |
def < (that: Reader[E]): Boolean = ! (this >= that) | |
} | |
case class PagedSeqReader [E] (seq: PagedSeq[E], index: Int) extends Reader[E] { | |
def head: Option[E] = { | |
if (seq.isDefinedAt(index)) Some(seq(index)) | |
else None | |
} | |
def tail: Reader[E] = { | |
if (seq.isDefinedAt(index)) PagedSeqReader(seq, index + 1) | |
else this | |
} | |
override def >= (that: Reader[E]): Boolean = that match { | |
case PagedSeqReader(s, ind) if seq == s => index >= ind | |
case _ => super.>=(that) | |
} | |
} | |
case class StringReader (seq: String, index: Int) extends Reader[Char] { | |
def head: Option[Char] = { | |
if (seq.isDefinedAt(index)) Some(seq.charAt(index)) | |
else None | |
} | |
def tail: Reader[Char] = { | |
if (seq.isDefinedAt(index)) StringReader(seq, index + 1) | |
else this | |
} | |
override def >= (that: Reader[Char]): Boolean = that match { | |
case StringReader(s, ind) if seq == s => index >= ind | |
case _ => super.>=(that) | |
} | |
} | |
sealed trait Parser [E, +R] { | |
def ~ [U] (that: => Parser[E, U]): Parser[E, (R, U)] = new SequentialParser(this, that, (r1: R, r2: U) => (r1, r2)) | |
def ~> [U] (that: => Parser[E, U]): Parser[E, U] = new SequentialParser(this, that, (_: R, r2: U) => r2) | |
def <~ [U] (that: => Parser[E, U]): Parser[E, R] = new SequentialParser[E, R, U, R](this, that, (r1, _) => r1) | |
def | [U >: R] (that: Parser[E, U]): Parser[E, U] = new ParallelParser[E, R, U, U](this, that, (r1, r2) => r1.orElse(r2)) | |
def ||| [U >: R] (that: Parser[E, U]): Parser[E, U] = new ParallelParser[E, R, U, U](this, that, (r1, r2) => r1.best(r2)) | |
def ^^ [U] (f: R => U): Parser[E, U] = new MapParser(this, f) | |
def * : Parser[E, List[R]] = this.+ | new PureParser(Nil) | |
def + : Parser[E, List[R]] = { | |
lazy val rep: Parser[E, List[R]] = this ~ rep ^^ { case (x, xs) => x :: xs } | this ^^ { List(_) } | |
rep | |
} | |
protected[lldp] def children: List[Parser[E, _]] | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] | |
} | |
class PureParser [E, +R] (value: R) extends Parser[E, R] { | |
protected[lldp] def children: List[Parser[E, _]] = Nil | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = ParseSuccess(value, in) | |
} | |
class TerminalParser [E] (kind: String, f: E => Boolean) extends Parser[E, E] { | |
protected[lldp] def children: List[Parser[E, _]] = Nil | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, E] = { | |
in.head.filter(f).fold[ParseResult[E, E]](ParseFailure("fail to parse terminal " + kind))(e => ParseSuccess(e, in.tail)) | |
} | |
} | |
class SequentialParser [E, R1, R2, +R] (leftParser: => Parser[E, R1], rightParser: => Parser[E, R2], f: (R1, R2) => R) extends Parser[E, R] { | |
lazy val left = leftParser | |
lazy val right = rightParser | |
protected[lldp] def children: List[Parser[E, _]] = List(left, right) | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = { | |
left.runParser(in, table).andThen((in2, r1) => right.runParser(in2, table).map(r2 => f(r1, r2))) | |
} | |
} | |
class ParallelParser [E, R1, R2, +R] (leftParser: => Parser[E, R1], rightParser: => Parser[E, R2], f: (ParseResult[E, R1], ParseResult[E, R2]) => ParseResult[E, R]) extends Parser[E, R] { | |
lazy val left = leftParser | |
lazy val right = rightParser | |
protected[lldp] def children: List[Parser[E, _]] = List(left, right) | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = { | |
f(left.runParser(in, table), right.runParser(in, table)) | |
} | |
} | |
class MapParser [E, T, R] (thunk: => Parser[E, T], f: T => R) extends Parser[E, R] { | |
lazy val parser = thunk | |
protected[lldp] def children: List[Parser[E, _]] = List(parser) | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = parser.runParser(in, table).map(f) | |
} | |
class PackratParser [E, +R] (thunk: => Parser[E, R]) extends Parser[E, R] { | |
lazy val parser = thunk | |
protected[lldp] def children: List[Parser[E, _]] = List(parser) | |
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = Parsers.lookup(in, table, this) | |
} | |
sealed trait ParseResult [E, +R] { | |
def map [U] (f: R => U): ParseResult[E, U] | |
def andThen [U] (f: (Reader[E], R) => ParseResult[E, U]): ParseResult[E, U] | |
def isBetterThan [U >: R] (that: ParseResult[E, U]): Boolean | |
def best [U >: R] (that: ParseResult[E, U]): ParseResult[E, U] = { | |
if (isBetterThan(that)) this | |
else that | |
} | |
def orElse [U >: R] (that: ParseResult[E, U]): ParseResult[E, U] | |
} | |
case class ParseSuccess [E, +R] (value: R, in: Reader[E]) extends ParseResult[E, R] { | |
def map [U] (f: R => U): ParseResult[E, U] = ParseSuccess(f(value), in) | |
def andThen[U] (f: (Reader[E], R) => ParseResult[E, U]): ParseResult[E, U] = f(in, value) | |
def isBetterThan [U >: R] (that: ParseResult[E, U]): Boolean = that match { | |
case ParseSuccess(u, in2) => in > in2 | |
case ParseFailure(_) => true | |
} | |
def orElse [U >: R] (that: ParseResult[E, U]): ParseResult[E, U] = this | |
} | |
case class ParseFailure [E] (msg: String) extends ParseResult[E, Nothing] { | |
def map [U] (f: Nothing => U): ParseResult[E, U] = this | |
def andThen [U] (f: (Reader[E], Nothing) => ParseResult[E, U]): ParseResult[E, U] = this | |
def isBetterThan[U >: Nothing](that: ParseResult[E, U]): Boolean = false | |
def orElse [U] (that: ParseResult[E, U]): ParseResult[E, U] = that | |
} | |
trait PMap [K[_], V[_]] { | |
def get [T] (key: K[T]): Option[V[T]] | |
def + [T] (kv: (K[T], V[T])): PMap[K, V] | |
def getOrElse [T] (key: K[T], default: => V[T]): V[T] | |
} | |
object PMap { | |
def empty[K[_], V[_]]: PMap[K, V] = new PMapImpl[K, V](Map.empty) | |
private case class PMapImpl [K[_], V[_]](map: Map[K[_], V[_]]) extends PMap[K, V] { | |
def get[T](key: K[T]): Option[V[T]] = map.get(key).map(_.asInstanceOf[V[T]]) | |
def +[T](kv: (K[T], V[T])): PMap[K, V] = new PMapImpl[K, V](map + kv) | |
def getOrElse [T] (key: K[T], default: => V[T]): V[T] = map.getOrElse(key, default).asInstanceOf[V[T]] | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment