Skip to content

Instantly share code, notes, and snippets.

@phenan
Created March 9, 2016 01:20
Show Gist options
  • Save phenan/dca37c0fd0fa575f67d8 to your computer and use it in GitHub Desktop.
Save phenan/dca37c0fd0fa575f67d8 to your computer and use it in GitHub Desktop.
Bottom-up packrat parser combinator supporting left recursion
package lldp
import scala.collection.immutable.PagedSeq
import scala.language.higherKinds
import scala.language.implicitConversions
import scala.language.reflectiveCalls
/**
* Created by ichikawa on 2016/03/07.
*/
trait Parsers [E, S] {
def start: PackratParser[E, S]
def parse (in: Reader[E]): ParseResult[E, S] = Parsers.lookup(in, runParsers(in), start)
def elem (e: E): Parser[E, E] = new TerminalParser(e.toString, _ == e)
def elem (kind: String, f: E => Boolean): Parser[E, E] = new TerminalParser(kind, f)
def success [R] (value: R): Parser[E, R] = new PureParser(value)
implicit def memoized [R] (parser: => Parser[E, R]): PackratParser[E, R] = new PackratParser[E, R](parser)
private lazy val packratParsers: List[PackratParser[E, _]] = collectParsers(List(start), Set.empty, Nil)
private def collectParsers (unchecked: List[Parser[E, _]], checked: Set[Parser[E, _]], parsers: List[PackratParser[E, _]]): List[PackratParser[E, _]] = unchecked match {
case p :: ps if checked.contains(p) => collectParsers(ps, checked, parsers)
case (p: PackratParser[E, _]) :: ps => collectParsers(ps :+ p.parser, checked + p, p :: parsers)
case p :: ps => collectParsers(ps ++ p.children, checked + p, parsers)
case Nil => parsers
}
private def runParsers (in: Reader[E]): Parsers.ResultTable[E] = runParsers(in, identity[Parsers.ResultTable[E]])
private def runParsers (in: Reader[E], cnt: Parsers.ResultTable[E] => Parsers.ResultTable[E]): Parsers.ResultTable[E] = {
if (in.hasNext) runParsers(in.tail, table => cnt(table + (in -> runParsers(in, table))))
else cnt(Map(in -> failResults))
}
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E]): Parsers.ParseResults[E] = {
runParsers(in, table + (in -> failResults), failResults)
}
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E], results: Parsers.ParseResults[E]): Parsers.ParseResults[E] = {
val newResults = runParsers(in, table, packratParsers, results)
if (newResults != results) runParsers(in, table + (in -> newResults), newResults)
else results
}
private def runParsers (in: Reader[E], table: Parsers.ResultTable[E], parsers: List[PackratParser[E, _]], results: Parsers.ParseResults[E]): Parsers.ParseResults[E] = parsers match {
case p :: rest =>
val result = p.parser.runParser(in, table)
if (result.isBetterThan(Parsers.lookup(in, table, p))) runParsers(in, table, rest, results + (p -> result))
else runParsers(in, table, rest, results)
case Nil => results
}
private lazy val failResults: Parsers.ParseResults[E] = packratParsers.foldLeft[Parsers.ParseResults[E]] (Parsers.emptyResults[E]) {
(pm, p) => pm + (p -> ParseFailure[E]("parse failure"))
}
}
object Parsers {
protected[lldp] type ParseResults[E] = PMap[({ type Key[T] = PackratParser[E, T] })#Key, ({ type Value[T] = ParseResult[E, T] })#Value]
protected[lldp] type ResultTable[E] = Map[Reader[E], ParseResults[E]]
protected[lldp] def emptyResults[E]: ParseResults[E] = PMap.empty[({ type Key[T] = PackratParser[E, T] })#Key, ({ type Value[T] = ParseResult[E, T] })#Value]
protected[lldp] def lookup [E, R] (in: Reader[E], table: Parsers.ResultTable[E], parser: PackratParser[E, R]): ParseResult[E, R] = {
table.getOrElse(in, emptyResults[E]).getOrElse(parser, ParseFailure("parse failure"))
}
}
trait Reader [E] {
def head: Option[E]
def tail: Reader[E]
def hasNext: Boolean = head.nonEmpty
def >= (that: Reader[E]): Boolean = {
if (this == that) true
else if (! that.hasNext) false
else >= (that.tail)
}
def <= (that: Reader[E]): Boolean = that >= this
def > (that: Reader[E]): Boolean = ! (this <= that)
def < (that: Reader[E]): Boolean = ! (this >= that)
}
case class PagedSeqReader [E] (seq: PagedSeq[E], index: Int) extends Reader[E] {
def head: Option[E] = {
if (seq.isDefinedAt(index)) Some(seq(index))
else None
}
def tail: Reader[E] = {
if (seq.isDefinedAt(index)) PagedSeqReader(seq, index + 1)
else this
}
override def >= (that: Reader[E]): Boolean = that match {
case PagedSeqReader(s, ind) if seq == s => index >= ind
case _ => super.>=(that)
}
}
case class StringReader (seq: String, index: Int) extends Reader[Char] {
def head: Option[Char] = {
if (seq.isDefinedAt(index)) Some(seq.charAt(index))
else None
}
def tail: Reader[Char] = {
if (seq.isDefinedAt(index)) StringReader(seq, index + 1)
else this
}
override def >= (that: Reader[Char]): Boolean = that match {
case StringReader(s, ind) if seq == s => index >= ind
case _ => super.>=(that)
}
}
sealed trait Parser [E, +R] {
def ~ [U] (that: => Parser[E, U]): Parser[E, (R, U)] = new SequentialParser(this, that, (r1: R, r2: U) => (r1, r2))
def ~> [U] (that: => Parser[E, U]): Parser[E, U] = new SequentialParser(this, that, (_: R, r2: U) => r2)
def <~ [U] (that: => Parser[E, U]): Parser[E, R] = new SequentialParser[E, R, U, R](this, that, (r1, _) => r1)
def | [U >: R] (that: Parser[E, U]): Parser[E, U] = new ParallelParser[E, R, U, U](this, that, (r1, r2) => r1.orElse(r2))
def ||| [U >: R] (that: Parser[E, U]): Parser[E, U] = new ParallelParser[E, R, U, U](this, that, (r1, r2) => r1.best(r2))
def ^^ [U] (f: R => U): Parser[E, U] = new MapParser(this, f)
def * : Parser[E, List[R]] = this.+ | new PureParser(Nil)
def + : Parser[E, List[R]] = {
lazy val rep: Parser[E, List[R]] = this ~ rep ^^ { case (x, xs) => x :: xs } | this ^^ { List(_) }
rep
}
protected[lldp] def children: List[Parser[E, _]]
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R]
}
class PureParser [E, +R] (value: R) extends Parser[E, R] {
protected[lldp] def children: List[Parser[E, _]] = Nil
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = ParseSuccess(value, in)
}
class TerminalParser [E] (kind: String, f: E => Boolean) extends Parser[E, E] {
protected[lldp] def children: List[Parser[E, _]] = Nil
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, E] = {
in.head.filter(f).fold[ParseResult[E, E]](ParseFailure("fail to parse terminal " + kind))(e => ParseSuccess(e, in.tail))
}
}
class SequentialParser [E, R1, R2, +R] (leftParser: => Parser[E, R1], rightParser: => Parser[E, R2], f: (R1, R2) => R) extends Parser[E, R] {
lazy val left = leftParser
lazy val right = rightParser
protected[lldp] def children: List[Parser[E, _]] = List(left, right)
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = {
left.runParser(in, table).andThen((in2, r1) => right.runParser(in2, table).map(r2 => f(r1, r2)))
}
}
class ParallelParser [E, R1, R2, +R] (leftParser: => Parser[E, R1], rightParser: => Parser[E, R2], f: (ParseResult[E, R1], ParseResult[E, R2]) => ParseResult[E, R]) extends Parser[E, R] {
lazy val left = leftParser
lazy val right = rightParser
protected[lldp] def children: List[Parser[E, _]] = List(left, right)
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = {
f(left.runParser(in, table), right.runParser(in, table))
}
}
class MapParser [E, T, R] (thunk: => Parser[E, T], f: T => R) extends Parser[E, R] {
lazy val parser = thunk
protected[lldp] def children: List[Parser[E, _]] = List(parser)
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = parser.runParser(in, table).map(f)
}
class PackratParser [E, +R] (thunk: => Parser[E, R]) extends Parser[E, R] {
lazy val parser = thunk
protected[lldp] def children: List[Parser[E, _]] = List(parser)
protected[lldp] def runParser (in: Reader[E], table: Parsers.ResultTable[E]): ParseResult[E, R] = Parsers.lookup(in, table, this)
}
sealed trait ParseResult [E, +R] {
def map [U] (f: R => U): ParseResult[E, U]
def andThen [U] (f: (Reader[E], R) => ParseResult[E, U]): ParseResult[E, U]
def isBetterThan [U >: R] (that: ParseResult[E, U]): Boolean
def best [U >: R] (that: ParseResult[E, U]): ParseResult[E, U] = {
if (isBetterThan(that)) this
else that
}
def orElse [U >: R] (that: ParseResult[E, U]): ParseResult[E, U]
}
case class ParseSuccess [E, +R] (value: R, in: Reader[E]) extends ParseResult[E, R] {
def map [U] (f: R => U): ParseResult[E, U] = ParseSuccess(f(value), in)
def andThen[U] (f: (Reader[E], R) => ParseResult[E, U]): ParseResult[E, U] = f(in, value)
def isBetterThan [U >: R] (that: ParseResult[E, U]): Boolean = that match {
case ParseSuccess(u, in2) => in > in2
case ParseFailure(_) => true
}
def orElse [U >: R] (that: ParseResult[E, U]): ParseResult[E, U] = this
}
case class ParseFailure [E] (msg: String) extends ParseResult[E, Nothing] {
def map [U] (f: Nothing => U): ParseResult[E, U] = this
def andThen [U] (f: (Reader[E], Nothing) => ParseResult[E, U]): ParseResult[E, U] = this
def isBetterThan[U >: Nothing](that: ParseResult[E, U]): Boolean = false
def orElse [U] (that: ParseResult[E, U]): ParseResult[E, U] = that
}
trait PMap [K[_], V[_]] {
def get [T] (key: K[T]): Option[V[T]]
def + [T] (kv: (K[T], V[T])): PMap[K, V]
def getOrElse [T] (key: K[T], default: => V[T]): V[T]
}
object PMap {
def empty[K[_], V[_]]: PMap[K, V] = new PMapImpl[K, V](Map.empty)
private case class PMapImpl [K[_], V[_]](map: Map[K[_], V[_]]) extends PMap[K, V] {
def get[T](key: K[T]): Option[V[T]] = map.get(key).map(_.asInstanceOf[V[T]])
def +[T](kv: (K[T], V[T])): PMap[K, V] = new PMapImpl[K, V](map + kv)
def getOrElse [T] (key: K[T], default: => V[T]): V[T] = map.getOrElse(key, default).asInstanceOf[V[T]]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment