Skip to content

Instantly share code, notes, and snippets.

@RaasAhsan
Last active January 8, 2016 22:55
Show Gist options
  • Save RaasAhsan/07b830455e2ca4c615fa to your computer and use it in GitHub Desktop.
Save RaasAhsan/07b830455e2ca4c615fa to your computer and use it in GitHub Desktop.
A tiny parser combinator library that supports sequencing.
package ch6
import scala.util.matching.Regex
object ParserTesting {
sealed trait ParseResult[+A, +F]
case class ParseSuccess[A](parsed: A, unparsed: String) extends ParseResult[A, Nothing]
case class ParseFailure[F](error: F) extends ParseResult[Nothing, F]
case class Parser[+A, F](parse: String => ParseResult[A, F]) {
def map[B](f: A => B): Parser[B, F] = Parser({ s: String =>
parse(s) match {
case ParseSuccess(a, b) => ParseSuccess(f(a), b)
case ParseFailure(e) => ParseFailure(e)
}
})
def flatMap[B](f: A => Parser[B, F]): Parser[B, F] = Parser({ s: String =>
parse(s) match {
case ParseSuccess(a, b) => f(a).parse(b)
case ParseFailure(e) => ParseFailure(e)
}
})
def <*>[B](p: Parser[A => B, F]): Parser[B, F] = Parser.seq(p, this).map {
case (f, a) => f(a)
}
}
object Parser {
def success(): Parser[Unit, String] = Parser({ s: String =>
ParseSuccess(Unit, s)
})
def string(a: String): Parser[String, String] = Parser({ s: String =>
if (s.startsWith(a)) ParseSuccess(a, s.substring(a.length))
else ParseFailure(s"Expected `${a}`. Found `${s}`")
})
def regexp(r: String): Parser[String, String] = Parser({ s: String =>
val regex = s"^${r}".r
regex.findFirstIn(s) match {
case Some(ret) => ParseSuccess(ret, s.substring(ret.length))
case None => ParseFailure(s"Expected `${r}`.")
}
})
def eof(): Parser[Unit, String] = Parser({ s: String =>
if (s.length > 0) ParseFailure(s"Expected end of string.")
else ParseSuccess(Unit, "")
})
def many[A](p: Parser[A, String]): Parser[List[A], String] = {
Parser.or(Parser.seq(p, many(p)).map{ case (a, l) => a :: l }, Parser.success().map(_ => Nil))
}
def seq[A, B, F](a: Parser[A, F], b: => Parser[B, F]): Parser[(A, B), F] = {
for {
x <- a
y <- b
} yield (x, y)
}
def or[P, A <: P, B <: P](a: Parser[A, String], b: => Parser[B, String]): Parser[P, String] = Parser({ s: String =>
a.parse(s) match {
case ParseSuccess(u, v) => ParseSuccess(u, v)
case _ => b.parse(s) match {
case ParseSuccess(u, v) => ParseSuccess(u, v)
case _ => ParseFailure("Expected a or b.")
}
}
})
}
sealed trait XmlNode
case class XmlElement(name: String, children: List[XmlNode]) extends XmlNode
case class XmlString(value: String) extends XmlNode
object XmlParser {
def xmlName: Parser[String, String] = Parser.regexp("[A-Za-z]+")
def xmlString: Parser[XmlString, String] = Parser.regexp("[A-Za-z0-9]+").map(XmlString)
def xmlElement: Parser[XmlElement, String] = for {
_ <- Parser.string("<")
name <- xmlName
_ <- Parser.string(">")
xmlBody <- xmlBody
_ <- Parser.string("</")
endName <- xmlName
_ <- Parser.string(">")
} yield XmlElement(name, xmlBody)
def xmlBody: Parser[List[XmlNode], String] = Parser.many(Parser.or(
xmlElement,
xmlString
))
}
def main(args: Array[String]) {
// This is essentially sequencing
// val a = for {
// x <- Parser.string("a")
// y <- Parser.string("b")
// z <- Parser.or(Parser.string("c"), Parser.string("d"))
// v <- Parser.regexp("[0-9]")
// _ <- Parser.eof()
// } yield x + y + z + v
//
// println(a.parse("abc0")) // ParseSuccess(abc0,1)
// println(a.parse("abd1")) // ParseSuccess(abd1,)
// println(a.parse("abe")) // ParseFailure(bad error message imo)
println(XmlParser.xmlElement.parse("<div><span>abc<p>stuff</p></span></div>"))
//ParseSuccess(XmlElement(div,List(XmlElement(span,List(XmlString(abc), XmlElement(p,List(XmlString(stuff))))))),)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment