Last active
January 8, 2016 22:55
-
-
Save RaasAhsan/07b830455e2ca4c615fa to your computer and use it in GitHub Desktop.
A tiny parser combinator library that supports sequencing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ch6 | |
import scala.util.matching.Regex | |
object ParserTesting { | |
sealed trait ParseResult[+A, +F] | |
case class ParseSuccess[A](parsed: A, unparsed: String) extends ParseResult[A, Nothing] | |
case class ParseFailure[F](error: F) extends ParseResult[Nothing, F] | |
case class Parser[+A, F](parse: String => ParseResult[A, F]) { | |
def map[B](f: A => B): Parser[B, F] = Parser({ s: String => | |
parse(s) match { | |
case ParseSuccess(a, b) => ParseSuccess(f(a), b) | |
case ParseFailure(e) => ParseFailure(e) | |
} | |
}) | |
def flatMap[B](f: A => Parser[B, F]): Parser[B, F] = Parser({ s: String => | |
parse(s) match { | |
case ParseSuccess(a, b) => f(a).parse(b) | |
case ParseFailure(e) => ParseFailure(e) | |
} | |
}) | |
def <*>[B](p: Parser[A => B, F]): Parser[B, F] = Parser.seq(p, this).map { | |
case (f, a) => f(a) | |
} | |
} | |
object Parser { | |
def success(): Parser[Unit, String] = Parser({ s: String => | |
ParseSuccess(Unit, s) | |
}) | |
def string(a: String): Parser[String, String] = Parser({ s: String => | |
if (s.startsWith(a)) ParseSuccess(a, s.substring(a.length)) | |
else ParseFailure(s"Expected `${a}`. Found `${s}`") | |
}) | |
def regexp(r: String): Parser[String, String] = Parser({ s: String => | |
val regex = s"^${r}".r | |
regex.findFirstIn(s) match { | |
case Some(ret) => ParseSuccess(ret, s.substring(ret.length)) | |
case None => ParseFailure(s"Expected `${r}`.") | |
} | |
}) | |
def eof(): Parser[Unit, String] = Parser({ s: String => | |
if (s.length > 0) ParseFailure(s"Expected end of string.") | |
else ParseSuccess(Unit, "") | |
}) | |
def many[A](p: Parser[A, String]): Parser[List[A], String] = { | |
Parser.or(Parser.seq(p, many(p)).map{ case (a, l) => a :: l }, Parser.success().map(_ => Nil)) | |
} | |
def seq[A, B, F](a: Parser[A, F], b: => Parser[B, F]): Parser[(A, B), F] = { | |
for { | |
x <- a | |
y <- b | |
} yield (x, y) | |
} | |
def or[P, A <: P, B <: P](a: Parser[A, String], b: => Parser[B, String]): Parser[P, String] = Parser({ s: String => | |
a.parse(s) match { | |
case ParseSuccess(u, v) => ParseSuccess(u, v) | |
case _ => b.parse(s) match { | |
case ParseSuccess(u, v) => ParseSuccess(u, v) | |
case _ => ParseFailure("Expected a or b.") | |
} | |
} | |
}) | |
} | |
sealed trait XmlNode | |
case class XmlElement(name: String, children: List[XmlNode]) extends XmlNode | |
case class XmlString(value: String) extends XmlNode | |
object XmlParser { | |
def xmlName: Parser[String, String] = Parser.regexp("[A-Za-z]+") | |
def xmlString: Parser[XmlString, String] = Parser.regexp("[A-Za-z0-9]+").map(XmlString) | |
def xmlElement: Parser[XmlElement, String] = for { | |
_ <- Parser.string("<") | |
name <- xmlName | |
_ <- Parser.string(">") | |
xmlBody <- xmlBody | |
_ <- Parser.string("</") | |
endName <- xmlName | |
_ <- Parser.string(">") | |
} yield XmlElement(name, xmlBody) | |
def xmlBody: Parser[List[XmlNode], String] = Parser.many(Parser.or( | |
xmlElement, | |
xmlString | |
)) | |
} | |
def main(args: Array[String]) { | |
// This is essentially sequencing | |
// val a = for { | |
// x <- Parser.string("a") | |
// y <- Parser.string("b") | |
// z <- Parser.or(Parser.string("c"), Parser.string("d")) | |
// v <- Parser.regexp("[0-9]") | |
// _ <- Parser.eof() | |
// } yield x + y + z + v | |
// | |
// println(a.parse("abc0")) // ParseSuccess(abc0,1) | |
// println(a.parse("abd1")) // ParseSuccess(abd1,) | |
// println(a.parse("abe")) // ParseFailure(bad error message imo) | |
println(XmlParser.xmlElement.parse("<div><span>abc<p>stuff</p></span></div>")) | |
//ParseSuccess(XmlElement(div,List(XmlElement(span,List(XmlString(abc), XmlElement(p,List(XmlString(stuff))))))),) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment