Created
November 25, 2013 13:13
-
-
Save arjunguha/7641003 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cs691f.parsers | |
package attempt0 { | |
class Parser[A](val parse : String => Option[A]) { self => | |
def alt(other : Parser[A]) = | |
new Parser(str => self.parse(str) match { | |
case Some(x) => Some(x) | |
case None => other.parse(str) | |
}) | |
// Sequencing cannot be defined. The parse function needs to return | |
// the remainder of the String. If Strings were mutable, sequencing might | |
// might work, but mutable strings would cause other problems. | |
// def seq ... | |
def | (other: Parser[A]) = self.alt(other) | |
} | |
object Parser { | |
def id[A](v : A) = new Parser[A](str => Some(v)) | |
def fail[A] = new Parser[A](_ => None) | |
def char(ch : Char) = | |
new Parser[Char]({ | |
case str if str.length > 0 && str(0) == ch => Some(ch) | |
case _ => None | |
}) | |
} | |
object Examples { | |
import Parser._ | |
def digit = char('0') | char('1') | char('2') | char('3') | char('4') | | |
char('5') | char('6') | char('7') | char('8') | char('9') | |
def main() = { | |
println(s"${Console.BLUE}Running attempt0 examples...${Console.RESET}") | |
assert(char('A').parse("A") == Some('A')) | |
assert(char('A').parse("Z") == None) | |
assert(digit.parse("100") == Some('1')) | |
} | |
} | |
} | |
// Non-backtracking parser | |
package attempt1 { | |
class Parser[A](val parse : String => Option[(A, String)]) { self => | |
def alt(other : Parser[A]) = | |
new Parser(str => self.parse(str) match { | |
case Some((x, str)) => Some((x, str)) | |
case None => other.parse(str) | |
}) | |
def | (other: Parser[A]) = self.alt(other) | |
// This type would discard the A value: | |
// def seq[B](other : Parser[B]) : Parser[B] | |
// This type wouldn't allow B to depend on A: | |
// def seq[B](other : Parser[B]) : Parser[(A, B)] | |
def seq[B](other : A => Parser[B]) = | |
new Parser[B](str => self.parse(str) match { | |
case None => None | |
case Some((a,str)) => other(a).parse(str) | |
}) | |
// Needed by for { } expressions | |
def flatMap[B](other : A => Parser[B]) = seq(other) | |
// Needed by for { } expressions | |
def map[B](f : A => B) = | |
new Parser[B](str => self.parse(str) match { | |
case None => None | |
case Some ((a, str)) => Some((f(a), str)) | |
}) | |
def withFilter(pred : A => Boolean) = | |
new Parser[A](str => self.parse(str) match { | |
case Some((a, str)) => if (pred(a)) Some((a, str)) else None | |
case None => None | |
}) | |
def filter(pred : A => Boolean) = withFilter(pred) | |
} | |
object Parser { | |
def id[A](v : A) = new Parser[A](str => Some(v, str)) | |
def fail[A] = new Parser[A](_ => None) | |
def char(ch : Char) = | |
new Parser[Char]({ | |
case str if str.length > 0 && str(0) == ch => Some((ch, str.drop(1))) | |
case _ => None | |
}) | |
def string(chs: String) : Parser[String] = | |
new Parser[String]({ | |
case str if str.startsWith(chs) => Some((chs, str.drop(chs.length))) | |
case _ => None | |
}) | |
def eof = new Parser[Unit]({ | |
case "" => Some(((), "")) | |
case _ => None | |
}) | |
// This is a greedy parser | |
def many_clunky[A](p : Parser[A]) : Parser[List[A]] = | |
(p.seq(a => many_clunky(p).seq(as => id (a::as)))) alt id(Nil) | |
def many[A](p : Parser[A]) : Parser[List[A]] = | |
(for { x <- p; xs <- many(p) } yield (x :: xs)) | id(Nil) | |
def many1[A](p : Parser[A]) : Parser[List[A]] = for { | |
x <- many(p) | |
if (!x.isEmpty) | |
} yield x | |
} | |
object Examples { | |
import Parser._ | |
def digit = char('0') | char('1') | char('2') | char('3') | char('4') | | |
char('5') | char('6') | char('7') | char('8') | char('9') | |
def whitespace = many(char(' ') | char('\n') | char('\t')) | |
def number = for { | |
digits <- many1(digit) | |
_ <- whitespace | |
} yield (digits.mkString.toInt) | |
def token(str : String) = for { | |
_ <- string(str) | |
_ <- whitespace | |
} yield str | |
sealed abstract class Exp | |
case class Add(e1 : Exp, e2 : Exp) extends Exp | |
case class Mul(e1 : Exp, e2 : Exp) extends Exp | |
case class Num(n : Int) extends Exp | |
def paren : Parser[Exp] = | |
for { _ <- token("("); e <- exp; _ <- token(")") } yield e | |
def num : Parser[Exp] = | |
for { n <- number } yield Num(n) | |
def atom : Parser[Exp] = | |
paren | num | |
def mulRhs(e1 : Exp) : Parser[Exp] = | |
for { _ <- token("*"); e2 <- mul } yield Mul(e1, e2) | |
def mul : Parser[Exp] = | |
for { e1 <- atom; r <- mulRhs(e1) | id(e1) } yield r | |
def addRhs(e1 : Exp) : Parser[Exp] = | |
for { _ <- token("+"); e2 <- add } yield Add(e1, e2) | |
def add : Parser[Exp] = | |
for { e1 <- mul; r <- addRhs(e1) | id(e1) } yield r | |
def exp = add | |
def main() = { | |
println(s"${Console.BLUE}Running attempt1 examples...${Console.RESET}") | |
assert(char('A').parse("A") == Some('A', "")) | |
assert(char('A').parse("Z") == None) | |
assert(digit.parse("100") == Some('1', "00")) | |
assert(number.parse("100") == Some(100, "")) | |
assert(exp.parse("123") == Some(Num(123), "")) | |
assert(exp.parse("123 + 23") == Some(Add(Num(123), Num(23)), "")) | |
assert(exp.parse("4 * 5") == Some(Mul(Num(4), Num(5)), "")) | |
assert(exp.parse("4 * 5 + 3") == | |
Some(Add(Mul(Num(4), Num(5)), Num(3)), "")) | |
assert(exp.parse("4 * 5 + 3 * 7") == | |
Some(Add(Mul(Num(4), Num(5)), Mul(Num(3), Num(7))), "")) | |
assert(exp.parse("4 * (5 + 3) * 7") == | |
Some(Mul(Num(4), Mul(Add(Num(5), Num(3)), Num(7))), "")) | |
} | |
} | |
} | |
package attempt3 { | |
class Parser[A](val parse : String => Stream[(A, String)]) { self => | |
// Having a stream of possible parses allows the parser to backtrack. | |
// Try it yourself. | |
} | |
} | |
object Main extends App { | |
attempt0.Examples.main() | |
attempt1.Examples.main() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment