Last active
December 3, 2018 10:23
-
-
Save changlinli/644b9e99339cfef7b5ac7dd0957b6903 to your computer and use it in GitHub Desktop.
A very bare-bones stand-alone implementation of a Lisp-like parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* This is a bare-bones implementation of some parser combinators to implement | |
* a simple Lisp-like parser. | |
* | |
* Note that I call it Lisp-like because I haven't yet defined any macro special | |
* forms in the AST. | |
* | |
* I also use unrestricted recursion here which will eventually blow the stack | |
* for sufficiently nested s-expressions. I could use trampolines instead to | |
* trade stack for heap and avoid a stack overflow. | |
*/ | |
object LispParser { | |
val exampleLispExpression0: LispAST = | |
unsafeExtractFirstResult(parseLispAST.parse("(define a 5)")) | |
// ListOfLists(List(SingleItem(define), SingleItem(a), SingleItem(5))) | |
val exampleLispExpression0AsNestedList: NestedList[String] = | |
formatAsListsOfStrings(exampleLispExpression0) | |
val exampleLispExpression1: LispAST = | |
unsafeExtractFirstResult(parseLispAST.parse("(define a (+ 5 6))")) | |
// ListOfLists(List(SingleItem(define), SingleItem(a), SingleItem(5))) | |
val exampleLispExpression1AsNestedList: NestedList[String] = | |
formatAsListsOfStrings(exampleLispExpression1) | |
val exampleLispExpression2: LispAST = | |
unsafeExtractFirstResult(parseLispAST.parse("(lambda a (if a 5 (+ 1 2)))")) | |
// ListOfLists(List(SingleItem(lambda), SingleItem(a), ListOfLists(List(SingleItem(if), | |
// SingleItem(a), SingleItem(5), ListOfLists(List(SingleItem(+), SingleItem(1), SingleItem(2))))))) | |
val exampleLispExpression2AsNestedList: NestedList[String] = | |
formatAsListsOfStrings(exampleLispExpression2) | |
val exampleLispExpression3: LispAST = | |
unsafeExtractFirstResult(parseLispAST.parse("(define increment (lambda a (+ a 1)))")) | |
val exampleLispExpression3AsNestedList: NestedList[String] = | |
// This gets too long to print... | |
formatAsListsOfStrings(exampleLispExpression3) | |
// Purely for display/demonstration services | |
def unsafeExtractFirstResult(parseResult: List[(LispAST, String)]): LispAST = | |
parseResult.head._1 | |
sealed trait LispAST | |
final case class LispNumberLiteral(rawNumber: BigDecimal) extends LispAST | |
final case class LispSymbol(rawString: String) extends LispAST | |
final case class LispDefine(name: LispSymbol, body: LispAST) extends LispAST | |
final case class LispLambda(argName: LispSymbol, body: LispAST) extends LispAST | |
final case class LispIf(condition: LispAST, thenBody: LispAST, elseBody: LispAST) extends LispAST | |
final case class LispApply(function: LispAST, args: List[LispAST]) extends LispAST | |
// BEGIN: This is for fulfilling the letter of the description for a Lisp parser | |
// for a statically typed language | |
sealed trait NestedList[+A] | |
final case class SingleItem[+A](item: A) extends NestedList[A] | |
final case class ListOfLists[+A](items: List[NestedList[A]]) extends NestedList[A] | |
def formatAsListsOfStrings(ast: LispAST): NestedList[String] = ast match { | |
case LispNumberLiteral(num) => | |
SingleItem(num.toString) | |
case LispSymbol(symbol) => | |
SingleItem(symbol) | |
case LispDefine(LispSymbol(name), body) => | |
ListOfLists( | |
List( | |
SingleItem("define"), | |
SingleItem(name), | |
formatAsListsOfStrings(body) | |
) | |
) | |
case LispLambda(LispSymbol(name), body) => | |
ListOfLists( | |
List( | |
SingleItem("lambda"), | |
SingleItem(name), | |
formatAsListsOfStrings(body) | |
) | |
) | |
case LispIf(condition, thenBody, elseBody) => | |
ListOfLists( | |
List( | |
SingleItem("if"), | |
formatAsListsOfStrings(condition), | |
formatAsListsOfStrings(thenBody), | |
formatAsListsOfStrings(elseBody) | |
) | |
) | |
case LispApply(function, args) => | |
ListOfLists(formatAsListsOfStrings(function) :: args.map(formatAsListsOfStrings)) | |
} | |
// END: This is for fulfilling the letter of the description for a Lisp parser | |
// for a statically typed language | |
def parseLispNumber: Parser[LispNumberLiteral] = parseNumber.map(LispNumberLiteral.apply) | |
def parseLispSymbol: Parser[LispSymbol] = parseValidIdentifier.map(LispSymbol.apply) | |
def parseLispDefine: Parser[LispDefine] = | |
parens{ | |
val getName = lookForKeyword("define") ~> consumeMandatoryWhitespace ~> parseLispSymbol <~ consumeMandatoryWhitespace | |
getName.map{(LispDefine(_, _)).curried}.applyP(parseLispAST <~ consumeOptionalWhitespace) | |
} | |
def parseLispLambda: Parser[LispLambda] = | |
parens(applyParser(parseLispAST)((lookForKeyword("lambda") ~> parseLispSymbol <~ consumeMandatoryWhitespace) | |
.map{(LispLambda(_, _)).curried})) | |
def parseLispIf: Parser[LispIf] = | |
parens{ | |
val ifCondition = lookForKeyword("if") ~> consumeMandatoryWhitespace ~> parseLispAST <~ consumeMandatoryWhitespace | |
ifCondition | |
.map((LispIf(_, _, _)).curried) | |
.applyP(parseLispAST <~ consumeMandatoryWhitespace) | |
.applyP(parseLispAST <~ consumeOptionalWhitespace) | |
} | |
def parseLispApply: Parser[LispApply] = | |
parens{ | |
alwaysSucceed((LispApply(_, _)).curried) | |
.applyP(parseLispAST <~ consumeMandatoryWhitespace) | |
.applyP(repeatUntilFailure(parseLispAST <~ consumeOptionalWhitespace)) | |
} | |
def parseLispAST: Parser[LispAST] = | |
parseLispNumber.widen[LispAST] | |
.alsoTry(parseLispSymbol.widen[LispAST]) | |
.alsoTry(parseLispDefine.widen[LispAST]) | |
.alsoTry(parseLispLambda.widen[LispAST]) | |
.alsoTry(parseLispIf.widen[LispAST]) | |
.alsoTry(parseLispApply.widen[LispAST]) | |
final case class Parser[A](parse: String => List[(A, String)]) { | |
def map[B](f: A => B): Parser[B] = Parser{ | |
input: String => parse(input).map{case (result, remaining) => (f(result), remaining)} | |
} | |
def alsoTry(alternative: => Parser[A]): Parser[A] = Parser.apply{ | |
input: String => | |
parse(input) ++ alternative.parse(input) | |
} | |
def onFailureTry(alternative: => Parser[A]): Parser[A] = Parser.apply{ | |
input: String => | |
parse(input) match { | |
case Nil => alternative.parse(input) | |
case nonEmpty => nonEmpty | |
} | |
} | |
def default(x: => A): Parser[A] = alsoTry(alwaysSucceed(x)) | |
def discard: Parser[Unit] = map(_ => ()) | |
def flatMap[B](f: A => Parser[B]): Parser[B] = Parser{ | |
input: String => | |
parse(input).flatMap{ | |
case (result, remaining) => | |
f(result).parse(remaining) | |
} | |
} | |
def ~>[B](next: Parser[B]): Parser[B] = discardLeft(this, next) | |
def <~[B](next: Parser[B]): Parser[A] = discardRight(this, next) | |
def widen[B >: A]: Parser[B] = map(identity[B]) | |
} | |
implicit class ParserOps[A, B](parser: Parser[A => B]) { | |
def applyP(input: Parser[A]): Parser[B] = applyParser(input)(parser) | |
} | |
def parseCharacter(char: Char): Parser[Char] = { | |
parseCharacterWithPredicate(_ == char) | |
} | |
def alwaysSucceed[A](output: A): Parser[A] = { | |
Parser.apply{ | |
input: String => List((output, input)) | |
} | |
} | |
def alwaysFail[A]: Parser[A] = { | |
Parser.apply{ | |
_: String => List.empty | |
} | |
} | |
def applyParser[A, B](valueP: => Parser[A])(functionP: Parser[A => B]): Parser[B] = Parser{ | |
input: String => | |
for { | |
parsedValueAndRemaining <- functionP.parse(input) | |
(parsedFunction, remainingAfterFunction) = parsedValueAndRemaining | |
parsedValueAndRemaining <- valueP.parse(remainingAfterFunction) | |
(parsedValue, remainingAfterFunction) = parsedValueAndRemaining | |
} yield (parsedFunction(parsedValue), remainingAfterFunction) | |
} | |
def zipP[A, B](p0: Parser[A], p1: Parser[B]): Parser[(A, B)] = { | |
val createPair = alwaysSucceed(((_: A, _: B)).curried) | |
applyParser(p1)(applyParser(p0)(createPair)) | |
} | |
def discardLeft[A, B](left: Parser[A], next: Parser[B]): Parser[B] = | |
zipP(left, next).map(_._2) | |
def discardRight[A, B](current: Parser[A], right: Parser[B]): Parser[A] = | |
zipP(current, right).map(_._1) | |
def leftParen: Parser[Char] = parseCharacter('(') | |
def rightParen: Parser[Char] = parseCharacter(')') | |
def parens[A](inner: Parser[A]): Parser[A] = { | |
bracket(leftParen, inner, rightParen) | |
} | |
def bracket[A, B](left: Parser[A], center: Parser[B], right: Parser[A]): Parser[B] = { | |
left ~> center <~ right | |
} | |
def parseWhitespaceChar: Parser[Char] = parseCharacter(' ') | |
.alsoTry(parseCharacter('\t')) | |
.alsoTry(parseCharacter('\n')) | |
def consumeMandatoryWhitespace: Parser[Unit] = | |
repeatAtLeastOnceUntilFailure(parseWhitespaceChar).map(_ => ()) | |
def consumeOptionalWhitespace: Parser[Unit] = | |
repeatUntilFailure(parseWhitespaceChar).map(_ => ()) | |
def repeatUntilFailure[A](single: => Parser[A]): Parser[List[A]] = Parser.apply{ | |
input: String => | |
single.parse(input).flatMap{ | |
case (result, rest) => | |
repeatUntilFailure(single).parse(rest) match { | |
case Nil => List((List(result), rest)) | |
case nonEmpty => nonEmpty.map{ | |
case (x, xs) => (result :: x, xs) | |
} | |
} | |
} match { | |
case Nil => List((List.empty[A], input)) | |
case nonEmpty => nonEmpty | |
} | |
} | |
def parseCharacterWithPredicate(predicate: Char => Boolean): Parser[Char] = { | |
Parser.apply{ | |
input: String => | |
input.headOption match { | |
case Some(inputChar) if predicate(inputChar) => List((inputChar, input.drop(1))) | |
case Some(_) => List.empty | |
case None => List.empty | |
} | |
} | |
} | |
final case class NonEmptyList[A](head: A, tail: List[A]) { | |
def toList: List[A] = head :: tail | |
} | |
object NonEmptyList { | |
def fromList[A](xs: List[A]): Option[NonEmptyList[A]] = xs match { | |
case x :: rest => Some(NonEmptyList(x, rest)) | |
case Nil => None | |
} | |
} | |
def repeatAtLeastOnceUntilFailure[A](single: Parser[A]): Parser[NonEmptyList[A]] = { | |
repeatUntilFailure(single).map(NonEmptyList.fromList).flatMap{ | |
case None => | |
alwaysFail | |
case Some(nonEmptyList) => | |
alwaysSucceed(nonEmptyList) | |
} | |
} | |
def parseDigitCharacter: Parser[Char] = parseCharacterWithPredicate(_.isDigit) | |
def parseNumber: Parser[BigDecimal] = { | |
parseNumericString.map(BigDecimal.apply) | |
} | |
def parseNonWhitespace: Parser[Char] = parseCharacterWithPredicate(c => !c.isWhitespace) | |
def isSymbol(c: Char): Boolean = c match { | |
case '!' => true | |
case '#' => true | |
case '$' => true | |
case '%' => true | |
case '&' => true | |
case '*' => true | |
case '+' => true | |
case '.' => true | |
case '/' => true | |
case '<' => true | |
case '=' => true | |
case '>' => true | |
case '?' => true | |
case '@' => true | |
case '\\' => true | |
case '^' => true | |
case '|' => true | |
case '-' => true | |
case '~' => true | |
case _ => false | |
} | |
def isAlphaOrSymbol(c: Char): Boolean = c.isLetter || isSymbol(c) | |
def parseAlphaOrSymbolString: Parser[String] = { | |
repeatAtLeastOnceUntilFailure(parseCharacterWithPredicate(isAlphaOrSymbol)) | |
.map(_.toList.mkString) | |
} | |
def parseNumericString: Parser[String] = | |
repeatAtLeastOnceUntilFailure(parseDigitCharacter) | |
.map(_.toList.mkString) | |
def optional[A](parser: Parser[A]): Parser[Option[A]] = | |
parser | |
.map(Some.apply) | |
.widen[Option[A]] | |
.onFailureTry(alwaysSucceed(None)) | |
def parseValidIdentifier: Parser[String] = { | |
val nonNumericPortion = parseAlphaOrSymbolString | |
val numericPart = optional(parseNumericString).map(_.getOrElse("")) | |
zipP(nonNumericPortion, numericPart).map{ | |
case (alphaString, numString) => alphaString ++ numString | |
} | |
} | |
def lookForKeyword(keyword: String): Parser[Unit] = { | |
keyword | |
.toList | |
.map{currentLetter => | |
parseCharacterWithPredicate(_ == currentLetter) | |
} | |
.foldLeft(alwaysSucceed(())){ | |
_ <~ _ | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment