Skip to content

Instantly share code, notes, and snippets.

@changlinli
Last active December 3, 2018 10:23
Show Gist options
  • Save changlinli/644b9e99339cfef7b5ac7dd0957b6903 to your computer and use it in GitHub Desktop.
Save changlinli/644b9e99339cfef7b5ac7dd0957b6903 to your computer and use it in GitHub Desktop.
A very bare-bones stand-alone implementation of a Lisp-like parser
/**
* This is a bare-bones implementation of some parser combinators to implement
* a simple Lisp-like parser.
*
* Note that I call it Lisp-like because I haven't yet defined any macro special
* forms in the AST.
*
* I also use unrestricted recursion here which will eventually blow the stack
* for sufficiently nested s-expressions. I could use trampolines instead to
* trade stack for heap and avoid a stack overflow.
*/
object LispParser {
val exampleLispExpression0: LispAST =
unsafeExtractFirstResult(parseLispAST.parse("(define a 5)"))
// ListOfLists(List(SingleItem(define), SingleItem(a), SingleItem(5)))
val exampleLispExpression0AsNestedList: NestedList[String] =
formatAsListsOfStrings(exampleLispExpression0)
val exampleLispExpression1: LispAST =
unsafeExtractFirstResult(parseLispAST.parse("(define a (+ 5 6))"))
// ListOfLists(List(SingleItem(define), SingleItem(a), SingleItem(5)))
val exampleLispExpression1AsNestedList: NestedList[String] =
formatAsListsOfStrings(exampleLispExpression1)
val exampleLispExpression2: LispAST =
unsafeExtractFirstResult(parseLispAST.parse("(lambda a (if a 5 (+ 1 2)))"))
// ListOfLists(List(SingleItem(lambda), SingleItem(a), ListOfLists(List(SingleItem(if),
// SingleItem(a), SingleItem(5), ListOfLists(List(SingleItem(+), SingleItem(1), SingleItem(2)))))))
val exampleLispExpression2AsNestedList: NestedList[String] =
formatAsListsOfStrings(exampleLispExpression2)
val exampleLispExpression3: LispAST =
unsafeExtractFirstResult(parseLispAST.parse("(define increment (lambda a (+ a 1)))"))
val exampleLispExpression3AsNestedList: NestedList[String] =
// This gets too long to print...
formatAsListsOfStrings(exampleLispExpression3)
// Purely for display/demonstration services
def unsafeExtractFirstResult(parseResult: List[(LispAST, String)]): LispAST =
parseResult.head._1
sealed trait LispAST
final case class LispNumberLiteral(rawNumber: BigDecimal) extends LispAST
final case class LispSymbol(rawString: String) extends LispAST
final case class LispDefine(name: LispSymbol, body: LispAST) extends LispAST
final case class LispLambda(argName: LispSymbol, body: LispAST) extends LispAST
final case class LispIf(condition: LispAST, thenBody: LispAST, elseBody: LispAST) extends LispAST
final case class LispApply(function: LispAST, args: List[LispAST]) extends LispAST
// BEGIN: This is for fulfilling the letter of the description for a Lisp parser
// for a statically typed language
sealed trait NestedList[+A]
final case class SingleItem[+A](item: A) extends NestedList[A]
final case class ListOfLists[+A](items: List[NestedList[A]]) extends NestedList[A]
def formatAsListsOfStrings(ast: LispAST): NestedList[String] = ast match {
case LispNumberLiteral(num) =>
SingleItem(num.toString)
case LispSymbol(symbol) =>
SingleItem(symbol)
case LispDefine(LispSymbol(name), body) =>
ListOfLists(
List(
SingleItem("define"),
SingleItem(name),
formatAsListsOfStrings(body)
)
)
case LispLambda(LispSymbol(name), body) =>
ListOfLists(
List(
SingleItem("lambda"),
SingleItem(name),
formatAsListsOfStrings(body)
)
)
case LispIf(condition, thenBody, elseBody) =>
ListOfLists(
List(
SingleItem("if"),
formatAsListsOfStrings(condition),
formatAsListsOfStrings(thenBody),
formatAsListsOfStrings(elseBody)
)
)
case LispApply(function, args) =>
ListOfLists(formatAsListsOfStrings(function) :: args.map(formatAsListsOfStrings))
}
// END: This is for fulfilling the letter of the description for a Lisp parser
// for a statically typed language
def parseLispNumber: Parser[LispNumberLiteral] = parseNumber.map(LispNumberLiteral.apply)
def parseLispSymbol: Parser[LispSymbol] = parseValidIdentifier.map(LispSymbol.apply)
def parseLispDefine: Parser[LispDefine] =
parens{
val getName = lookForKeyword("define") ~> consumeMandatoryWhitespace ~> parseLispSymbol <~ consumeMandatoryWhitespace
getName.map{(LispDefine(_, _)).curried}.applyP(parseLispAST <~ consumeOptionalWhitespace)
}
def parseLispLambda: Parser[LispLambda] =
parens(applyParser(parseLispAST)((lookForKeyword("lambda") ~> parseLispSymbol <~ consumeMandatoryWhitespace)
.map{(LispLambda(_, _)).curried}))
def parseLispIf: Parser[LispIf] =
parens{
val ifCondition = lookForKeyword("if") ~> consumeMandatoryWhitespace ~> parseLispAST <~ consumeMandatoryWhitespace
ifCondition
.map((LispIf(_, _, _)).curried)
.applyP(parseLispAST <~ consumeMandatoryWhitespace)
.applyP(parseLispAST <~ consumeOptionalWhitespace)
}
def parseLispApply: Parser[LispApply] =
parens{
alwaysSucceed((LispApply(_, _)).curried)
.applyP(parseLispAST <~ consumeMandatoryWhitespace)
.applyP(repeatUntilFailure(parseLispAST <~ consumeOptionalWhitespace))
}
def parseLispAST: Parser[LispAST] =
parseLispNumber.widen[LispAST]
.alsoTry(parseLispSymbol.widen[LispAST])
.alsoTry(parseLispDefine.widen[LispAST])
.alsoTry(parseLispLambda.widen[LispAST])
.alsoTry(parseLispIf.widen[LispAST])
.alsoTry(parseLispApply.widen[LispAST])
final case class Parser[A](parse: String => List[(A, String)]) {
def map[B](f: A => B): Parser[B] = Parser{
input: String => parse(input).map{case (result, remaining) => (f(result), remaining)}
}
def alsoTry(alternative: => Parser[A]): Parser[A] = Parser.apply{
input: String =>
parse(input) ++ alternative.parse(input)
}
def onFailureTry(alternative: => Parser[A]): Parser[A] = Parser.apply{
input: String =>
parse(input) match {
case Nil => alternative.parse(input)
case nonEmpty => nonEmpty
}
}
def default(x: => A): Parser[A] = alsoTry(alwaysSucceed(x))
def discard: Parser[Unit] = map(_ => ())
def flatMap[B](f: A => Parser[B]): Parser[B] = Parser{
input: String =>
parse(input).flatMap{
case (result, remaining) =>
f(result).parse(remaining)
}
}
def ~>[B](next: Parser[B]): Parser[B] = discardLeft(this, next)
def <~[B](next: Parser[B]): Parser[A] = discardRight(this, next)
def widen[B >: A]: Parser[B] = map(identity[B])
}
implicit class ParserOps[A, B](parser: Parser[A => B]) {
def applyP(input: Parser[A]): Parser[B] = applyParser(input)(parser)
}
def parseCharacter(char: Char): Parser[Char] = {
parseCharacterWithPredicate(_ == char)
}
def alwaysSucceed[A](output: A): Parser[A] = {
Parser.apply{
input: String => List((output, input))
}
}
def alwaysFail[A]: Parser[A] = {
Parser.apply{
_: String => List.empty
}
}
def applyParser[A, B](valueP: => Parser[A])(functionP: Parser[A => B]): Parser[B] = Parser{
input: String =>
for {
parsedValueAndRemaining <- functionP.parse(input)
(parsedFunction, remainingAfterFunction) = parsedValueAndRemaining
parsedValueAndRemaining <- valueP.parse(remainingAfterFunction)
(parsedValue, remainingAfterFunction) = parsedValueAndRemaining
} yield (parsedFunction(parsedValue), remainingAfterFunction)
}
def zipP[A, B](p0: Parser[A], p1: Parser[B]): Parser[(A, B)] = {
val createPair = alwaysSucceed(((_: A, _: B)).curried)
applyParser(p1)(applyParser(p0)(createPair))
}
def discardLeft[A, B](left: Parser[A], next: Parser[B]): Parser[B] =
zipP(left, next).map(_._2)
def discardRight[A, B](current: Parser[A], right: Parser[B]): Parser[A] =
zipP(current, right).map(_._1)
def leftParen: Parser[Char] = parseCharacter('(')
def rightParen: Parser[Char] = parseCharacter(')')
def parens[A](inner: Parser[A]): Parser[A] = {
bracket(leftParen, inner, rightParen)
}
def bracket[A, B](left: Parser[A], center: Parser[B], right: Parser[A]): Parser[B] = {
left ~> center <~ right
}
def parseWhitespaceChar: Parser[Char] = parseCharacter(' ')
.alsoTry(parseCharacter('\t'))
.alsoTry(parseCharacter('\n'))
def consumeMandatoryWhitespace: Parser[Unit] =
repeatAtLeastOnceUntilFailure(parseWhitespaceChar).map(_ => ())
def consumeOptionalWhitespace: Parser[Unit] =
repeatUntilFailure(parseWhitespaceChar).map(_ => ())
def repeatUntilFailure[A](single: => Parser[A]): Parser[List[A]] = Parser.apply{
input: String =>
single.parse(input).flatMap{
case (result, rest) =>
repeatUntilFailure(single).parse(rest) match {
case Nil => List((List(result), rest))
case nonEmpty => nonEmpty.map{
case (x, xs) => (result :: x, xs)
}
}
} match {
case Nil => List((List.empty[A], input))
case nonEmpty => nonEmpty
}
}
def parseCharacterWithPredicate(predicate: Char => Boolean): Parser[Char] = {
Parser.apply{
input: String =>
input.headOption match {
case Some(inputChar) if predicate(inputChar) => List((inputChar, input.drop(1)))
case Some(_) => List.empty
case None => List.empty
}
}
}
final case class NonEmptyList[A](head: A, tail: List[A]) {
def toList: List[A] = head :: tail
}
object NonEmptyList {
def fromList[A](xs: List[A]): Option[NonEmptyList[A]] = xs match {
case x :: rest => Some(NonEmptyList(x, rest))
case Nil => None
}
}
def repeatAtLeastOnceUntilFailure[A](single: Parser[A]): Parser[NonEmptyList[A]] = {
repeatUntilFailure(single).map(NonEmptyList.fromList).flatMap{
case None =>
alwaysFail
case Some(nonEmptyList) =>
alwaysSucceed(nonEmptyList)
}
}
def parseDigitCharacter: Parser[Char] = parseCharacterWithPredicate(_.isDigit)
def parseNumber: Parser[BigDecimal] = {
parseNumericString.map(BigDecimal.apply)
}
def parseNonWhitespace: Parser[Char] = parseCharacterWithPredicate(c => !c.isWhitespace)
def isSymbol(c: Char): Boolean = c match {
case '!' => true
case '#' => true
case '$' => true
case '%' => true
case '&' => true
case '*' => true
case '+' => true
case '.' => true
case '/' => true
case '<' => true
case '=' => true
case '>' => true
case '?' => true
case '@' => true
case '\\' => true
case '^' => true
case '|' => true
case '-' => true
case '~' => true
case _ => false
}
def isAlphaOrSymbol(c: Char): Boolean = c.isLetter || isSymbol(c)
def parseAlphaOrSymbolString: Parser[String] = {
repeatAtLeastOnceUntilFailure(parseCharacterWithPredicate(isAlphaOrSymbol))
.map(_.toList.mkString)
}
def parseNumericString: Parser[String] =
repeatAtLeastOnceUntilFailure(parseDigitCharacter)
.map(_.toList.mkString)
def optional[A](parser: Parser[A]): Parser[Option[A]] =
parser
.map(Some.apply)
.widen[Option[A]]
.onFailureTry(alwaysSucceed(None))
def parseValidIdentifier: Parser[String] = {
val nonNumericPortion = parseAlphaOrSymbolString
val numericPart = optional(parseNumericString).map(_.getOrElse(""))
zipP(nonNumericPortion, numericPart).map{
case (alphaString, numString) => alphaString ++ numString
}
}
def lookForKeyword(keyword: String): Parser[Unit] = {
keyword
.toList
.map{currentLetter =>
parseCharacterWithPredicate(_ == currentLetter)
}
.foldLeft(alwaysSucceed(())){
_ <~ _
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment