Skip to content

Instantly share code, notes, and snippets.

@jjst
Created April 4, 2016 19:03
Show Gist options
  • Save jjst/bf8d532a497f61fc881a48f978c7bb42 to your computer and use it in GitHub Desktop.
Save jjst/bf8d532a497f61fc881a48f978c7bb42 to your computer and use it in GitHub Desktop.
A LISP parser
trait ASTElement
case class Expression(elements: ASTElement*) extends ASTElement
case class Literal[T](value: T) extends ASTElement
case class Function(name: String) extends ASTElement
object LispParser {
def main(args: Array[String]) {
val ast = parse("(first (list 1 (+ 2 3) 9))")
assert(ast ==
Expression(
Function("first"),
Expression(
Function("list"),
Literal(1),
Expression(
Function("+"),
Literal(2),
Literal(3)),
Literal(9)
)
)
)
}
/**
* Parse lisp code and returns the corresponding AST (abstract syntax tree)
* @param code code to parse
* @return the root node of the AST
*/
def parse(code: String): ASTElement = code match {
case Patterns.List(listContent) => {
val listTokens = tokenize(listContent)
val astElements = listTokens map parse
Expression(astElements: _*)
}
case Patterns.IntegerLiteral(l) => Literal(l.toInt)
case Patterns.Function(name) => Function(name)
}
/**
* Tokenize elements of a space-separated list
* @param rawList the list to tokenize
* @return
*/
def tokenize(rawList: String): Seq[String] = {
/**
* Extract a sublist token from the list to tokenize
* @param str list to extract sublist from
* @return a tuple (extractedList, remainder) containing the extracted list and the remainder that still needs
* tokenization
*/
def extractListToken(str: String): (String, String) = {
var openedExpressions = 0
for((character, index) <- str.zipWithIndex) {
character match {
case '(' => openedExpressions += 1
case ')' => openedExpressions -= 1
case _ => ()
}
if (openedExpressions == 0) {
return (str.take(index + 1), str.drop(index + 2))
}
}
throw new RuntimeException("Malformed tokens: mismatched opening/closing parenthesis")
}
if(rawList.isEmpty) Nil
else {
val (token, remainder) = if(rawList.startsWith("(")) {
extractListToken(rawList)
} else {
val nextSpaceIndex = rawList.indexWhere(_ == ' ')
if (nextSpaceIndex == -1) {
(rawList, "")
} else {
(rawList.take(nextSpaceIndex), rawList.drop(nextSpaceIndex + 1))
}
}
token +: tokenize(remainder)
}
}
}
object Patterns {
val List = """\((.*)\)""".r
val IntegerLiteral = """(\d+)""".r
val Function = """(.+)""".r
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment