Created
April 4, 2016 19:03
-
-
Save jjst/bf8d532a497f61fc881a48f978c7bb42 to your computer and use it in GitHub Desktop.
A LISP parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
trait ASTElement | |
case class Expression(elements: ASTElement*) extends ASTElement | |
case class Literal[T](value: T) extends ASTElement | |
case class Function(name: String) extends ASTElement | |
object LispParser { | |
def main(args: Array[String]) { | |
val ast = parse("(first (list 1 (+ 2 3) 9))") | |
assert(ast == | |
Expression( | |
Function("first"), | |
Expression( | |
Function("list"), | |
Literal(1), | |
Expression( | |
Function("+"), | |
Literal(2), | |
Literal(3)), | |
Literal(9) | |
) | |
) | |
) | |
} | |
/** | |
* Parse lisp code and returns the corresponding AST (abstract syntax tree) | |
* @param code code to parse | |
* @return the root node of the AST | |
*/ | |
def parse(code: String): ASTElement = code match { | |
case Patterns.List(listContent) => { | |
val listTokens = tokenize(listContent) | |
val astElements = listTokens map parse | |
Expression(astElements: _*) | |
} | |
case Patterns.IntegerLiteral(l) => Literal(l.toInt) | |
case Patterns.Function(name) => Function(name) | |
} | |
/** | |
* Tokenize elements of a space-separated list | |
* @param rawList the list to tokenize | |
* @return | |
*/ | |
def tokenize(rawList: String): Seq[String] = { | |
/** | |
* Extract a sublist token from the list to tokenize | |
* @param str list to extract sublist from | |
* @return a tuple (extractedList, remainder) containing the extracted list and the remainder that still needs | |
* tokenization | |
*/ | |
def extractListToken(str: String): (String, String) = { | |
var openedExpressions = 0 | |
for((character, index) <- str.zipWithIndex) { | |
character match { | |
case '(' => openedExpressions += 1 | |
case ')' => openedExpressions -= 1 | |
case _ => () | |
} | |
if (openedExpressions == 0) { | |
return (str.take(index + 1), str.drop(index + 2)) | |
} | |
} | |
throw new RuntimeException("Malformed tokens: mismatched opening/closing parenthesis") | |
} | |
if(rawList.isEmpty) Nil | |
else { | |
val (token, remainder) = if(rawList.startsWith("(")) { | |
extractListToken(rawList) | |
} else { | |
val nextSpaceIndex = rawList.indexWhere(_ == ' ') | |
if (nextSpaceIndex == -1) { | |
(rawList, "") | |
} else { | |
(rawList.take(nextSpaceIndex), rawList.drop(nextSpaceIndex + 1)) | |
} | |
} | |
token +: tokenize(remainder) | |
} | |
} | |
} | |
object Patterns { | |
val List = """\((.*)\)""".r | |
val IntegerLiteral = """(\d+)""".r | |
val Function = """(.+)""".r | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment