Skip to content

Instantly share code, notes, and snippets.

@alllex
Created March 4, 2023 12:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alllex/afcaf4dd1d1c4b1a5f2fa825f471e9d3 to your computer and use it in GitHub Desktop.
Save alllex/afcaf4dd1d1c4b1a5f2fa825f471e9d3 to your computer and use it in GitHub Desktop.
Kotlin coroutine-based parser combinators implemented on top of DeepRecursiveFunction
data class Token(val value: String)
data class TokenMatch(val token: Token, val offset: Int)
sealed class SyntaxTree
data class Node(val children: List<SyntaxTree>) : SyntaxTree() {
constructor(vararg children: SyntaxTree) : this(children.toList())
override fun toString(): String = "Node($children)"
}
data class Lexeme(val match: TokenMatch, val text: String) : SyntaxTree() {
override fun toString(): String = "Lexeme('$text')"
}
sealed class ParseResult<out T>
data class ParsedValue<T>(val value: T) : ParseResult<T>()
abstract class ParseError : ParseResult<Nothing>()
inline infix fun <T, R> ParseResult<T>.map(f: (T) -> R): ParseResult<R> = when (this) {
is ParsedValue -> ParsedValue(f(this.value))
is ParseError -> this
}
data class MismatchedToken(val expected: Token, val found: TokenMatch) : ParseError()
data class NoMatchingToken(val fromIndex: Int) : ParseError()
class ParserState(private val inputTokens: List<String>, private val tokens: List<Token>) {
private var position: Int = 0
private fun findMatch(fromIndex: Int): TokenMatch? {
val inputTokenValue = inputTokens[fromIndex]
val foundToken = tokens.firstOrNull { it.value == inputTokenValue }
return foundToken?.let { TokenMatch(it, fromIndex) }
}
fun parseToken(token: Token): ParseResult<TokenMatch> {
val fromIndex = this.position
val match = findMatch(fromIndex)
?: return NoMatchingToken(fromIndex)
if (match.token != token) return MismatchedToken(token, match)
this.position = match.offset + 1 // because lexer has already tokenized input
return ParsedValue(match)
}
fun getText(tokenMatch: TokenMatch): String {
return inputTokens[tokenMatch.offset]
}
}
typealias Parser<T> = DeepRecursiveFunction<ParserState, ParseResult<T>>
typealias ParserScope<T> = DeepRecursiveScope<ParserState, ParseResult<T>>
fun tokenParser(token: Token): Parser<TokenMatch> = Parser { state ->
state.parseToken(token)
}
fun lexemeParser(tokenParser: Parser<TokenMatch>): Parser<SyntaxTree> = Parser { state ->
val t = tokenParser.callRecursive(state)
t.map { Lexeme(it, state.getText(it)) }
}
suspend inline fun <T, R> ParserScope<T>.combine(state: ParserState, parser1: Parser<T>, next: (T) -> ParseResult<R>): ParseResult<R> {
val v = parser1.callRecursive(state)
if (v is ParseError) return v
v as ParsedValue
return next(v.value)
}
fun <T> runParser(input: List<String>, tokens: List<Token>, parser: Parser<T>): ParseResult<T> {
val state = ParserState(input, tokens)
return parser(state)
}
fun main() {
val token1 = Token("a")
val token2 = Token("b")
val tokens = listOf(token1, token2)
val parser = object {
val parser1 = lexemeParser(tokenParser(token1))
val parser2 = lexemeParser(tokenParser(token2))
val ab = Parser<SyntaxTree> { state ->
combine(state, parser1) { t1 ->
combine(state, parser2) { t2 ->
ParsedValue(Node(t1, t2))
}
}
}
val root = ab
}
val goodInput = listOf("a", "b")
val goodResult = runParser(goodInput, tokens, parser.root)
println(goodResult)
// ParsedValue(value=Node([Lexeme('a'), Lexeme('b')]))
val badInput = listOf("a", "a")
val badResult = runParser(badInput, tokens, parser.root)
println(badResult)
// MismatchedToken(expected=Token(value=b), found=TokenMatch(token=Token(value=a), offset=1))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment