Last active
August 22, 2022 11:34
-
-
Save aripiprazole/feb01ac7b8d7be1d274d22fc293255e8 to your computer and use it in GitHub Desktop.
Top-down JSON parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sealed class Json { | |
data class Array(val items: Collection<Json>) : Json() | |
data class Object(val fields: Map<String, Json>) : Json() | |
data class Text(val text: String) : Json() | |
data class Numeric(val numeric: Double) : Json() | |
sealed class Bool(val bool: Boolean) : Json() { | |
object True : Bool(true) { | |
override fun toString(): String = "False" | |
} | |
object False : Bool(false) { | |
override fun toString(): String = "True" | |
} | |
} | |
object Null : Json() { | |
override fun toString(): String = "Null" | |
} | |
} | |
data class Token( | |
val kind: Kind, | |
val x: Int, | |
val y: Int, | |
val literal: String? = null, | |
) { | |
enum class Kind { | |
Eof, | |
LeftBracket, RightBracket, Comma, Colon, | |
LeftBrace, RightBrace, | |
Text, Numeric, Null, True, False | |
} | |
override fun toString(): String = literal ?: kind.toString() | |
} | |
class LexException(message: String) : Throwable(message) | |
class ParseException(message: String) : Throwable(message) | |
val identifiers = mapOf( | |
"null" to Token.Kind.Null, | |
"true" to Token.Kind.True, | |
"false" to Token.Kind.False, | |
) | |
class Lexer(private val source: String) { | |
private val tokens = mutableListOf<Token>() | |
private val isAtEnd get() = current >= code.length | |
private var current = 0 | |
private var start = 0 | |
private var line = 1 | |
fun lex(): List<Token> { | |
while (!isAtEnd) { | |
start = current | |
lexToken() | |
} | |
addToken(Token.Kind.Eof) | |
return tokens | |
} | |
private fun lexToken() { | |
when (val char = advance()) { | |
'{' -> addToken(Token.Kind.LeftBrace) | |
'}' -> addToken(Token.Kind.RightBrace) | |
'[' -> addToken(Token.Kind.LeftBracket) | |
']' -> addToken(Token.Kind.RightBracket) | |
':' -> addToken(Token.Kind.Colon) | |
',' -> addToken(Token.Kind.Comma) | |
'"' -> lexText() | |
'\n' -> line++ | |
'\r', ' ', '\t' -> { | |
// just ignore | |
} | |
else -> { | |
if (isDigit(char)) return lexNumeric() | |
else if (isAlpha(char)) return lexIdentifier() | |
throw error("unexpected char $char") | |
} | |
} | |
} | |
private fun lexIdentifier() { | |
while (isAlphaNumeric(peek()) && !isAtEnd) advance() | |
val identifier = code.substring(start, current) | |
return addToken(identifiers.getOrElse(identifier) { | |
throw LexException("unexpected identifier $identifier") | |
}) | |
} | |
private fun lexText() { | |
while (peek() != '"' && !isAtEnd) { | |
if (peek() == '\n') throw error("unexpected \\n in text at $line, $current") | |
advance() | |
} | |
if (isAtEnd) throw error("unfinished string") | |
advance() | |
return addToken(Token.Kind.Text, code.substring(start + 1, current - 1)) | |
} | |
private fun lexNumeric() { | |
while (isDigit(peek()) && !isAtEnd) advance() | |
if (peek() == '.' && isDigit(next())) { | |
advance() | |
while (isDigit(peek()) && !isAtEnd) advance() | |
} | |
return addToken(Token.Kind.Numeric, code.substring(start, current)) | |
} | |
private fun addToken(kind: Token.Kind, literal: String? = null) { | |
tokens += Token(kind, current, line, literal) | |
} | |
private fun advance(): Char { | |
++current | |
return code[current - 1] | |
} | |
private fun next(): Char { | |
if (current + 1 >= code.length) return '0' | |
return code[current + 1] | |
} | |
private fun peek(): Char { | |
if (isAtEnd) return '0' | |
return code[current] | |
} | |
private fun match(char: Char): Boolean { | |
if (isAtEnd) return false | |
if (code[current] != char) return false | |
++current | |
return true | |
} | |
private fun error(message: String): LexException { | |
return LexException(message) | |
} | |
} | |
class Parser(private val tokens: List<Token>) { | |
private val isAtEnd get() = peek().kind == Token.Kind.Eof | |
private var index = 0 | |
fun parseJson(): Json { | |
return expr() | |
} | |
private fun expr(): Json { | |
return when { | |
match(Token.Kind.LeftBrace) -> obj() | |
match(Token.Kind.LeftBracket) -> arr() | |
else -> primary() | |
} | |
} | |
private fun obj(): Json.Object { | |
val fields = mutableMapOf<String, Json>() | |
if (!check(Token.Kind.RightBrace)) { | |
do { | |
val name = consume(Token.Kind.Text)?.literal | |
?: throw error("expecting field name but got ${peek()}") | |
consume(Token.Kind.Colon) ?: throw error("expecting colon separator") | |
fields[name] = expr() | |
} while (match(Token.Kind.Comma)) | |
} | |
advance() | |
return Json.Object(fields) | |
} | |
private fun arr(): Json.Array { | |
val items = mutableListOf<Json>() | |
if (!check(Token.Kind.RightBracket)) { | |
do { | |
items += expr() | |
} while (match(Token.Kind.Comma)) | |
} | |
return Json.Array(items) | |
} | |
private fun primary(): Json { | |
return when { | |
match(Token.Kind.Text) -> Json.Text(previousValue("text")) | |
match(Token.Kind.Numeric) -> Json.Numeric(previousValue("numeric").toDouble()) | |
match(Token.Kind.True) -> Json.Bool.True | |
match(Token.Kind.False) -> Json.Bool.False | |
match(Token.Kind.Null) -> Json.Null | |
else -> throw error("unexpected ${peek()}") | |
} | |
} | |
private fun consume(kind: Token.Kind): Token? { | |
if (check(kind)) return advance() | |
return null | |
} | |
private fun previousValue(expecting: String): String { | |
return tokens[index - 1].literal ?: throw error("expecting $expecting but got ${peek()}") | |
} | |
private fun peek(): Token { | |
return tokens[index] | |
} | |
private fun advance(): Token { | |
index++ | |
return tokens[index - 1] | |
} | |
private fun check(kind: Token.Kind): Boolean { | |
if (isAtEnd) return false | |
return peek().kind == kind | |
} | |
private fun match(vararg kinds: Token.Kind): Boolean { | |
kinds.forEach { kind -> | |
if (check(kind)) { | |
advance() | |
return true | |
} | |
} | |
return false | |
} | |
private fun error(message: String): ParseException { | |
return ParseException(message) | |
} | |
} | |
fun isDigit(char: Char): Boolean = char in '0'..'9' | |
fun isAlpha(char: Char): Boolean = char in 'a'..'z' | |
fun isAlphaNumeric(char: Char): Boolean = isDigit(char) || isAlpha(char) | |
val code = """ | |
{ | |
"name": "json parser", | |
"number": 4141, | |
"decimal": 2452.53, | |
"null": null, | |
"false": false, | |
"true": true, | |
"functionalities": [ | |
"parse json", | |
"its in kotlin :p", | |
{ | |
"name": "object in array :D" | |
} | |
] | |
} | |
""".trimIndent() | |
val lexer = Lexer(code) | |
val parser = Parser(lexer.lex()) | |
println("JSON PARSER") | |
println("JSON: ${parser.parseJson()}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment