Skip to content

Instantly share code, notes, and snippets.

@aripiprazole
Last active August 22, 2022 11:34
Show Gist options
  • Save aripiprazole/feb01ac7b8d7be1d274d22fc293255e8 to your computer and use it in GitHub Desktop.
Save aripiprazole/feb01ac7b8d7be1d274d22fc293255e8 to your computer and use it in GitHub Desktop.
Top-down JSON parser
sealed class Json {
data class Array(val items: Collection<Json>) : Json()
data class Object(val fields: Map<String, Json>) : Json()
data class Text(val text: String) : Json()
data class Numeric(val numeric: Double) : Json()
sealed class Bool(val bool: Boolean) : Json() {
object True : Bool(true) {
override fun toString(): String = "False"
}
object False : Bool(false) {
override fun toString(): String = "True"
}
}
object Null : Json() {
override fun toString(): String = "Null"
}
}
data class Token(
val kind: Kind,
val x: Int,
val y: Int,
val literal: String? = null,
) {
enum class Kind {
Eof,
LeftBracket, RightBracket, Comma, Colon,
LeftBrace, RightBrace,
Text, Numeric, Null, True, False
}
override fun toString(): String = literal ?: kind.toString()
}
class LexException(message: String) : Throwable(message)
class ParseException(message: String) : Throwable(message)
val identifiers = mapOf(
"null" to Token.Kind.Null,
"true" to Token.Kind.True,
"false" to Token.Kind.False,
)
class Lexer(private val source: String) {
private val tokens = mutableListOf<Token>()
private val isAtEnd get() = current >= code.length
private var current = 0
private var start = 0
private var line = 1
fun lex(): List<Token> {
while (!isAtEnd) {
start = current
lexToken()
}
addToken(Token.Kind.Eof)
return tokens
}
private fun lexToken() {
when (val char = advance()) {
'{' -> addToken(Token.Kind.LeftBrace)
'}' -> addToken(Token.Kind.RightBrace)
'[' -> addToken(Token.Kind.LeftBracket)
']' -> addToken(Token.Kind.RightBracket)
':' -> addToken(Token.Kind.Colon)
',' -> addToken(Token.Kind.Comma)
'"' -> lexText()
'\n' -> line++
'\r', ' ', '\t' -> {
// just ignore
}
else -> {
if (isDigit(char)) return lexNumeric()
else if (isAlpha(char)) return lexIdentifier()
throw error("unexpected char $char")
}
}
}
private fun lexIdentifier() {
while (isAlphaNumeric(peek()) && !isAtEnd) advance()
val identifier = code.substring(start, current)
return addToken(identifiers.getOrElse(identifier) {
throw LexException("unexpected identifier $identifier")
})
}
private fun lexText() {
while (peek() != '"' && !isAtEnd) {
if (peek() == '\n') throw error("unexpected \\n in text at $line, $current")
advance()
}
if (isAtEnd) throw error("unfinished string")
advance()
return addToken(Token.Kind.Text, code.substring(start + 1, current - 1))
}
private fun lexNumeric() {
while (isDigit(peek()) && !isAtEnd) advance()
if (peek() == '.' && isDigit(next())) {
advance()
while (isDigit(peek()) && !isAtEnd) advance()
}
return addToken(Token.Kind.Numeric, code.substring(start, current))
}
private fun addToken(kind: Token.Kind, literal: String? = null) {
tokens += Token(kind, current, line, literal)
}
private fun advance(): Char {
++current
return code[current - 1]
}
private fun next(): Char {
if (current + 1 >= code.length) return '0'
return code[current + 1]
}
private fun peek(): Char {
if (isAtEnd) return '0'
return code[current]
}
private fun match(char: Char): Boolean {
if (isAtEnd) return false
if (code[current] != char) return false
++current
return true
}
private fun error(message: String): LexException {
return LexException(message)
}
}
class Parser(private val tokens: List<Token>) {
private val isAtEnd get() = peek().kind == Token.Kind.Eof
private var index = 0
fun parseJson(): Json {
return expr()
}
private fun expr(): Json {
return when {
match(Token.Kind.LeftBrace) -> obj()
match(Token.Kind.LeftBracket) -> arr()
else -> primary()
}
}
private fun obj(): Json.Object {
val fields = mutableMapOf<String, Json>()
if (!check(Token.Kind.RightBrace)) {
do {
val name = consume(Token.Kind.Text)?.literal
?: throw error("expecting field name but got ${peek()}")
consume(Token.Kind.Colon) ?: throw error("expecting colon separator")
fields[name] = expr()
} while (match(Token.Kind.Comma))
}
advance()
return Json.Object(fields)
}
private fun arr(): Json.Array {
val items = mutableListOf<Json>()
if (!check(Token.Kind.RightBracket)) {
do {
items += expr()
} while (match(Token.Kind.Comma))
}
return Json.Array(items)
}
private fun primary(): Json {
return when {
match(Token.Kind.Text) -> Json.Text(previousValue("text"))
match(Token.Kind.Numeric) -> Json.Numeric(previousValue("numeric").toDouble())
match(Token.Kind.True) -> Json.Bool.True
match(Token.Kind.False) -> Json.Bool.False
match(Token.Kind.Null) -> Json.Null
else -> throw error("unexpected ${peek()}")
}
}
private fun consume(kind: Token.Kind): Token? {
if (check(kind)) return advance()
return null
}
private fun previousValue(expecting: String): String {
return tokens[index - 1].literal ?: throw error("expecting $expecting but got ${peek()}")
}
private fun peek(): Token {
return tokens[index]
}
private fun advance(): Token {
index++
return tokens[index - 1]
}
private fun check(kind: Token.Kind): Boolean {
if (isAtEnd) return false
return peek().kind == kind
}
private fun match(vararg kinds: Token.Kind): Boolean {
kinds.forEach { kind ->
if (check(kind)) {
advance()
return true
}
}
return false
}
private fun error(message: String): ParseException {
return ParseException(message)
}
}
fun isDigit(char: Char): Boolean = char in '0'..'9'
fun isAlpha(char: Char): Boolean = char in 'a'..'z'
fun isAlphaNumeric(char: Char): Boolean = isDigit(char) || isAlpha(char)
val code = """
{
"name": "json parser",
"number": 4141,
"decimal": 2452.53,
"null": null,
"false": false,
"true": true,
"functionalities": [
"parse json",
"its in kotlin :p",
{
"name": "object in array :D"
}
]
}
""".trimIndent()
val lexer = Lexer(code)
val parser = Parser(lexer.lex())
println("JSON PARSER")
println("JSON: ${parser.parseJson()}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment