Skip to content

Instantly share code, notes, and snippets.

@silmeth
Last active October 13, 2017 19:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save silmeth/cab56d9a40a72f4ca39faaa603794280 to your computer and use it in GitHub Desktop.
Save silmeth/cab56d9a40a72f4ca39faaa603794280 to your computer and use it in GitHub Desktop.
Example of parsing (subset of) JSON using Kotlin better-parse (https://github.com/h0tk3y/better-parse) parser combinators
import com.github.h0tk3y.betterParse.grammar.parseToEnd
/**
 * Demo entry point: parses a sample JSON document with [SimpleJsonGrammar],
 * sanity-checks a few of the parsed values and prints the resulting structure.
 */
object Main {
    /**
     * Parses the embedded sample document and prints the parsed value.
     *
     * The checks use `assert`, so they only take effect when JVM assertions
     * are enabled (`-ea`).
     *
     * @param args command-line arguments; not used.
     */
    @JvmStatic
    fun main(args: Array<String>) {
        // Raw string: the backslashes below reach the parser as literal JSON escape sequences.
        val json = """
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": 100,
"Visible": true
},
"Animated" : false,
"IDs": [1.16E2, 943, 234, 38793],
"Array of objects": [{}, {"type": "object in an array"}, null],
"Escaped characters": "\n\r\"\t\\",
"Non-escaped unicode characters" : "Ążćřǫ × 38.0e5¹²³"
}
}
"""
        val grammar = SimpleJsonGrammar()
        val parsed = grammar.parseToEnd(json)
        assert(parsed is Map<*, *>)

        @Suppress("UNCHECKED_CAST")
        val map: Map<String, Any?> = parsed as Map<String, Any?>

        // Every checked field lives inside the top-level "Image" object, not on the
        // root map itself. The safe cast keeps a missing/mistyped "Image" entry from
        // crashing the demo outside of an assertion.
        @Suppress("UNCHECKED_CAST")
        val image = map["Image"] as? Map<String, Any?>
        assert(image != null)
        assert(image?.get("Width") == 800.0)
        assert(image?.get("Title") == "View from 15th Floor")
        assert(image?.get("IDs") == listOf(116.0, 943.0, 234.0, 38793.0))

        println(parsed)
    }
}
import com.github.h0tk3y.betterParse.combinators.and
import com.github.h0tk3y.betterParse.combinators.asJust
import com.github.h0tk3y.betterParse.combinators.map
import com.github.h0tk3y.betterParse.combinators.optional
import com.github.h0tk3y.betterParse.combinators.or
import com.github.h0tk3y.betterParse.combinators.separated
import com.github.h0tk3y.betterParse.combinators.skip
import com.github.h0tk3y.betterParse.combinators.use
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.parser.Parser
/**
 * Grammar for a subset of JSON built with better-parse parser combinators.
 *
 * Parse results are plain Kotlin values:
 *  - `null`                -> `null`
 *  - `true` / `false`      -> [Boolean]
 *  - numbers               -> [Double]
 *  - strings               -> [String] (quotes removed, simple escapes decoded)
 *  - arrays                -> [List] of parsed values
 *  - objects               -> [Map] from key string to parsed value
 *
 * Known limitations visible from the token set: there is no token for a sign,
 * so negative numbers and signed exponents are not accepted, and the string
 * token does not admit `\uXXXX` or `\/` escapes.
 */
class SimpleJsonGrammar : Grammar<Any?>() {
    // --- Tokens -----------------------------------------------------------
    // Declaration order is kept exactly as in the original grammar.

    /** A double-quoted string; the only escapes admitted are `\" \\ \n \r \t \b \f`. */
    val stringLiteral by token("\"[^\\\\\"]*(\\\\[\"nrtbf\\\\][^\\\\\"]*)*\"")

    /** Whitespace between tokens; declared with the second `token` argument set to `true`. */
    val whiteSpace by token("\\s+", true)
    val colon by token(":")
    val openingBrace by token("\\{")
    val closingBrace by token("\\}")
    val openingBracket by token("\\[")
    val closingBracket by token("\\]")
    val nullToken by token("\\bnull\\b")
    val trueToken by token("\\btrue\\b")
    val falseToken by token("\\bfalse\\b")

    /** JSON `null` literal, mapped to Kotlin `null`. */
    val jsonNull: Parser<Any?> = nullToken asJust null

    /** JSON `true` / `false` literal. */
    val jsonBool: Parser<Boolean> = (trueToken asJust true) or (falseToken asJust false)

    val comma by token(",")
    val integer by token("\\d+")
    val dot by token("\\.")
    val exponent by token("[eE]")

    // --- Parsers ----------------------------------------------------------

    /**
     * String value: strips the surrounding quotes and decodes escape sequences.
     *
     * The token pattern guarantees the match starts and ends with `"`, so the
     * content is exactly the characters in `1 until length - 1`. (The previous
     * end index `lastIndex - 1` cut off the last content character and threw
     * on the empty string `""`.)
     */
    val string: Parser<String> = stringLiteral use { unescape(text.substring(1, text.length - 1)) }

    /** Digits following `e` / `E`. */
    val exponentPart = skip(exponent) and integer

    /** A dot optionally followed by digits (fraction after an integer part, e.g. `1.` or `1.5`). */
    val floatingPointPart = skip(dot) and optional(integer)

    /** A dot that must be followed by digits (number without integer part, e.g. `.5`). */
    val onlyFloatingPart = skip(dot) and integer

    /**
     * Number: the matched pieces are re-assembled into a textual literal
     * (`int[.frac][eEXP]` or `.frac[eEXP]`) and converted with [String.toDouble].
     */
    val number: Parser<Double> = ((integer and optional(floatingPointPart))
        .map { (int, floatPart) ->
            int.text + (floatPart?.let { ".${it.text}" } ?: "")
        } or
        (onlyFloatingPart map { ".${it.text}" }) and
        optional(exponentPart map { "e${it.text}" }))
        .map { (mantissa, exponentText) ->
            (mantissa + (exponentText ?: "")).toDouble()
        }

    /** Any non-container JSON value. */
    val jsonPrimitiveValue: Parser<Any?> = jsonNull or jsonBool or string or number

    /**
     * Object: `{` followed by zero or more comma-separated `key : value` pairs and `}`.
     * When a key occurs more than once, the last value wins.
     */
    val jsonObject: Parser<Map<String, Any?>> = (skip(openingBrace) and
        separated(string and skip(colon) and parser(this::jsonValue), comma, true) and
        skip(closingBrace))
        .map { members ->
            members.terms.associate { (key, value) -> key to value }
        }

    /** Array: `[` followed by zero or more comma-separated values and `]`. */
    val jsonArray: Parser<List<Any?>> = (skip(openingBracket) and
        separated(parser(this::jsonValue), comma, true) and
        skip(closingBracket))
        .map { it.terms }

    /**
     * Any JSON value. Objects and arrays refer back to this property through
     * `parser(this::jsonValue)` because it is declared after them.
     */
    val jsonValue: Parser<Any?> = jsonPrimitiveValue or jsonObject or jsonArray

    override val rootParser = jsonValue

    /**
     * Decodes the backslash escapes admitted by [stringLiteral].
     *
     * @param raw string content without the surrounding quotes.
     * @return [raw] with `\n`, `\r`, `\t`, `\b`, `\f` replaced by the control
     *   character they denote, and `\"` / `\\` replaced by the escaped character.
     */
    private fun unescape(raw: String): String {
        // Fast path: nothing to decode.
        if ('\\' !in raw) return raw

        val decoded = StringBuilder(raw.length)
        var index = 0
        while (index < raw.length) {
            val current = raw[index]
            if (current == '\\' && index + 1 < raw.length) {
                val escaped = raw[index + 1]
                decoded.append(
                    when (escaped) {
                        'n' -> '\n'
                        'r' -> '\r'
                        't' -> '\t'
                        'b' -> '\b'
                        'f' -> '\u000C' // form feed; Kotlin has no '\f' literal
                        else -> escaped // '"' and '\\' stand for themselves
                    }
                )
                index += 2
            } else {
                decoded.append(current)
                index += 1
            }
        }
        return decoded.toString()
    }
}
@silmeth
Copy link
Author

silmeth commented Oct 12, 2017

Escaped Unicode characters (e.g. "\u2192\uD83D\uDE00") are not parsed by this grammar at the moment.

@silmeth
Copy link
Author

silmeth commented Oct 12, 2017

Also, even though the grammar correctly recognizes regular escaped special characters, it does not decode them: a string such as "This string,\ncalled \"inner quoted\"\nparses just fine" currently parses as:
This string,\ncalled \"inner quoted\"\nparses just fine
instead of expected:

This string,
called "inner quoted"
parses just fine

But this is easy to correct (one needs to handle escape characters during mapping of the string parser).

@silmeth
Copy link
Author

silmeth commented Oct 13, 2017

Improved version of this parser lives in this repo.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment