Skip to content

Instantly share code, notes, and snippets.

@dduan
Last active February 8, 2018 08:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dduan/8884d5bf1987f761f3a7973fb92d2afa to your computer and use it in GitHub Desktop.
Save dduan/8884d5bf1987f761f3a7973fb92d2afa to your computer and use it in GitHub Desktop.
A hand-made recursive descend JSON parser for fun.
enum State {
enum String {
case normal
case escape
case unicode(Int)
}
}
final class Context {
let contents: [UnicodeScalar]
var position: Int
init(_ contents: String) {
self.contents = Array(contents.unicodeScalars)
self.position = 0
}
@inline(__always)
func current() -> UnicodeScalar {
return self.contents[self.position]
}
var exhausted: Bool {
return self.contents.count == self.position
}
}
enum ParseError: Error {
case invalidEscapeCharacter
case invalidUnicodeEscapeSequence
case missingClosingQuoteInString
case invalidNumber
case unexpectedTerminationWhileParsingNumber(String)
case unexpectedTerminationWhileParsingTrue
case unexpectedTerminationWhileParsingFalse
case unexpectedTerminationWhileParsingNull
case invalidToken
case missingKey
case missingExpectedCharacter(UnicodeScalar)
case duplicateKey(String)
case invalidCharacter(UnicodeScalar)
}
private let escapeCharacterMap: [UnicodeScalar: UnicodeScalar] = [
"\"": "\"", "\\": "\\", "/": "/", "b": "\u{8}",
"f": "\u{c}", "n": "\n", "r": "\r", "t": "\t",
]
private let escapeCharacters = [UnicodeScalar](escapeCharacterMap.keys)
private let hexCharacters = [UnicodeScalar]("0123456789abcdef".unicodeScalars)
private let numberCharacters = [UnicodeScalar]("0123456789+-eE.".unicodeScalars)
@inline(__always)
func parseString(_ context: Context) throws -> (String, Context) {
var result = [UnicodeScalar]()
result.reserveCapacity(context.contents.count - context.position)
var state = State.String.normal
context.position += 1 // eat "
while context.current() != "\"" {
if context.position == context.contents.count {
throw ParseError.missingClosingQuoteInString
}
let c = context.current()
switch state {
case .normal:
if c == "\\" {
state = .escape
} else {
result.append(c)
}
case .escape:
if escapeCharacters.contains(c) {
result.append(escapeCharacterMap[c]!)
state = .normal
} else if c == "u" {
state = .unicode(0)
} else {
throw ParseError.invalidEscapeCharacter
}
case .unicode(let count):
if !hexCharacters.contains(c) {
throw ParseError.invalidUnicodeEscapeSequence
}
if count != 3 {
state = .unicode(count + 1)
} else {
let hexStart = context.position - 4
let hexEnd = context.position
let hexString = String(String.UnicodeScalarView(context.contents[hexStart..<hexEnd]))
guard let character = UnicodeScalar(Int(hexString, radix: 16)!) else {
throw ParseError.invalidUnicodeEscapeSequence
}
result.append(character)
state = .normal
}
}
context.position += 1
}
context.position += 1 // eat "
return (String(String.UnicodeScalarView(result)), context)
}
@inline(__always)
func parseNumber(_ context: Context) throws -> (Double, Context) {
let start = context.position
var end = start
while end < context.contents.count && numberCharacters.contains(context.contents[end]) {
end += 1
}
let scalars = context.contents[start..<end]
let numberString = String(String.UnicodeScalarView(scalars))
guard let number = Double(numberString) else {
throw ParseError.invalidNumber
}
context.position = end
return (number, context)
}
@discardableResult
@inline(__always)
func parseTrue(_ context: Context) throws -> (Bool, Context) {
let start = context.position
let end = start + 4
guard end < context.contents.count else {
throw ParseError.unexpectedTerminationWhileParsingTrue
}
guard Array(context.contents[start..<end]) == ["t", "r", "u", "e"] else {
throw ParseError.invalidToken
}
context.position = end
return (true, context)
}
@discardableResult
@inline(__always)
func parseFalse(_ context: Context) throws -> (Bool, Context) {
let start = context.position
let end = start + 5
guard end < context.contents.count else {
throw ParseError.unexpectedTerminationWhileParsingFalse
}
guard Array(context.contents[start..<end]) == ["f", "a", "l", "s", "e"] else {
throw ParseError.invalidToken
}
context.position = end
return (false, context)
}
@discardableResult
@inline(__always)
func parseNull(_ context: Context) throws -> Context {
let start = context.position
let end = start + 4
guard end < context.contents.endIndex else {
throw ParseError.unexpectedTerminationWhileParsingNull
}
guard Array(context.contents[start..<end]) == ["n", "u", "l", "l"] else {
throw ParseError.invalidToken
}
context.position = end
return context
}
@discardableResult
@inline(__always)
func skipWhitespace(_ context: Context) -> Context {
while context.current() == " "
|| context.current() == "\t"
|| context.current() == "\n"
|| context.current() == "\r"
{
context.position += 1
}
return context
}
indirect enum JSON {
case null
case bool(Bool)
case number(Double)
case string(String)
case array([JSON])
case object([String: JSON])
}
extension JSON: CustomStringConvertible {
var description: String {
switch self {
case .null:
return "null"
case .bool(let b):
return b ? "true" : "false"
case .number(let n):
return "\(n)"
case .string(let s):
return "\"\(s)\""
case .array(let array):
let arrayDescription = array
.map { "\($0)" }
.joined(separator: ", ")
return "[\(arrayDescription)]"
case .object(let d):
let dDescription = d
.map { "\($0): \($1)" }
.joined(separator: ", ")
return "{\(dDescription)}"
}
}
}
@discardableResult
@inline(__always)
func eat(_ code: UnicodeScalar, context: Context) throws -> Context {
guard context.current() == code else {
throw ParseError.missingExpectedCharacter(code)
}
context.position += 1
return context
}
func parseObject(_ context: Context) throws -> ([String: JSON], Context) {
var result = [String: JSON]()
// try eat("{", context: context)
context.position += 1
skipWhitespace(context)
while context.current() != "}" {
guard context.current() == "\"" else {
throw ParseError.missingKey
}
let (key, _) = try parseString(context)
skipWhitespace(context)
try eat(":", context: context)
let (value, _) = try parse(context)
guard !result.keys.contains(key) else {
throw ParseError.duplicateKey(key)
}
result[key] = value
skipWhitespace(context)
if context.current() != "}" {
try eat(",", context: context)
skipWhitespace(context)
}
}
context.position += 1 // eat "}"
return (result, context)
}
func parseArray(_ context: Context) throws -> ([JSON], Context) {
var result = [JSON]()
// try eat("[", context: context)
context.position += 1
skipWhitespace(context)
while context.current() != "]" {
let (value, _) = try parse(context)
result.append(value)
skipWhitespace(context)
if context.current() != "]" {
try eat(",", context: context)
skipWhitespace(context)
}
}
context.position += 1
return (result, context)
}
func parse(_ context: Context) throws -> (JSON, Context) {
skipWhitespace(context)
switch context.current() {
case "{":
let (object, _) = try parseObject(context)
return (.object(object), context)
case "[":
let (array, _) = try parseArray(context)
return (.array(array), context)
case "\"":
let (string, _) = try parseString(context)
return (.string(string), context)
case "t":
let _ = try parseTrue(context)
return (.bool(true), context)
case "f":
let _ = try parseTrue(context)
return (.bool(false), context)
case "n":
let _ = try parseNull(context)
return (.null, context)
case "-", "0"..."9":
let (n, _) = try parseNumber(context)
return (.number(n), context)
default:
throw ParseError.invalidCharacter(context.current())
}
}
import Foundation
let s = "{\n\"name\": \"John Doe\",\n\"age\": 43,\n\"phones\": [\n\"+44 1234567\",\n\"+44 2345678\"\n]\n}"
let context = Context(s)
let start = Date()
var n = 10000
while n != 0 {
context.position = 0
try parse(context)
n -= 1
}
let end = Date()
print(end.timeIntervalSince(start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment