Last active
February 8, 2018 08:52
-
-
Save dduan/8884d5bf1987f761f3a7973fb92d2afa to your computer and use it in GitHub Desktop.
A hand-made recursive descend JSON parser for fun.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
enum State { | |
enum String { | |
case normal | |
case escape | |
case unicode(Int) | |
} | |
} | |
final class Context { | |
let contents: [UnicodeScalar] | |
var position: Int | |
init(_ contents: String) { | |
self.contents = Array(contents.unicodeScalars) | |
self.position = 0 | |
} | |
@inline(__always) | |
func current() -> UnicodeScalar { | |
return self.contents[self.position] | |
} | |
var exhausted: Bool { | |
return self.contents.count == self.position | |
} | |
} | |
enum ParseError: Error { | |
case invalidEscapeCharacter | |
case invalidUnicodeEscapeSequence | |
case missingClosingQuoteInString | |
case invalidNumber | |
case unexpectedTerminationWhileParsingNumber(String) | |
case unexpectedTerminationWhileParsingTrue | |
case unexpectedTerminationWhileParsingFalse | |
case unexpectedTerminationWhileParsingNull | |
case invalidToken | |
case missingKey | |
case missingExpectedCharacter(UnicodeScalar) | |
case duplicateKey(String) | |
case invalidCharacter(UnicodeScalar) | |
} | |
private let escapeCharacterMap: [UnicodeScalar: UnicodeScalar] = [ | |
"\"": "\"", "\\": "\\", "/": "/", "b": "\u{8}", | |
"f": "\u{c}", "n": "\n", "r": "\r", "t": "\t", | |
] | |
private let escapeCharacters = [UnicodeScalar](escapeCharacterMap.keys) | |
private let hexCharacters = [UnicodeScalar]("0123456789abcdef".unicodeScalars) | |
private let numberCharacters = [UnicodeScalar]("0123456789+-eE.".unicodeScalars) | |
@inline(__always) | |
func parseString(_ context: Context) throws -> (String, Context) { | |
var result = [UnicodeScalar]() | |
result.reserveCapacity(context.contents.count - context.position) | |
var state = State.String.normal | |
context.position += 1 // eat " | |
while context.current() != "\"" { | |
if context.position == context.contents.count { | |
throw ParseError.missingClosingQuoteInString | |
} | |
let c = context.current() | |
switch state { | |
case .normal: | |
if c == "\\" { | |
state = .escape | |
} else { | |
result.append(c) | |
} | |
case .escape: | |
if escapeCharacters.contains(c) { | |
result.append(escapeCharacterMap[c]!) | |
state = .normal | |
} else if c == "u" { | |
state = .unicode(0) | |
} else { | |
throw ParseError.invalidEscapeCharacter | |
} | |
case .unicode(let count): | |
if !hexCharacters.contains(c) { | |
throw ParseError.invalidUnicodeEscapeSequence | |
} | |
if count != 3 { | |
state = .unicode(count + 1) | |
} else { | |
let hexStart = context.position - 4 | |
let hexEnd = context.position | |
let hexString = String(String.UnicodeScalarView(context.contents[hexStart..<hexEnd])) | |
guard let character = UnicodeScalar(Int(hexString, radix: 16)!) else { | |
throw ParseError.invalidUnicodeEscapeSequence | |
} | |
result.append(character) | |
state = .normal | |
} | |
} | |
context.position += 1 | |
} | |
context.position += 1 // eat " | |
return (String(String.UnicodeScalarView(result)), context) | |
} | |
@inline(__always) | |
func parseNumber(_ context: Context) throws -> (Double, Context) { | |
let start = context.position | |
var end = start | |
while end < context.contents.count && numberCharacters.contains(context.contents[end]) { | |
end += 1 | |
} | |
let scalars = context.contents[start..<end] | |
let numberString = String(String.UnicodeScalarView(scalars)) | |
guard let number = Double(numberString) else { | |
throw ParseError.invalidNumber | |
} | |
context.position = end | |
return (number, context) | |
} | |
@discardableResult | |
@inline(__always) | |
func parseTrue(_ context: Context) throws -> (Bool, Context) { | |
let start = context.position | |
let end = start + 4 | |
guard end < context.contents.count else { | |
throw ParseError.unexpectedTerminationWhileParsingTrue | |
} | |
guard Array(context.contents[start..<end]) == ["t", "r", "u", "e"] else { | |
throw ParseError.invalidToken | |
} | |
context.position = end | |
return (true, context) | |
} | |
@discardableResult | |
@inline(__always) | |
func parseFalse(_ context: Context) throws -> (Bool, Context) { | |
let start = context.position | |
let end = start + 5 | |
guard end < context.contents.count else { | |
throw ParseError.unexpectedTerminationWhileParsingFalse | |
} | |
guard Array(context.contents[start..<end]) == ["f", "a", "l", "s", "e"] else { | |
throw ParseError.invalidToken | |
} | |
context.position = end | |
return (false, context) | |
} | |
@discardableResult | |
@inline(__always) | |
func parseNull(_ context: Context) throws -> Context { | |
let start = context.position | |
let end = start + 4 | |
guard end < context.contents.endIndex else { | |
throw ParseError.unexpectedTerminationWhileParsingNull | |
} | |
guard Array(context.contents[start..<end]) == ["n", "u", "l", "l"] else { | |
throw ParseError.invalidToken | |
} | |
context.position = end | |
return context | |
} | |
@discardableResult | |
@inline(__always) | |
func skipWhitespace(_ context: Context) -> Context { | |
while context.current() == " " | |
|| context.current() == "\t" | |
|| context.current() == "\n" | |
|| context.current() == "\r" | |
{ | |
context.position += 1 | |
} | |
return context | |
} | |
indirect enum JSON { | |
case null | |
case bool(Bool) | |
case number(Double) | |
case string(String) | |
case array([JSON]) | |
case object([String: JSON]) | |
} | |
extension JSON: CustomStringConvertible { | |
var description: String { | |
switch self { | |
case .null: | |
return "null" | |
case .bool(let b): | |
return b ? "true" : "false" | |
case .number(let n): | |
return "\(n)" | |
case .string(let s): | |
return "\"\(s)\"" | |
case .array(let array): | |
let arrayDescription = array | |
.map { "\($0)" } | |
.joined(separator: ", ") | |
return "[\(arrayDescription)]" | |
case .object(let d): | |
let dDescription = d | |
.map { "\($0): \($1)" } | |
.joined(separator: ", ") | |
return "{\(dDescription)}" | |
} | |
} | |
} | |
@discardableResult | |
@inline(__always) | |
func eat(_ code: UnicodeScalar, context: Context) throws -> Context { | |
guard context.current() == code else { | |
throw ParseError.missingExpectedCharacter(code) | |
} | |
context.position += 1 | |
return context | |
} | |
func parseObject(_ context: Context) throws -> ([String: JSON], Context) { | |
var result = [String: JSON]() | |
// try eat("{", context: context) | |
context.position += 1 | |
skipWhitespace(context) | |
while context.current() != "}" { | |
guard context.current() == "\"" else { | |
throw ParseError.missingKey | |
} | |
let (key, _) = try parseString(context) | |
skipWhitespace(context) | |
try eat(":", context: context) | |
let (value, _) = try parse(context) | |
guard !result.keys.contains(key) else { | |
throw ParseError.duplicateKey(key) | |
} | |
result[key] = value | |
skipWhitespace(context) | |
if context.current() != "}" { | |
try eat(",", context: context) | |
skipWhitespace(context) | |
} | |
} | |
context.position += 1 // eat "}" | |
return (result, context) | |
} | |
func parseArray(_ context: Context) throws -> ([JSON], Context) { | |
var result = [JSON]() | |
// try eat("[", context: context) | |
context.position += 1 | |
skipWhitespace(context) | |
while context.current() != "]" { | |
let (value, _) = try parse(context) | |
result.append(value) | |
skipWhitespace(context) | |
if context.current() != "]" { | |
try eat(",", context: context) | |
skipWhitespace(context) | |
} | |
} | |
context.position += 1 | |
return (result, context) | |
} | |
func parse(_ context: Context) throws -> (JSON, Context) { | |
skipWhitespace(context) | |
switch context.current() { | |
case "{": | |
let (object, _) = try parseObject(context) | |
return (.object(object), context) | |
case "[": | |
let (array, _) = try parseArray(context) | |
return (.array(array), context) | |
case "\"": | |
let (string, _) = try parseString(context) | |
return (.string(string), context) | |
case "t": | |
let _ = try parseTrue(context) | |
return (.bool(true), context) | |
case "f": | |
let _ = try parseTrue(context) | |
return (.bool(false), context) | |
case "n": | |
let _ = try parseNull(context) | |
return (.null, context) | |
case "-", "0"..."9": | |
let (n, _) = try parseNumber(context) | |
return (.number(n), context) | |
default: | |
throw ParseError.invalidCharacter(context.current()) | |
} | |
} | |
import Foundation | |
let s = "{\n\"name\": \"John Doe\",\n\"age\": 43,\n\"phones\": [\n\"+44 1234567\",\n\"+44 2345678\"\n]\n}" | |
let context = Context(s) | |
let start = Date() | |
var n = 10000 | |
while n != 0 { | |
context.position = 0 | |
try parse(context) | |
n -= 1 | |
} | |
let end = Date() | |
print(end.timeIntervalSince(start)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment