Skip to content

Instantly share code, notes, and snippets.

@Jimmy-Prime
Last active March 25, 2021 08:41
Show Gist options
  • Save Jimmy-Prime/fbe5004203d8d0787804dcbf0bf18a82 to your computer and use it in GitHub Desktop.
Save Jimmy-Prime/fbe5004203d8d0787804dcbf0bf18a82 to your computer and use it in GitHub Desktop.
serialize json from scratch
import Foundation
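// JSON value forms recognized by the parser below:
// 1. null
// 2. String
// 3. Number (unsigned integer digits only)
// 4. { "key": JSON, "key": JSON, ... }
// 5. [JSON, JSON, ...]
// Not handled: the literals true/false, and signed, fractional, or exponent numbers.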
/// Position of the byte-level state machine within the JSON grammar.
enum Status {
    /// The next significant byte must begin a value.
    case startOfJSON

    /// Inside a string value / inside a run of number digits.
    case inString, inNumber

    /// Key handling inside an object: expecting the opening quote of a key,
    /// reading the key's bytes, and expecting the `:` that follows a key.
    case waitForKey, inKey, endKey

    /// A value has just ended (expecting `,`, `}` or `]`) / the top-level value is complete.
    case endObject, done
}
/// One entry of the nesting stack kept while walking the document.
enum Path {
    /// An array or an object that has been opened and not yet closed.
    case array, object

    /// An object key that has been read and is still waiting for its value.
    case key(String)
}
/// Mutable snapshot of the parser; a copy is attached to every `IllForm` error.
struct State {
    /// Current position in the grammar.
    var status = Status.startOfJSON
    /// Stack of open containers and pending keys, outermost first.
    var paths = [Path]()
    /// Offset of the first byte of the string, key or number being read (reset to 0 afterwards).
    var bufferStartIndex = 0
    /// `true` when the previous byte was a backslash inside a string or key.
    var escapeNext = false
    /// Offset of the byte currently being examined.
    var index = 0
}
/// ASCII constants and character classes used by the JSON state machine.
extension UInt8 {
    // Structural characters.
    static var leftCurlyBracket: UInt8 { UInt8(ascii: "{") }
    static var rightCurlyBracket: UInt8 { UInt8(ascii: "}") }
    static var leftSquareBracket: UInt8 { UInt8(ascii: "[") }
    static var rightSquareBracket: UInt8 { UInt8(ascii: "]") }
    static var comma: UInt8 { UInt8(ascii: ",") }
    static var quotation: UInt8 { UInt8(ascii: "\"") }
    static var colon: UInt8 { UInt8(ascii: ":") }

    // Insignificant whitespace. JSON (RFC 8259) allows exactly these four bytes
    // between tokens; no other control character counts as whitespace.
    static var tab: UInt8 { UInt8(ascii: "\t") }
    static var newline: UInt8 { UInt8(ascii: "\n") }
    static var carriageReturn: UInt8 { UInt8(ascii: "\r") }
    static var space: UInt8 { UInt8(ascii: " ") }

    /// The backslash that introduces an escape sequence inside a string.
    static var escapeControl: UInt8 { UInt8(ascii: "\\") }

    // Letters of the `null` literal.
    static var n: UInt8 { UInt8(ascii: "n") }
    static var u: UInt8 { UInt8(ascii: "u") }
    static var l: UInt8 { UInt8(ascii: "l") }

    /// `true` for the ASCII digits `0` through `9`.
    var isDecimal: Bool {
        (UInt8(ascii: "0")...UInt8(ascii: "9")).contains(self)
    }

    /// `true` for JSON whitespace: tab, line feed, carriage return or space.
    /// Carriage return is included so that documents with CRLF line endings are accepted.
    var isWhiteSpace: Bool {
        self == .tab || self == .newline || self == .carriageReturn || self == .space
    }
}
/// Error thrown when the input does not fit the grammar the parser accepts.
struct IllForm: Error {
    /// The byte that was being examined (0 when the input ended prematurely).
    let byte: UInt8
    /// Copy of the parser state at the moment the error was raised.
    let state: State

    /// Same signature as the memberwise initializer, spelled out explicitly.
    init(byte: UInt8, state: State) {
        self.byte = byte
        self.state = state
    }
}
/// Validates a UTF-8 encoded JSON document one byte at a time with a small state machine.
///
/// Despite its name, this type produces no output: `serialize()` walks the bytes and throws
/// `IllForm` at the first byte that does not fit the supported grammar. Supported values are
/// `null`, strings, unsigned integers, objects and arrays (including empty ones). The literals
/// `true`/`false` and signed, fractional or exponent numbers are rejected.
class Serializer {
    let data: Data
    var state = State()

    /// - Parameter data: The UTF-8 bytes of the JSON text to check.
    init(data: Data) {
        // A `Data` slice keeps the indices of its parent buffer. The parser addresses bytes
        // by zero-based offsets (`state.index`), so re-base slices onto a fresh buffer.
        self.data = data.startIndex == 0 ? data : Data(data)
    }

    /// Runs the state machine over the whole input.
    ///
    /// - Throws: `IllForm` carrying the offending byte and a copy of the parser state, or
    ///   byte `0` when the input ends before the top-level value is complete.
    func serialize() throws {
        while state.index < data.count {
            try process(data[state.index])
            state.index += 1
        }

        // A number has no closing delimiter, so one that runs up to the very end of the
        // input is still pending when the loop exits.
        if case .inNumber = state.status {
            finishNumber()
        }

        guard case .done = state.status else {
            throw IllForm(byte: 0, state: state)
        }
    }

    /// Routes one byte to the handler for the current status.
    private func process(_ byte: UInt8) throws {
        switch state.status {
        case .startOfJSON:
            try handleStartOfJSON(byte)
        case .inString:
            try handleInString(byte)
        case .inNumber:
            try handleInNumber(byte)
        case .waitForKey:
            try handleWaitForKey(byte)
        case .inKey:
            try handleInKey(byte)
        case .endKey:
            try handleEndKey(byte)
        case .endObject:
            try handleEndObject(byte)
        case .done:
            // Only insignificant whitespace may follow the top-level value.
            guard byte.isWhiteSpace else {
                throw IllForm(byte: byte, state: state)
            }
        }
    }

    /// Handles the first significant byte of a value.
    func handleStartOfJSON(_ byte: UInt8) throws {
        switch byte {
        case .leftCurlyBracket:
            if let closing = indexOfNextSignificantByte(after: state.index),
               data[closing] == .rightCurlyBracket {
                // Empty object `{}`: consume through the closing brace. `.waitForKey` would
                // reject `}`, which is what keeps `{"a": 1,}` invalid.
                state.index = closing
                finishValue()
            } else {
                state.status = .waitForKey
                state.paths.append(.object)
            }
        case .leftSquareBracket:
            if let closing = indexOfNextSignificantByte(after: state.index),
               data[closing] == .rightSquareBracket {
                // Empty array `[]`: consume through the closing bracket. Accepting `]` in
                // this state generally would also accept a trailing comma such as `[1,]`.
                state.index = closing
                finishValue()
            } else {
                // Status stays `.startOfJSON`: the first element follows.
                state.paths.append(.array)
            }
        case .quotation:
            state.status = .inString
            state.bufferStartIndex = state.index + 1
        case byte where byte.isDecimal:
            state.status = .inNumber
            state.bufferStartIndex = state.index
        case byte where byte.isWhiteSpace:
            break
        case .n:
            // Look ahead for the remaining letters of `null`.
            guard data.count > state.index + 3,
                  data[state.index + 1] == .u,
                  data[state.index + 2] == .l,
                  data[state.index + 3] == .l else {
                throw IllForm(byte: byte, state: state)
            }
            // Skip "ul"; the main loop steps over the final "l".
            state.index += 3
            finishValue()
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Consumes one byte of a string value; the closing quote ends the value.
    func handleInString(_ byte: UInt8) throws {
        guard !state.escapeNext else {
            // The byte after a backslash is taken verbatim, so `\"` does not end the string.
            state.escapeNext = false
            return
        }
        switch byte {
        case .quotation:
            // The bytes between the quotes must be valid UTF-8.
            guard String(bytes: data[state.bufferStartIndex ..< state.index], encoding: .utf8) != nil else {
                throw IllForm(byte: byte, state: state)
            }
            state.bufferStartIndex = 0
            finishValue()
        case .escapeControl:
            state.escapeNext = true
        default:
            break
        }
    }

    /// Consumes one byte of a number; the first non-digit byte ends it.
    func handleInNumber(_ byte: UInt8) throws {
        // TODO add floating point
        // TODO add scientific notation
        // TODO cannot have leading zero
        guard !byte.isDecimal else { return }
        finishNumber()
        // The terminating byte is not part of the number, so hand it to whichever state
        // now applies (`.endObject` inside a container, `.done` at top level).
        try process(byte)
    }

    /// Skips whitespace until the opening quote of an object key.
    func handleWaitForKey(_ byte: UInt8) throws {
        switch byte {
        case .quotation:
            state.status = .inKey
            state.bufferStartIndex = state.index + 1
        case byte where byte.isWhiteSpace:
            break
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Consumes one byte of an object key; the closing quote pushes the key on the path stack.
    func handleInKey(_ byte: UInt8) throws {
        guard !state.escapeNext else {
            state.escapeNext = false
            return
        }
        switch byte {
        case .quotation:
            guard let key = String(bytes: data[state.bufferStartIndex ..< state.index], encoding: .utf8) else {
                throw IllForm(byte: byte, state: state)
            }
            state.status = .endKey
            state.paths.append(.key(key))
            state.bufferStartIndex = 0
        case .escapeControl:
            state.escapeNext = true
        default:
            break
        }
    }

    /// Skips whitespace until the `:` separating a key from its value.
    func handleEndKey(_ byte: UInt8) throws {
        switch byte {
        case .colon:
            // The value after the colon is itself a JSON value.
            state.status = .startOfJSON
        case byte where byte.isWhiteSpace:
            break
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Handles the byte following a completed value: whitespace, a separator, or a closer.
    func handleEndObject(_ byte: UInt8) throws {
        switch byte {
        case byte where byte.isWhiteSpace:
            break
        case .comma:
            guard let lastPath = state.paths.last else {
                throw IllForm(byte: byte, state: state)
            }
            switch lastPath {
            case .array:
                state.status = .startOfJSON
            case .object:
                state.status = .waitForKey
            case .key:
                throw IllForm(byte: byte, state: state)
            }
        case .rightCurlyBracket:
            // Closes the innermost container, which must be an object.
            guard case .object = state.paths.popLast() else {
                throw IllForm(byte: byte, state: state)
            }
            // The closed object may itself have been the value of a key.
            popLastKeyIfNeeded()
        case .rightSquareBracket:
            // Closes the innermost container, which must be an array.
            guard case .array = state.paths.popLast() else {
                throw IllForm(byte: byte, state: state)
            }
            popLastKeyIfNeeded()
        default:
            throw IllForm(byte: byte, state: state)
        }
        if state.paths.isEmpty {
            state.status = .done
        }
    }

    /// Removes the pending key from the path stack once its value has been read.
    func popLastKeyIfNeeded() {
        if case .key = state.paths.last {
            state.paths.removeLast()
        }
    }

    /// Records that a value has just ended: drops the key it belonged to (if any) and moves
    /// to `.endObject`, or straight to `.done` when no container is open.
    private func finishValue() {
        popLastKeyIfNeeded()
        state.status = state.paths.isEmpty ? .done : .endObject
    }

    /// Ends the number being read. Its bytes are all ASCII digits (anything else leaves
    /// `.inNumber`), so unlike strings there is nothing to validate.
    private func finishNumber() {
        state.bufferStartIndex = 0
        finishValue()
    }

    /// Returns the offset of the first non-whitespace byte after `index`, or `nil` when only
    /// whitespace (or nothing) remains.
    private func indexOfNextSignificantByte(after index: Int) -> Int? {
        data[(index + 1)...].firstIndex { !$0.isWhiteSpace }
    }
}
// Sample document exercising nested objects, arrays, numbers, strings and null.
let string = """
{
"id": 100,
"name": "jimmylee",
"version": [0, 0, 1],
"avatar": null,
"prop": {
"start": "a",
"end": "b",
"favorite": [1, 2, 3],
"pref": {
"zone": "GMT",
"format": "yyyyMMMdd"
}
}
}
"""

// Run the sample's UTF-8 bytes through the state machine; an `IllForm` error
// propagates out of the script if the document is rejected.
let serializer = Serializer(data: Data(string.utf8))
try serializer.serialize()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment