Skip to content

Instantly share code, notes, and snippets.

@iOliverNguyen
Last active February 27, 2019 08:11
Show Gist options
  • Save iOliverNguyen/9fbabdf0d7aba3195d4d23d52665d61b to your computer and use it in GitHub Desktop.
Save iOliverNguyen/9fbabdf0d7aba3195d4d23d52665d61b to your computer and use it in GitHub Desktop.
Minimal JSON parser which works with correct input only
// Package µjson is a minimal JSON parser which works with correct input only.
//
// Use cases:
//  1. Walk through unstructured JSON.
//  2. Transform unstructured JSON.
//
// Both are done without fully unmarshalling the document into a
// map[string]interface{}.
//
// Caution: Behaviour is undefined on invalid JSON. Use on trusted input only.
package µjson
import (
"fmt"
"strings"
)
// Walk scans the JSON text in s, starting at byte offset i, and calls fn once
// for every token it encounters.
//
// The arguments passed to fn are:
//   - st: the nesting level of the token (0 for the top-level value).
//   - key: the raw object key, surrounding quotes included, or "" when the
//     token is not an object member.
//   - value: the raw token text. For a container this is the opening bracket
//     ("{" or "["); the matching closing bracket is reported by a separate call
//     with an empty key at the same level as the opening bracket.
//
// When fn returns false for an opening bracket the whole container is skipped:
// neither its contents nor its closing bracket are reported. The return value
// of fn is ignored for every other token.
//
// A single trailing newline in s is ignored. Walk stops after the top-level
// container is closed. It returns an error when the input ends in the middle
// of a value or when an unexpected byte follows a value; it does not otherwise
// validate the input.
func Walk(s []byte, i int, fn func(st int, key, value string) bool) error {
	// noSkip is the value of sst while no container is being skipped. It is the
	// largest int, so every real nesting level compares below it.
	const noSkip = int(^uint(0) >> 1)

	var si, ei, st int // start and end offset of the current token, nesting level
	var key string     // pending object key (quotes included), "" when none

	// sst is the level of the container being skipped; tokens are reported only
	// while st <= sst.
	sst := noSkip

	// Trim the last newline
	if len(s) > 0 && s[len(s)-1] == '\n' {
		s = s[:len(s)-1]
	}

value:
	if i >= len(s) {
		// A value was expected (empty input, or input truncated after an
		// opening bracket, ',' or ':').
		return eofError(i)
	}
	si = i
	switch s[i] {
	case 'n', 't': // null, true
		i += 4
		if i > len(s) {
			return eofError(len(s))
		}
		ei = i
		if st <= sst {
			fn(st, key, string(s[si:i]))
		}
		key = ""
		goto closing

	case 'f': // false
		i += 5
		if i > len(s) {
			return eofError(len(s))
		}
		ei = i
		if st <= sst {
			fn(st, key, string(s[si:i]))
		}
		key = ""
		goto closing

	case '{', '[':
		if st <= sst && !fn(st, key, string(s[i])) {
			sst = st // skip everything until this container is closed
		}
		key = ""
		st++
		i++
		goto value

	case '}', ']':
		// Empty container, possibly with whitespace between the brackets.
		goto closing

	case '"': // scan string
		for {
			i++
			if i >= len(s) {
				return eofError(i)
			}
			switch s[i] {
			case '\\': // escape sequence: step over the escaped byte too
				i++
			case '"': // end of string
				i++
				ei = i
				for i < len(s) && isJSONSpace(s[i]) {
					i++
				}
				// A string followed by ':' is an object key: it is not
				// reported here but picked up from s[si:ei] at the closing
				// label. Anything else (including end of input) is a value.
				if i >= len(s) || s[i] != ':' {
					if st <= sst {
						fn(st, key, string(s[si:ei]))
					}
					key = ""
				}
				goto closing
			}
		}

	case ' ', '\t', '\n', '\r': // space, ignore
		i++
		goto value

	default: // scan number: it runs up to the next delimiter or end of input
		for i < len(s) && !isValueEnd(s[i]) {
			i++
		}
		ei = i
		if ei == si {
			// A delimiter sits where a value should start.
			return parseError(i, s[i], "expect value")
		}
		for i < len(s) && isJSONSpace(s[i]) {
			i++
		}
		if st <= sst {
			fn(st, key, string(s[si:ei]))
		}
		key = ""
		goto closing
	}

closing:
	if i >= len(s) {
		return nil
	}
	switch s[i] {
	case ':':
		// The string just scanned was a key; remember it for the next value.
		key = string(s[si:ei])
		i++
		goto value
	case ',':
		i++
		goto value
	case ']', '}':
		st--
		switch {
		case st == sst:
			// The skipped container itself is closed: resume reporting.
			sst = noSkip
		case st < sst:
			fn(st, "", string(s[i]))
		}
		if st <= 0 {
			return nil
		}
		i++
		goto closing
	case ' ', '\t', '\n', '\r':
		i++ // space, ignore
		goto closing
	default:
		return parseError(i, s[i], `expect ']', '}' or ','`)
	}
}

// parseError builds the error returned when byte c at offset i is not what
// the parser expected; msg describes what was expected instead.
func parseError(i int, c byte, msg string) error {
	return fmt.Errorf("json error at %v '%c' 0x%2x: %v", i, c, c, msg)
}

// eofError builds the error returned when the input ends at offset i while a
// value is still incomplete or missing.
func eofError(i int) error {
	return fmt.Errorf("json error at %v: unexpected end of input", i)
}

// isJSONSpace reports whether c is one of the four JSON whitespace bytes.
func isJSONSpace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}

// isValueEnd reports whether c terminates a number token.
func isValueEnd(c byte) bool {
	return c == ',' || c == '}' || c == ']' || isJSONSpace(c)
}
// ShouldAddComma reports whether a ',' separator has to be written before the
// token value, given lastChar, the last byte already written to the output.
// No comma is needed before a closing bracket, nor directly after a comma or
// an opening bracket.
func ShouldAddComma(value string, lastChar byte) bool {
	switch value {
	case "}", "]":
		return false
	}
	switch lastChar {
	case ',', '{', '[':
		return false
	}
	return true
}
func Reconstruct(s []byte) ([]byte, error) {
b := make([]byte, 0, 1024)
err := Walk(s, 0, func(st int, key, value string) bool {
if len(b) != 0 && ShouldAddComma(value, b[len(b)-1]) {
b = append(b, ',')
}
if key != "" {
b = append(b, key...)
b = append(b, ':')
}
b = append(b, value...)
return true
})
return b, err
}
func FilterAndRename(b []byte, input []byte) (output []byte, _ error) {
err := Walk(input, 0, func(st int, key, value string) bool {
// Ignore fields with null value
if value == "null" {
return true
}
wrap := false
if key != "" {
// Remove quotes
key = key[1 : len(key)-1]
// Skip _ids
if strings.HasSuffix(key, "_ids") {
return false
}
// Rename external_ to x_
if strings.HasPrefix(key, "external_") {
key = "x_" + key[len("external_"):]
} else if (key == "id" || strings.HasSuffix(key, "_id")) &&
value[0] >= '0' && value[0] <= '9' {
wrap = true
}
}
if len(b) != 0 && ShouldAddComma(value, b[len(b)-1]) {
b = append(b, ',')
}
if key != "" {
b = append(b, '"')
b = append(b, key...)
b = append(b, '"')
b = append(b, ':')
}
if wrap {
b = append(b, '"')
b = append(b, value...)
b = append(b, '"')
} else {
b = append(b, value...)
}
return true
})
return b, err
}
package µjson
import (
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestWalk runs every table entry through Walk and through Reconstruct.
//
// For Walk, each callback is rendered as one line "<st> <key> <value>" (the
// key column is left out when the token has no key) and the lines, each
// preceded by a newline, are compared with exp. For Reconstruct, the expected
// output is the input with all spaces and the trailing newline removed.
func TestWalk(t *testing.T) {
	tests := []struct {
		inp string
		exp string
	}{
		{
			`null`,
			`
0 null`,
		},
		{
			"null\n", // end with newline
			`
0 null`,
		},
		{
			`{}`,
			`
0 {
0 }`,
		},
		{
			`{"foo":""}`,
			`
0 {
1 "foo" ""
0 }`,
		},
		{
			`{"foo": ""}`, // Space
			`
0 {
1 "foo" ""
0 }`,
		},
		{
			`{"foo":"bar"}`,
			`
0 {
1 "foo" "bar"
0 }`,
		},
		{
			`{"foo":"bar","baz":""}`,
			`
0 {
1 "foo" "bar"
1 "baz" ""
0 }`,
		},
		{
			`{ "foo" : "bar" , "baz" : 2 }`, // Space
			`
0 {
1 "foo" "bar"
1 "baz" 2
0 }`,
		},
		{
			`{"foo":null}`,
			`
0 {
1 "foo" null
0 }`,
		},
		{
			`{"foo":123}`,
			`
0 {
1 "foo" 123
0 }`,
		},
		{
			`{"foo":-123}`,
			`
0 {
1 "foo" -123
0 }`,
		},
		{
			`{"foo":42.1}`,
			`
0 {
1 "foo" 42.1
0 }`,
		},
		{
			`{"foo":+0}`,
			`
0 {
1 "foo" +0
0 }`,
		},
		{
			`{"foo":"b\"ar"}`,
			`
0 {
1 "foo" "b\"ar"
0 }`,
		},
		{
			`{"😀":"🎶\""}`,
			`
0 {
1 "😀" "🎶\""
0 }`,
		},
		{
			`{"foo":{}}`,
			`
0 {
1 "foo" {
1 }
0 }`,
		},
		{
			`{"foo":{"bar":false,"baz":true,"quix":null}}`,
			`
0 {
1 "foo" {
2 "bar" false
2 "baz" true
2 "quix" null
1 }
0 }`,
		},
		{
			`{"1":{"1.1":{"1.1.1":"foo","1.1.2":"bar"},"1.2":{"1.2.1":"baz"}}}`,
			`
0 {
1 "1" {
2 "1.1" {
3 "1.1.1" "foo"
3 "1.1.2" "bar"
2 }
2 "1.2" {
3 "1.2.1" "baz"
2 }
1 }
0 }`,
		},
		{
			`[]`,
			`
0 [
0 ]`,
		},
		{
			`[null]`,
			`
0 [
1 null
0 ]`,
		},
		{
			`[0]`,
			`
0 [
1 0
0 ]`,
		},
		{
			`["foo"]`,
			`
0 [
1 "foo"
0 ]`,
		},
		{
			`["",""]`,
			`
0 [
1 ""
1 ""
0 ]`,
		},
		{
			`["foo","bar"]`,
			`
0 [
1 "foo"
1 "bar"
0 ]`,
		},
		{
			`[[]]`,
			`
0 [
1 [
1 ]
0 ]`,
		},
		{
			`[{},[]]`,
			`
0 [
1 {
1 }
1 [
1 ]
0 ]`,
		},
		{
			`{"foo":[]}`,
			`
0 {
1 "foo" [
1 ]
0 }`,
		},
		{
			`{"foo":[{"k":"v"}]}`,
			`
0 {
1 "foo" [
2 {
3 "k" "v"
2 }
1 ]
0 }`,
		},
		{
			`{"foo":[{"k1":"v1","k2":"v2"}]}`,
			`
0 {
1 "foo" [
2 {
3 "k1" "v1"
3 "k2" "v2"
2 }
1 ]
0 }`,
		},
		{
			`{"foo":[{"k1.1":"v1.1","k1.2":"v1.2"},{"k2.1":"v2.1"}],"bar":{}}`,
			`
0 {
1 "foo" [
2 {
3 "k1.1" "v1.1"
3 "k1.2" "v1.2"
2 }
2 {
3 "k2.1" "v2.1"
2 }
1 ]
1 "bar" {
1 }
0 }`,
		},
		{
			`{"1":[{"2":{"k1":"v1","k2":"v2"}}]}`,
			`
0 {
1 "1" [
2 {
3 "2" {
4 "k1" "v1"
4 "k2" "v2"
3 }
2 }
1 ]
0 }`,
		},
		{
			`{"1":[{"2":[{"k1":"v1","k2":"v2"},{"k3":"v3"}]}]}`,
			`
0 {
1 "1" [
2 {
3 "2" [
4 {
5 "k1" "v1"
5 "k2" "v2"
4 }
4 {
5 "k3" "v3"
4 }
3 ]
2 }
1 ]
0 }`,
		},
		{
			`{ "1" : [ { "2": [ { "k1" : "v1" , "k2" : "v2" } ,{"k3":"v3" } ] } ] }`,
			`
0 {
1 "1" [
2 {
3 "2" [
4 {
5 "k1" "v1"
5 "k2" "v2"
4 }
4 {
5 "k3" "v3"
4 }
3 ]
2 }
1 ]
0 }`,
		},
	}

	for _, tt := range tests {
		t.Run("Walk/"+tt.inp, func(t *testing.T) {
			var b strings.Builder
			err := Walk([]byte(tt.inp), 0,
				func(st int, key, value string) bool {
					// Print the key column only when there is a key, so a
					// keyless token renders as "<st> <value>" with a single
					// space, matching the expectations in the table.
					if key == "" {
						fmt.Fprintf(&b, "\n%v %v", st, value)
					} else {
						fmt.Fprintf(&b, "\n%v %v %v", st, key, value)
					}
					return true
				})
			if err != nil {
				t.Error(err)
			} else if b.String() != tt.exp {
				t.Errorf("\nExpect: `%v`\nOutput: `%v`\n", tt.exp, b.String())
			}
		})
	}

	for _, tt := range tests {
		t.Run("Reconstruct/"+tt.inp, func(t *testing.T) {
			// Handle the special testcase ending with \n
			exp := tt.inp
			if exp[len(exp)-1] == '\n' {
				exp = exp[:len(exp)-1]
			}
			// None of the inputs has a space inside a string, so stripping
			// every space gives the compact form.
			exp = strings.Replace(exp, " ", "", -1)

			data, err := Reconstruct([]byte(tt.inp))
			if err != nil {
				t.Error(err)
			} else if s := string(data); s != exp {
				t.Errorf("\nExpect: %v\nOutput: %v\n", exp, s)
			}
		})
	}
}
// TestFilterAndRename feeds one document through FilterAndRename, starting
// from a nil output buffer, and compares the result with the expected text:
// null members and the "_ids" member removed, "external_" renamed to "x_",
// and the numeric "product_id" value quoted.
func TestFilterAndRename(t *testing.T) {
	const input = `{"external_id":"123","product_id":123456789,"amount":100000,"supplier_ids":[12345678,12345679],"tags":[123],"foo":null,"x":{"bar":null,"baz":false},"y":{}}`

	// TODO: Wrap ids in array
	const want = `{"x_id":"123","product_id":"123456789","amount":100000,"tags":[123],"x":{"baz":false},"y":{}}`

	got, err := FilterAndRename(nil, []byte(input))
	assert.NoError(t, err)
	assert.Equal(t, want, string(got))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment