Skip to content

Instantly share code, notes, and snippets.

@ostcar
Created January 13, 2020 22:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ostcar/2261240f8c26378a7ad40cd3de80fb84 to your computer and use it in GitHub Desktop.
Save ostcar/2261240f8c26378a7ad40cd3de80fb84 to your computer and use it in GitHub Desktop.
Test decoding of big list of ints
package main
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"strconv"
"time"
)
const idsCount = 4_000_000
func main() {
ids := make([]int, idsCount)
for i := 0; i < idsCount; i++ {
ids[i] = i
}
start := time.Now()
jsonIDs, _ := json.Marshal(ids)
end := time.Since(start)
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond)
var newIDs []int
start = time.Now()
json.Unmarshal(jsonIDs, &newIDs)
end = time.Since(start)
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond)
if !testEq(ids, newIDs) {
panic("new IDs not equal")
}
start = time.Now()
jsonIDs = encodeJSON(ids)
end = time.Since(start)
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond)
start = time.Now()
newIDs = decodeJSON(jsonIDs)
end = time.Since(start)
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond)
if !testEq(ids, newIDs) {
panic("new IDs not equal")
}
start = time.Now()
jsonIDs = encodeBytes(ids)
end = time.Since(start)
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond)
start = time.Now()
newIDs = decodeBytes(jsonIDs)
end = time.Since(start)
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond)
if !testEq(ids, newIDs) {
panic("new IDs not equal")
}
}
func encodeJSON(ids []int) []byte {
buf := []byte{'['}
for _, id := range ids {
buf = strconv.AppendInt(buf, int64(id), 10)
buf = append(buf, ',')
}
buf[len(buf)-1] = ']'
return buf
}
func decodeJSON(buf []byte) []int {
var out []int
buf = buf[1:]
var idx int
for {
idx = bytes.IndexByte(buf, ',')
if idx == -1 {
break
}
id, _ := strconv.Atoi(string(buf[:idx]))
out = append(out, id)
buf = buf[idx+1:]
}
id, _ := strconv.Atoi(string(buf[:len(buf)-1]))
out = append(out, id)
return out
}
func encodeBytes(ids []int) []byte {
buf := bytes.NewBuffer(nil)
for _, id := range ids {
binary.Write(buf, binary.LittleEndian, int32(id))
}
return buf.Bytes()
}
func decodeBytes(b []byte) []int {
r := bytes.NewReader(b)
var out []int
var id int32
for {
if err := binary.Read(r, binary.LittleEndian, &id); err == io.EOF {
break
}
out = append(out, int(id))
}
return out
}
func testEq(a, b []int) bool {
// If one is nil, the other must also be nil.
if (a == nil) != (b == nil) {
return false
}
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
import json
import time
from typing import List
ids = list(range(4_000_000))
# Encode ids to json
start = time.time()
json_ids = json.dumps(ids, separators=(",", ":"))
end = int(1000 * (time.time() - start))
print(f"encoding is {len(json_ids)} bytes, took {end} ms")
# Decode ids from json
start = time.time()
new_ids = json.loads(json_ids)
end = int(1000 * (time.time() - start))
print(f"decoding, took {end} ms")
if new_ids != ids:
raise RuntimeError()
def encode_json(ids: List[int]) -> str:
out = bytearray()
out.append(ord("["))
for id in ids:
out.extend(str(id).encode())
out.append(ord(","))
out[-1] = ord("]")
return out.decode()
def decode_json(s: str) -> List[int]:
out = []
i = 1
while True:
try:
j = s.index(",", i)
except ValueError:
break
out.append(int(s[i:j]))
i = j+1
out.append(int(s[i:-1]))
return out
# Encode ids to json
start = time.time()
json_ids = encode_json(ids)
end = int(1000 * (time.time() - start))
print(f"encoding is {len(json_ids)} bytes, took {end} ms")
# Decode ids from json
start = time.time()
new_ids = decode_json(json_ids)
end = int(1000 * (time.time() - start))
print(f"decoding, took {end} ms")
if new_ids != ids:
raise RuntimeError()
def int_to_bytes(ids: List[int]) -> bytes:
out = bytearray(len(ids)*4)
for id in ids:
out.extend(id.to_bytes(4, "big"))
return out
def bytes_to_int(b: bytes) -> List[int]:
out = []
while b:
id = int.from_bytes(b[:4], "big")
out.append(id)
b = b[4:]
return out
# Encode ids to bytes
start = time.time()
bytes_ids = int_to_bytes(ids)
end = int(1000 * (time.time() - start))
print(f"encoding is {len(json_ids)} bytes, took {end} ms")
if False:
# Devode ids from bytes
start = time.time()
new_ids = bytes_to_int(bytes_ids)
end = int(1000 * (time.time() - start))
if new_ids != ids:
raise RuntimeError()
print(f"decoding, took {end} ms")
$ python bigjson.py
encoding is 30888891 bytes, took 404 ms
decoding, took 349 ms
encoding is 30888891 bytes, took 1440 ms
decoding, took 1790 ms
encoding is 30888891 bytes, took 622 ms
$ go run bigjson.go
encoded is 30888891 bytes, took 174 ms
decoding, took 893 ms
encoded is 30888891 bytes, took 111 ms
decoding, took 153 ms
encoded is 16000000 bytes, took 180 ms
decoding, took 175 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment