Skip to content

Instantly share code, notes, and snippets.

@kajuberdut
Last active March 19, 2023 22:24
Show Gist options
  • Save kajuberdut/0191ec20f14253094792cd3c00f06257 to your computer and use it in GitHub Desktop.
Save kajuberdut/0191ec20f14253094792cd3c00f06257 to your computer and use it in GitHub Desktop.
Compare JSON and CBOR in Python with GZIP compression
  • Uncompressed JSON size: 13829
  • Uncompressed CBOR size: 7816
  • GZIP JSON size: 4215
  • GZIP CBOR size: 1192
  • JSON took 0.412476 seconds to execute 1000 times.
  • CBOR took 0.466668 seconds to execute 1000 times.
  • Deserialize JSON took 0.474627 seconds to execute 1000 times.
  • Deserialize CBOR took 0.686742 seconds to execute 1000 times.
  • JSON + GZIP took 2.089109 seconds to execute 1000 times.
  • CBOR + GZIP took 0.857445 seconds to execute 1000 times.
import timeit
def time_function(func, name, number=10, *args, **kwargs):
    """Time repeated calls of ``func`` and report the total.

    Args:
        func: Callable to benchmark.
        name: Label used in the printed report.
        number: How many times ``func`` is called. Because this parameter
            precedes ``*args``, a third positional argument is taken as
            ``number``; only positionals after it are forwarded to ``func``.
        *args: Positional arguments forwarded to ``func`` on every call.
        **kwargs: Keyword arguments forwarded to ``func`` on every call.

    Returns:
        The total elapsed time in seconds for all ``number`` calls, as
        measured by ``timeit.timeit``. The same figure is printed.
    """

    def to_time():
        # timeit needs a zero-argument callable, so bind the arguments here.
        func(*args, **kwargs)

    elapsed_time = timeit.timeit(to_time, number=number)
    print(f"{name} took {elapsed_time:.6f}" f" seconds to execute {number} times.")
    # Hand the measurement back so callers can compare runs programmatically
    # instead of parsing the printed line.
    return elapsed_time
def do_nothing(data):
    """Return ``data`` untouched; serves as the no-op compression stage."""
    unchanged = data
    return unchanged
def serialize_and_compress(data, serialize_function, compress_function=do_nothing):
    """Run ``data`` through a serialize stage, then a compress stage.

    Args:
        data: Object handed to ``serialize_function``.
        serialize_function: Callable applied to ``data`` first.
        compress_function: Callable applied to the serialized result;
            defaults to ``do_nothing``, which passes it through unchanged.

    Returns:
        Whatever ``compress_function`` returns for the serialized result.
    """
    return compress_function(serialize_function(data))
if __name__ == "__main__":
    # Benchmark driver: compare JSON and CBOR on payload size and on
    # (de)serialization speed, with and without GZIP compression.
    import gzip
    import json

    import cbor2

    # 1000 string keys mapped to identical string values ("0" -> "0", ...).
    data = {str(i): str(i) for i in range(1000)}
    number = 1000

    json_value = json.dumps(data)
    json_bytes = json_value.encode("utf-8")
    cbor_value = cbor2.dumps(data)

    # Report payload length in bytes. sys.getsizeof would instead report the
    # in-memory object size, whose overhead differs between str (JSON) and
    # bytes (CBOR) and therefore skews the comparison.
    print(f"Uncompressed JSON size: {len(json_bytes)}")
    print(f"Uncompressed CBOR size: {len(cbor_value)}")

    json_gzip = gzip.compress(json_bytes)
    cbor_gzip = gzip.compress(cbor_value)
    print(f"GZIP JSON size: {len(json_gzip)}")
    print(f"GZIP CBOR size: {len(cbor_gzip)}")

    # Each entry is (label, keyword arguments for serialize_and_compress).
    # Library callables are passed directly rather than wrapped in
    # pass-through lambdas, so no extra call frame lands in the timed path.
    # The deserialize cases ignore the input and decode a pre-built payload.
    benchmarks = [
        ("JSON", {"serialize_function": json.dumps}),
        ("CBOR", {"serialize_function": cbor2.dumps}),
        (
            "Deserialize JSON",
            {"serialize_function": lambda _unused: json.loads(json_value)},
        ),
        (
            "Deserialize CBOR",
            {"serialize_function": lambda _unused: cbor2.loads(cbor_value)},
        ),
        (
            "JSON + GZIP",
            {
                "serialize_function": lambda obj: json.dumps(obj).encode("utf-8"),
                "compress_function": gzip.compress,
            },
        ),
        (
            "CBOR + GZIP",
            {
                "serialize_function": cbor2.dumps,
                "compress_function": gzip.compress,
            },
        ),
    ]

    for label, stages in benchmarks:
        time_function(
            func=serialize_and_compress,
            name=label,
            number=number,
            data=data,
            **stages,
        )
@kajuberdut
Copy link
Author

Not the only conclusion, but a significant takeaway: CBOR+GZIP output is only 28% of the size of JSON+GZIP output and takes only 41% of the time to produce.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment