Skip to content

Instantly share code, notes, and snippets.

@cr0hn
Created April 24, 2020 13:01
Show Gist options
  • Save cr0hn/9b564d449efa674fe4155d71621068fa to your computer and use it in GitHub Desktop.
Save cr0hn/9b564d449efa674fe4155d71621068fa to your computer and use it in GitHub Desktop.
Analysis of performance and memory consumption of pickle vs json
Number of elements: 10
-----------------------
Json Size: 0.02715015411376953 MB
Json time: 0.006659951999999997 sec
UJson time: 0.0030970319999999996 sec
Pickle Size Proto 5: 0.02531909942626953 MB
Pickle time Proto 5: 0.003745575000000001 sec
Pickle Size Proto 4: 0.02531909942626953 MB
Pickle time Proto 4: 0.0031195769999999984 sec
Number of elements: 500
------------------------
Json Size: 24.98400592803955 MB
Json time: 1.651241939 sec
UJson time: 0.6243056419999997 sec
Pickle Size Proto 5: 25.035669326782227 MB
Pickle time Proto 5: 0.38723978699999995 sec
Pickle Size Proto 4: 25.035669326782227 MB
Pickle time Proto 4: 0.38475265300000006 sec
Number of elements: 1000
-------------------------
Json Size: 98.13797855377197 MB
Json time: 6.275222810999999 sec
UJson time: 2.5652533450000004 sec
Pickle Size Proto 5: 98.24791717529297 MB
Pickle time Proto 5: 1.3807646590000004 sec
Pickle Size Proto 4: 98.24791717529297 MB
Pickle time Proto 4: 1.3946891620000006 sec
"""
Analysis of performance and memory consumption of pickle vs json
Author: Daniel García (cr0hn)
Last update: 2020-04-24
Python version: 3.8
"""
import sys
import json
import ujson
import pickle
import timeit
from gc import get_referents
from dataclasses import dataclass
from collections import namedtuple
from types import ModuleType, FunctionType
#
# This function was taken from: https://stackoverflow.com/q/58675479/8153205
#
# Custom objects know their class.
# Function objects seem to know way too much, including modules.
# Exclude modules as well.
BLACKLIST = type, ModuleType, FunctionType
def getsize(obj):
"""sum size of object & members."""
if isinstance(obj, BLACKLIST):
raise TypeError(
'getsize() does not take argument of type: ' + str(type(obj)))
seen_ids = set()
size = 0
objects = [obj]
while objects:
need_referents = []
for obj in objects:
if not isinstance(obj, BLACKLIST) and id(obj) not in seen_ids:
seen_ids.add(id(obj))
size += sys.getsizeof(obj)
need_referents.append(obj)
objects = get_referents(*need_referents)
return size
# Benchmark driver: for each payload size ``s``, build a dictionary of
# ``s`` top-level string entries plus 100 nested dictionaries of ``s`` string
# entries each, then report the size and the serialization time
# (10 runs per measurement) for json, ujson and pickle.
for s in (10, 500, 1000):
    print("Number of elements: ", s)
    print("-" * (len(str(s)) + len("Number of elements: ")))
    print()

    # NOTE: every value is produced by its own ``'s' * s`` evaluation on
    # purpose. Pickle memoizes objects it has already written, so sharing a
    # single string object across entries would presumably shrink the pickle
    # payload and skew the comparison against JSON.
    info = {f"key-{x}": 's' * s for x in range(s)}

    # add some sub dictionaries
    for ss in range(100):
        info[f"sub-{ss}"] = {f"sub-key-{x}": 's' * s for x in range(s)}

    # Despite the ``*_size`` names, these hold the serialized payloads
    # themselves; their sizes are taken with sys.getsizeof when printing.
    json_size = json.dumps(info)
    json_time = timeit.timeit(lambda: json.dumps(info), number=10)
    ujson_time = timeit.timeit(lambda: ujson.dumps(info), number=10)

    pickle_size = pickle.dumps(info, protocol=pickle.HIGHEST_PROTOCOL)
    pickle_size_proto_4 = pickle.dumps(info, protocol=4)
    pickle_time = timeit.timeit(
        lambda: pickle.dumps(info, protocol=pickle.HIGHEST_PROTOCOL),
        number=10,
    )
    pickle_time_proto_4 = timeit.timeit(
        lambda: pickle.dumps(info, protocol=4),
        number=10,
    )

    # Sizes are the in-memory size of the resulting str/bytes object,
    # converted from bytes to MB.
    print("Json Size: ", sys.getsizeof(json_size) / (1024 * 1024), "MB")
    print("Json time: ", json_time, "sec")
    print("UJson time: ", ujson_time, "sec")
    print("Pickle Size Proto 5: ", sys.getsizeof(pickle_size) / (1024 * 1024), "MB")
    print("Pickle time Proto 5: ", pickle_time, "sec")
    print("Pickle Size Proto 4: ", sys.getsizeof(pickle_size_proto_4) / (1024 * 1024), "MB")
    print("Pickle time Proto 4: ", pickle_time_proto_4, "sec")
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment