Skip to content

Instantly share code, notes, and snippets.

@CTimmerman
Forked from marians/bench.py
Last active March 6, 2024 14:57
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CTimmerman/1f328f02ac2740f4c90d to your computer and use it in GitHub Desktop.
Save CTimmerman/1f328f02ac2740f4c90d to your computer and use it in GitHub Desktop.
Python serialization benchmark
"""
Python serialization benchmark
Based on https://gist.github.com/marians/f1314446b8bf4d34e782
2014-11-11 v1.0
2014-11-12 v1.1 Added compression and output size.
"""
try: import cPickle
except: import pickle as cPickle # don't break in Python 3
import json, marshal, pickle, random
from hashlib import md5
from time import time
from zlib import compress, decompress # halves json float size (19 kB to 10 kB)
#import ujson # from http://www.lfd.uci.edu/~gohlke/pythonlibs/#ujson
# 2 to 3 times slower and up to 50% larger output than marshal.
# Local to this section so the rest of the file's imports stay untouched.
# default_timer is the most precise wall clock available on both Python 2 and
# Python 3, unlike time.time(), whose resolution can be as coarse as ~16 ms on
# Windows and would quantize the short measurements made here.
from timeit import default_timer

data_size = 100000  # number of elements in each generated payload list
test_runs = 10  # how many times every dump and every load is repeated


def make_codec(module):
    """Return ``(encode, decode)`` callables for *module*, zlib-compressed.

    ``encode(obj)`` serializes *obj* with ``module.dumps`` and compresses the
    result; ``decode(blob)`` reverses both steps with ``module.loads``.

    Modules named ``pickle``, ``cPickle`` or ``marshal`` are treated as
    producing bytes directly.  Any other module (``json`` here) is treated as
    producing text, which is UTF-8 encoded before compression and decoded
    again after decompression.
    """
    name = module.__name__
    dumps = module.dumps
    loads = module.loads
    is_pickle = name in ('pickle', 'cPickle')
    is_binary = is_pickle or name == 'marshal'

    if is_pickle:
        def encode(obj):
            # -1 selects the highest pickle protocol the interpreter offers.
            return compress(dumps(obj, protocol=-1))
    elif is_binary:
        def encode(obj):
            return compress(dumps(obj))
    else:
        def encode(obj):
            return compress(dumps(obj).encode('utf8'))

    if is_binary:
        def decode(blob):
            return loads(decompress(blob))
    else:
        def decode(blob):
            return loads(decompress(blob).decode('utf8'))

    return encode, decode


def time_module(module, data, runs):
    """Time *runs* compressed dumps and *runs* loads of *data* with *module*.

    Returns ``(write_seconds, read_seconds, compressed_size)`` where the two
    durations are totals over all runs and ``compressed_size`` is the length
    in bytes of the last compressed dump.

    Raises ``ValueError`` if *runs* is less than 1, because there would be no
    serialized output to load or measure.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1, got %r" % (runs,))
    encode, decode = make_codec(module)

    started = default_timer()
    for _ in range(runs):
        blob = encode(data)
    write_seconds = default_timer() - started

    started = default_timer()
    for _ in range(runs):
        decode(blob)
    read_seconds = default_timer() - started

    return write_seconds, read_seconds, len(blob)


def main():
    """Benchmark every module against float, int and str payloads and print
    one result line per (module, payload type) pair."""
    payload = {
        "float": [(random.randrange(0, 99) + random.random()) for i in range(data_size)],
        "int": [random.randrange(0, 9999) for i in range(data_size)],
        "str": [md5(str(random.random()).encode('utf8')).hexdigest() for i in range(data_size)],
    }
    # On Python 3 cPickle is an alias of pickle, so pickle is measured twice.
    modules = [json, pickle, cPickle, marshal]
    print("%s times %s elements:" % (test_runs, data_size))
    for payload_type, data in payload.items():
        for module in modules:
            w, r, size = time_module(module, data, test_runs)
            print("%s\t%s\t%6.3f sW %6.3f sR %10.0f kB" % (module.__name__, payload_type, w, r, size / 1e3))


if __name__ == "__main__":
    main()
32-bit results in 64-bit Windows 7 Enterprise on Xeon W3540 @ 2.93 GHz (Dell Precision T3500):
C:\Python27\python.exe -u "serialization_benchmark.py"
1000 times 1000 elements:
json int W 0.125 R 0.156
pickle int W 2.808 R 1.139
cPickle int W 0.047 R 0.046
marshal int W 0.016 R 0.031
json float W 1.981 R 0.624
pickle float W 2.607 R 1.092
cPickle float W 0.063 R 0.062
marshal float W 0.047 R 0.031
json str W 0.172 R 0.437
pickle str W 5.149 R 2.309
cPickle str W 0.281 R 0.156
marshal str W 0.109 R 0.047
C:\pypy-1.6\pypy-c -u "serialization_benchmark.py"
1000 times 1000 elements:
json int W 0.515 R 0.452
pickle int W 0.546 R 0.219
cPickle int W 0.577 R 0.171
marshal int W 0.032 R 0.031
json float W 2.390 R 1.341
pickle float W 0.656 R 0.436
cPickle float W 0.593 R 0.406
marshal float W 0.327 R 0.203
json str W 1.141 R 1.186
pickle str W 0.702 R 0.546
cPickle str W 0.828 R 0.562
marshal str W 0.265 R 0.078
c:\Python34\python -u "serialization_benchmark.py"
1000 times 1000 elements:
json int W 0.203 R 0.140 6 kB
pickle int W 0.047 R 0.062 3 kB
pickle int W 0.031 R 0.062 3 kB
marshal int W 0.031 R 0.047 5 kB
json float W 1.935 R 0.749 19 kB
pickle float W 0.047 R 0.062 9 kB
pickle float W 0.047 R 0.062 9 kB
marshal float W 0.047 R 0.047 9 kB
json str W 0.281 R 0.187 36 kB
pickle str W 0.125 R 0.140 35 kB
pickle str W 0.125 R 0.140 35 kB
marshal str W 0.094 R 0.078 34 kB
v1.1 zlib:
c:\Python34\python -u "serialization_benchmark.py"
1000 times 1000 elements:
json int 0.686 sW 0.187 sR 3 kB
pickle int 0.187 sW 0.078 sR 2 kB
pickle int 0.187 sW 0.094 sR 2 kB
marshal int 0.702 sW 0.094 sR 3 kB
json float 3.325 sW 0.874 sR 10 kB
pickle float 0.546 sW 0.140 sR 8 kB
pickle float 0.546 sW 0.125 sR 8 kB
marshal float 0.468 sW 0.109 sR 8 kB
json str 2.590 sW 0.608 sR 20 kB
pickle str 2.496 sW 0.452 sR 20 kB
pickle str 2.418 sW 0.421 sR 20 kB
marshal str 1.981 sW 0.374 sR 20 kB
v1.1 bz2:
c:\Python34\python -u "serialization_benchmark.py"
1000 times 1000 elements:
json int 1.188 sW 0.446 sR 2 kB
pickle int 1.263 sW 0.317 sR 2 kB
pickle int 1.295 sW 0.271 sR 2 kB
marshal int 1.627 sW 0.270 sR 2 kB
json float 4.828 sW 1.706 sR 8 kB
pickle float 2.737 sW 0.749 sR 8 kB
pickle float 2.699 sW 0.718 sR 8 kB
marshal float 2.590 sW 0.733 sR 8 kB
json str 5.179 sW 1.778 sR 17 kB
pickle str 4.883 sW 1.888 sR 17 kB
pickle str 4.849 sW 1.857 sR 17 kB
marshal str 4.782 sW 1.741 sR 17 kB
c:\Python34\python -u "serialization_benchmark_nocompress.py"
10 times 100000 elements:
json int 0.187 sW 0.140 sR 589 kB
pickle int 0.031 sW 0.062 sR 298 kB
pickle int 0.031 sW 0.047 sR 298 kB
marshal int 0.047 sW 0.047 sR 500 kB
json float 1.778 sW 0.686 sR 1919 kB
pickle float 0.062 sW 0.047 sR 900 kB
pickle float 0.047 sW 0.062 sR 900 kB
marshal float 0.047 sW 0.031 sR 900 kB
json str 0.312 sW 0.203 sR 3600 kB
pickle str 0.250 sW 0.156 sR 3501 kB
pickle str 0.250 sW 0.156 sR 3501 kB
marshal str 0.125 sW 0.094 sR 3400 kB
2.7 zlib:
C:\Python27\python.exe -u "serialization_benchmark.py"
10 times 100000 elements:
json int 1.108 sW 0.814 sR 226 kB
pickle int 3.011 sW 1.186 sR 216 kB
cPickle int 0.265 sW 0.094 sR 216 kB
marshal int 1.154 sW 0.078 sR 239 kB
json float 4.509 sW 1.030 sR 906 kB
pickle float 4.025 sW 1.185 sR 797 kB
cPickle float 1.498 sW 0.156 sR 797 kB
marshal float 1.108 sW 0.125 sR 793 kB
json str 3.276 sW 0.780 sR 1982 kB
pickle str 9.034 sW 2.981 sR 2259 kB
cPickle str 4.135 sW 0.686 sR 2259 kB
marshal str 2.980 sW 0.421 sR 1989 kB
zlib:
c:\Python34\python -u "serialization_benchmark.py"
10 times 100000 elements:
json int 1.123 sW 0.218 sR 226 kB
pickle int 0.265 sW 0.094 sR 216 kB
pickle int 0.281 sW 0.078 sR 216 kB
marshal int 1.170 sW 0.078 sR 242 kB
json float 4.227 sW 0.842 sR 906 kB
pickle float 1.529 sW 0.125 sR 797 kB
pickle float 1.529 sW 0.125 sR 797 kB
marshal float 1.154 sW 0.109 sR 794 kB
json str 3.182 sW 0.468 sR 1983 kB
pickle str 3.026 sW 0.452 sR 1966 kB
pickle str 3.074 sW 0.437 sR 1966 kB
marshal str 2.637 sW 0.343 sR 1933 kB
bz2:
c:\Python34\python -u "serialization_benchmark.py"
10 times 100000 elements:
json int 1.061 sW 0.655 sR 177 kB
pickle int 0.499 sW 0.218 sR 171 kB
pickle int 0.499 sW 0.234 sR 171 kB
marshal int 0.608 sW 0.374 sR 169 kB
json float 4.415 sW 2.683 sR 769 kB
pickle float 1.981 sW 1.170 sR 758 kB
pickle float 1.622 sW 0.920 sR 758 kB
marshal float 1.982 sW 1.185 sR 770 kB
json str 5.647 sW 3.931 sR 1711 kB
pickle str 5.381 sW 3.759 sR 1712 kB
pickle str 5.335 sW 3.806 sR 1712 kB
marshal str 5.179 sW 3.650 sR 1711 kB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment