-
-
Save CTimmerman/1f328f02ac2740f4c90d to your computer and use it in GitHub Desktop.
Python serialization benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Python serialization benchmark | |
Based on https://gist.github.com/marians/f1314446b8bf4d34e782 | |
2014-11-11 v1.0 | |
2014-11-12 v1.1 Added compression and output size. | |
""" | |
try: import cPickle | |
except: import pickle as cPickle # don't break in Python 3 | |
import json, marshal, pickle, random | |
from hashlib import md5 | |
from time import time | |
from zlib import compress, decompress # halves json float size (19 kB to 10 kB) | |
#import ujson # from http://www.lfd.uci.edu/~gohlke/pythonlibs/#ujson | |
# 2 to 3 times slower and up to 50% larger output than marshal. | |
# time.time() is a wall clock whose resolution can be coarse (on Windows it
# typically ticks every ~16 ms), which swamps the faster serializers.
# timeit.default_timer picks the best available clock on Python 2 and 3.
from timeit import default_timer

data_size = 100000
test_runs = 10


def benchmark(module, data, runs=test_runs):
    """Time compressed serialization round trips of ``data`` with ``module``.

    ``data`` is serialized with ``module.dumps`` and zlib-compressed ``runs``
    times, then the last compressed result is decompressed and deserialized
    with ``module.loads`` ``runs`` times. Compression time is deliberately
    part of both measurements.

    Args:
        module: A serializer module exposing ``dumps``/``loads``. Modules named
            ``pickle``/``cPickle`` are called with ``protocol=-1``; ``marshal``
            is treated as producing bytes; anything else (e.g. ``json``) is
            assumed to produce text, which is UTF-8 encoded before compression.
        data: The object to serialize.
        runs: Number of repetitions per direction; must be at least 1.

    Returns:
        A tuple ``(write_seconds, read_seconds, compressed_size)`` where
        ``compressed_size`` is the length in bytes of the compressed output.

    Raises:
        ValueError: If ``runs`` is less than 1 (nothing would be measured).
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    name = module.__name__
    is_pickle = name in ('pickle', 'cPickle')
    dumps_binary = is_pickle or name == 'marshal'
    dumps = module.dumps
    loads = module.loads

    start = default_timer()
    if is_pickle:
        for _ in range(runs):
            # protocol=-1 selects the highest pickle protocol available.
            serialized = compress(dumps(data, protocol=-1))
    elif dumps_binary:
        for _ in range(runs):
            serialized = compress(dumps(data))
    else:
        for _ in range(runs):
            # Text serializers return str; zlib needs bytes.
            serialized = compress(dumps(data).encode('utf8'))
    write_seconds = default_timer() - start

    start = default_timer()
    if dumps_binary:
        for _ in range(runs):
            loads(decompress(serialized))
    else:
        for _ in range(runs):
            loads(decompress(serialized).decode('utf8'))
    read_seconds = default_timer() - start

    return write_seconds, read_seconds, len(serialized)


if __name__ == "__main__":
    # One list of data_size random values per element type.
    payload = {
        "float": [(random.randrange(0, 99) + random.random()) for i in range(data_size)],
        "int": [random.randrange(0, 9999) for i in range(data_size)],
        "str": [md5(str(random.random()).encode('utf8')).hexdigest() for i in range(data_size)]
    }
    # On Python 3 cPickle is an alias of pickle, so pickle is measured twice.
    modules = [json, pickle, cPickle, marshal]
    print("%s times %s elements:" % (test_runs, data_size))
    for payload_type in payload:
        data = payload[payload_type]
        for module in modules:
            w, r, size = benchmark(module, data, test_runs)
            print("%s\t%s\t%6.3f sW %6.3f sR %10.0f kB" % (module.__name__, payload_type, w, r, size / 1e3))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32-bit results in 64-bit Windows 7 Enterprise on Xeon W3540 @ 2.93 GHz 2.93 GHz (Dell Precision T3500): | |
C:\Python27\python.exe -u "serialization_benchmark.py" | |
1000 times 1000 elements: | |
json int W 0.125 R 0.156 | |
pickle int W 2.808 R 1.139 | |
cPickle int W 0.047 R 0.046 | |
marshal int W 0.016 R 0.031 | |
json float W 1.981 R 0.624 | |
pickle float W 2.607 R 1.092 | |
cPickle float W 0.063 R 0.062 | |
marshal float W 0.047 R 0.031 | |
json str W 0.172 R 0.437 | |
pickle str W 5.149 R 2.309 | |
cPickle str W 0.281 R 0.156 | |
marshal str W 0.109 R 0.047 | |
C:\pypy-1.6\pypy-c -u "serialization_benchmark.py" | |
1000 times 1000 elements: | |
json int W 0.515 R 0.452 | |
pickle int W 0.546 R 0.219 | |
cPickle int W 0.577 R 0.171 | |
marshal int W 0.032 R 0.031 | |
json float W 2.390 R 1.341 | |
pickle float W 0.656 R 0.436 | |
cPickle float W 0.593 R 0.406 | |
marshal float W 0.327 R 0.203 | |
json str W 1.141 R 1.186 | |
pickle str W 0.702 R 0.546 | |
cPickle str W 0.828 R 0.562 | |
marshal str W 0.265 R 0.078 | |
c:\Python34\python -u "serialization_benchmark.py" | |
1000 times 1000 elements: | |
json int W 0.203 R 0.140 6 kB | |
pickle int W 0.047 R 0.062 3 kB | |
pickle int W 0.031 R 0.062 3 kB | |
marshal int W 0.031 R 0.047 5 kB | |
json float W 1.935 R 0.749 19 kB | |
pickle float W 0.047 R 0.062 9 kB | |
pickle float W 0.047 R 0.062 9 kB | |
marshal float W 0.047 R 0.047 9 kB | |
json str W 0.281 R 0.187 36 kB | |
pickle str W 0.125 R 0.140 35 kB | |
pickle str W 0.125 R 0.140 35 kB | |
marshal str W 0.094 R 0.078 34 kB | |
v1.1 zlib: | |
c:\Python34\python -u "serialization_benchmark.py" | |
1000 times 1000 elements: | |
json int 0.686 sW 0.187 sR 3 kB | |
pickle int 0.187 sW 0.078 sR 2 kB | |
pickle int 0.187 sW 0.094 sR 2 kB | |
marshal int 0.702 sW 0.094 sR 3 kB | |
json float 3.325 sW 0.874 sR 10 kB | |
pickle float 0.546 sW 0.140 sR 8 kB | |
pickle float 0.546 sW 0.125 sR 8 kB | |
marshal float 0.468 sW 0.109 sR 8 kB | |
json str 2.590 sW 0.608 sR 20 kB | |
pickle str 2.496 sW 0.452 sR 20 kB | |
pickle str 2.418 sW 0.421 sR 20 kB | |
marshal str 1.981 sW 0.374 sR 20 kB | |
v1.1 bz2: | |
c:\Python34\python -u "serialization_benchmark.py" | |
1000 times 1000 elements: | |
json int 1.188 sW 0.446 sR 2 kB | |
pickle int 1.263 sW 0.317 sR 2 kB | |
pickle int 1.295 sW 0.271 sR 2 kB | |
marshal int 1.627 sW 0.270 sR 2 kB | |
json float 4.828 sW 1.706 sR 8 kB | |
pickle float 2.737 sW 0.749 sR 8 kB | |
pickle float 2.699 sW 0.718 sR 8 kB | |
marshal float 2.590 sW 0.733 sR 8 kB | |
json str 5.179 sW 1.778 sR 17 kB | |
pickle str 4.883 sW 1.888 sR 17 kB | |
pickle str 4.849 sW 1.857 sR 17 kB | |
marshal str 4.782 sW 1.741 sR 17 kB | |
c:\Python34\python -u "serialization_benchmark_nocompress.py" | |
10 times 100000 elements: | |
json int 0.187 sW 0.140 sR 589 kB | |
pickle int 0.031 sW 0.062 sR 298 kB | |
pickle int 0.031 sW 0.047 sR 298 kB | |
marshal int 0.047 sW 0.047 sR 500 kB | |
json float 1.778 sW 0.686 sR 1919 kB | |
pickle float 0.062 sW 0.047 sR 900 kB | |
pickle float 0.047 sW 0.062 sR 900 kB | |
marshal float 0.047 sW 0.031 sR 900 kB | |
json str 0.312 sW 0.203 sR 3600 kB | |
pickle str 0.250 sW 0.156 sR 3501 kB | |
pickle str 0.250 sW 0.156 sR 3501 kB | |
marshal str 0.125 sW 0.094 sR 3400 kB | |
2.7 zlib: | |
C:\Python27\python.exe -u "serialization_benchmark.py" | |
10 times 100000 elements: | |
json int 1.108 sW 0.814 sR 226 kB | |
pickle int 3.011 sW 1.186 sR 216 kB | |
cPickle int 0.265 sW 0.094 sR 216 kB | |
marshal int 1.154 sW 0.078 sR 239 kB | |
json float 4.509 sW 1.030 sR 906 kB | |
pickle float 4.025 sW 1.185 sR 797 kB | |
cPickle float 1.498 sW 0.156 sR 797 kB | |
marshal float 1.108 sW 0.125 sR 793 kB | |
json str 3.276 sW 0.780 sR 1982 kB | |
pickle str 9.034 sW 2.981 sR 2259 kB | |
cPickle str 4.135 sW 0.686 sR 2259 kB | |
marshal str 2.980 sW 0.421 sR 1989 kB | |
zlib: | |
c:\Python34\python -u "serialization_benchmark.py" | |
10 times 100000 elements: | |
json int 1.123 sW 0.218 sR 226 kB | |
pickle int 0.265 sW 0.094 sR 216 kB | |
pickle int 0.281 sW 0.078 sR 216 kB | |
marshal int 1.170 sW 0.078 sR 242 kB | |
json float 4.227 sW 0.842 sR 906 kB | |
pickle float 1.529 sW 0.125 sR 797 kB | |
pickle float 1.529 sW 0.125 sR 797 kB | |
marshal float 1.154 sW 0.109 sR 794 kB | |
json str 3.182 sW 0.468 sR 1983 kB | |
pickle str 3.026 sW 0.452 sR 1966 kB | |
pickle str 3.074 sW 0.437 sR 1966 kB | |
marshal str 2.637 sW 0.343 sR 1933 kB | |
bz2: | |
c:\Python34\python -u "serialization_benchmark.py" | |
10 times 100000 elements: | |
json int 1.061 sW 0.655 sR 177 kB | |
pickle int 0.499 sW 0.218 sR 171 kB | |
pickle int 0.499 sW 0.234 sR 171 kB | |
marshal int 0.608 sW 0.374 sR 169 kB | |
json float 4.415 sW 2.683 sR 769 kB | |
pickle float 1.981 sW 1.170 sR 758 kB | |
pickle float 1.622 sW 0.920 sR 758 kB | |
marshal float 1.982 sW 1.185 sR 770 kB | |
json str 5.647 sW 3.931 sR 1711 kB | |
pickle str 5.381 sW 3.759 sR 1712 kB | |
pickle str 5.335 sW 3.806 sR 1712 kB | |
marshal str 5.179 sW 3.650 sR 1711 kB |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment