Skip to content

Instantly share code, notes, and snippets.

@ppwwyyxx
Created April 27, 2019 01:04
Show Gist options
  • Save ppwwyyxx/9d9ace8abc243f0e3b56c179ba381db1 to your computer and use it in GitHub Desktop.
Save ppwwyyxx/9d9ace8abc243f0e3b56c179ba381db1 to your computer and use it in GitHub Desktop.
Serialization Benchmark
"""
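Serialization benchmark: times encoding and decoding of a plain-Python payload
and a numpy payload with msgpack, pyarrow and pickle (plus json and ujson for
the plain payload).

Dependencies:
    pip install tabulate ujson msgpack msgpack_numpy numpy pyarrow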
"""
import sys
from timeit import timeit
import pickle
from tabulate import tabulate
def get_tests(is_numpy, max_pickle_protocol=3):
    """Build the list of benchmark cases to hand to ``timeit``.

    Every case is a tuple of Python *source strings*; nothing is imported or
    executed here. Each setup string expects a variable ``d`` (the payload)
    to be defined already and leaves its serialized form in ``src``.

    Args:
        is_numpy: True when the payload contains numpy arrays. The json and
            ujson cases are then left out.
        max_pickle_protocol: Highest pickle protocol to benchmark. Protocols
            from 2 up to this value are included, clamped to
            ``pickle.HIGHEST_PROTOCOL``. Defaults to 3, the cap this
            benchmark has always used.

    Returns:
        A list of ``(title, setup, enc_test, dec_test)`` tuples.
    """
    tests = [
        # (title, setup, enc_test, dec_test)
        ('msgpack-python',
         'import msgpack; import msgpack_numpy as m; m.patch(); src = msgpack.dumps(d)',
         'msgpack.dumps(d)',
         'msgpack.loads(src)'),
        ('pyarrow',
         'import pyarrow as pa; src = pa.serialize(d).to_buffer()',
         'pa.serialize(d).to_buffer()',
         'pa.deserialize(src)'),
    ]

    # The interpreter version cannot change between iterations, so choose the
    # import line once: Python 3 imports pickle, anything else uses cPickle.
    if sys.version_info.major == 3:
        setup_template = 'import pickle ; src = pickle.dumps(d, {})'
    else:
        setup_template = 'import cPickle as pickle; src = pickle.dumps(d, {})'

    highest = min(pickle.HIGHEST_PROTOCOL, max_pickle_protocol)
    for protocol in range(2, highest + 1):
        tests.append((
            'pickle-protocol{}'.format(protocol),
            setup_template.format(protocol),
            'pickle.dumps(d, {})'.format(protocol),
            'pickle.loads(src)',
        ))

    if not is_numpy:
        tests.extend([
            ('json', 'import json; src = json.dumps(d)', 'json.dumps(d)', 'json.loads(src)'),
            ('ujson', 'import ujson; src = ujson.dumps(d)', 'ujson.dumps(d)', 'ujson.loads(src)'),
        ])
    return tests
def run_tests(tests, data, loops):
    """Time every benchmark case and print ranked encode/decode tables.

    Args:
        tests: Iterable of ``(title, setup, enc, dec)`` tuples of Python
            source strings, as produced by ``get_tests``.
        data: Python source that defines the payload; it is prepended to
            each case's setup code.
        loops: Number of times ``timeit`` executes each statement.

    Returns:
        None. Progress lines and the two result tables are printed.
    """
    print("Running tests (%d loops each)" % loops)

    enc_rows = []
    dec_rows = []
    for title, setup, enc, dec in tests:
        # The payload definition has to run before the case's own setup.
        full_setup = data + ' ; ' + setup
        print("Running " + title)
        enc_rows.append([title, timeit(enc, full_setup, number=loops)])
        dec_rows.append([title, timeit(dec, full_setup, number=loops)])

    header = ['Method', 'Seconds']
    reports = (
        ("\nEncoding Test (%d loops)", enc_rows),
        ("\nDecoding Test (%d loops)", dec_rows),
    )
    for caption, rows in reports:
        # Fastest first; the header row is consumed by headers="firstrow".
        ranked = [header] + sorted(rows, key=lambda row: row[1])
        print(caption % loops)
        print(tabulate(ranked, headers="firstrow"))
# Source for the plain-Python payload: a dict holding a long string, a list
# of ints, a str->str dict, an int and a float.
plain_data = '''d = {
'words': """
Lorem ipsum dolor sit amet, consectetur adipiscing
elit. Mauris adipiscing adipiscing placerat.
Vestibulum augue augue,
pellentesque quis sollicitudin id, adipiscing.
""" * 100,
'list': list(range(100)) * 300,
'dict': dict((str(i),'a') for i in range(5000)),
'int': 3000,
'float': 100.123456
}'''

# Source for the numpy payload: a dict holding ten random 1000x353 arrays.
numpy_data = """
import numpy as np
d = {
"arrays": [np.random.rand(1000, 353) for k in range(10)]
} """

# (banner, payload source, payload contains numpy arrays?, loops per statement)
for _banner, _payload, _is_numpy, _loops in (
    ("Benchmarking plain data ...............", plain_data, False, 1000),
    ("Benchmarking numpy data ...............", numpy_data, True, 300),
):
    print(_banner)
    run_tests(get_tests(_is_numpy), _payload, _loops)
@ppwwyyxx
Copy link
Author

Output with Python 3.7:

Benchmarking plain data ...............
Running tests (1000 loops each)
Running msgpack-python
Running pyarrow
Running pickle-protocol2
Running pickle-protocol3
Running json
Running ujson

Encoding Test (1000 loops)
Method              Seconds
----------------  ---------
pickle-protocol3   0.608393
pickle-protocol2   0.610825
msgpack-python     1.07399
ujson              1.09344
pyarrow            2.66014
json               2.68677

Decoding Test (1000 loops)
Method              Seconds
----------------  ---------
msgpack-python     0.452591
pickle-protocol2   0.722493
pickle-protocol3   0.723882
pyarrow            0.812569
ujson              0.97694
json               2.04448
Benchmarking numpy data ...............
Running tests (300 loops each)
Running msgpack-python
Running pyarrow
Running pickle-protocol2
Running pickle-protocol3

Encoding Test (300 loops)
Method              Seconds
----------------  ---------
pyarrow             3.3704
pickle-protocol3    6.12827
msgpack-python      6.18487
pickle-protocol2   45.2517

Decoding Test (300 loops)
Method               Seconds
----------------  ----------
pyarrow            0.0100455
pickle-protocol3   1.04249
msgpack-python     1.05066
pickle-protocol2  40.2511

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment