Skip to content

Instantly share code, notes, and snippets.

@ambroff
Created June 30, 2012 06:32
Show Gist options
  • Save ambroff/3022655 to your computer and use it in GitHub Desktop.
Save ambroff/3022655 to your computer and use it in GitHub Desktop.
comparing JSON and Thrift serialization speed / data size
import jsonlib
import random
import timeit
import lz4
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from thrift.protocol.TCompactProtocol import TCompactProtocol
from thrift.transport.TTransport import TMemoryBuffer
from example.ttypes import Item
from example.ttypes import ItemSet
def thrift_obj_to_dict(obj):
    """Return a plain dict holding the populated fields of ``obj``.

    Iterates over ``obj.thrift_spec`` and, for every non-empty entry, reads
    the field name from index 2 of that entry and copies the attribute of
    the same name from ``obj``.

    Args:
        obj: Any object exposing a ``thrift_spec`` iterable whose truthy
            entries are indexable with the field name at position 2.

    Returns:
        A dict mapping field name to field value. Fields whose value is
        ``None`` (or whose attribute is missing) are left out. Values are
        stored as-is; nothing is converted recursively.
    """
    d = {}
    for field_spec in obj.thrift_spec:
        # Falsy entries carry no field description, so there is nothing
        # to copy for them.
        if not field_spec:
            continue
        field_name = field_spec[2]
        field_value = getattr(obj, field_name, None)
        # Compare against None explicitly so that legitimate falsy values
        # (0, '', empty containers) are still exported.
        if field_value is not None:
            d[field_name] = field_value
    return d
# Benchmark fixture: the same 1000 records are kept both as Thrift ``Item``
# objects and as plain dicts so each serializer is fed equivalent data.
item_list = []
dict_list = []
# Use xrange(1000, 5000) instead for a 4000-record payload.
for i in xrange(1000, 2000):
    new_item = Item(
        id=i,
        timestamp=i + 1000,
        object_id=i + 2000,
        type=random.choice(['foo', 'bar']),
        recipient_id=i + 3000,
        sender_id=i + 4000,
        metadata={str(i): str(i + 9)})
    item_list.append(new_item)
    dict_list.append(thrift_obj_to_dict(new_item))

# Payload serialized by test_thrift().
TEST_OBJECT = ItemSet(new_events=item_list)
# Payload serialized by test_json().
TEST_DICT = dict(new_events=dict_list)
def test_thrift(compact=False):
    """Serialize TEST_OBJECT with Thrift and read it straight back.

    The object is written into an in-memory transport, the resulting
    payload is fetched, and a fresh ``ItemSet`` is populated from it.

    NOTE(review): the protocol classes used here are presumably the
    pure-Python implementations; an accelerated protocol would likely
    change the timings -- confirm before drawing conclusions.

    Args:
        compact: When true use ``TCompactProtocol``; otherwise use
            ``TBinaryProtocol``.

    Returns:
        The length of the serialized payload.
    """
    # Choose the protocol once so the write and read sides cannot diverge.
    protocol_cls = TCompactProtocol if compact else TBinaryProtocol

    transport_out = TMemoryBuffer()
    protocol_out = protocol_cls(transport_out)
    TEST_OBJECT.write(protocol_out)

    # The serialized payload could be written out to disk here and read
    # back in at a different time. (Named ``payload`` rather than ``bytes``
    # to avoid shadowing the builtin.)
    payload = transport_out.getvalue()

    transport_in = TMemoryBuffer(payload)
    protocol_in = protocol_cls(transport_in)
    new_set = ItemSet()
    new_set.read(protocol_in)
    return len(payload)
# How many times timeit runs each benchmark statement. Raise it (for
# example to 1000000) for a longer, steadier measurement.
ITERATIONS = 100
def test_json(compress=False):
    """Serialize TEST_DICT to JSON and parse it straight back.

    Args:
        compress: When true, the JSON text is additionally LZ4-compressed
            and then uncompressed again before being parsed.

    Returns:
        The length of the data that would be stored: the compressed
        payload when ``compress`` is true, the raw JSON otherwise.
    """
    encoded = jsonlib.write(TEST_DICT)
    if not compress:
        jsonlib.read(encoded)
        return len(encoded)

    packed = lz4.compress(encoded)
    jsonlib.read(lz4.uncompress(packed))
    return len(packed)
print '==', ITERATIONS, \
'iterations serializing and deserializing a large object', '=='
print 'THRIFT:'
print ' - ', \
timeit.timeit(
'test_thrift()', 'from __main__ import test_thrift', number=ITERATIONS), \
'seconds'
print ' - ', test_thrift(), 'bytes'
print 'COMPACT THRIFT:'
print ' - ', \
timeit.timeit(
'test_thrift(True)', 'from __main__ import test_thrift',
number=ITERATIONS), 'seconds'
print ' - ', test_thrift(True), 'bytes'
print 'JSON:'
print ' - ', \
timeit.timeit(
'test_json()', 'from __main__ import test_json', number=ITERATIONS), \
'seconds'
print ' - ', test_json(), 'bytes'
print 'LZ4 COMPRESSED JSON:'
print ' - ', \
timeit.timeit(
'test_json(True)', 'from __main__ import test_json', number=ITERATIONS), \
'seconds'
print ' - ', test_json(True), 'bytes'
namespace py example
/* A single event record; every field except metadata is required. */
struct Item {
/* The ID of this event. */
1: required i64 id,
/* When this event occurred UTC. */
2: required i64 timestamp,
/* The ID of the object related to this event. */
3: required i64 object_id,
/* Type of this event or object. */
4: required string type,
/* The account_id for the user receiving this event. */
5: required i64 recipient_id,
/* ID of the user that triggered this event. */
6: required i64 sender_id,
/* Additional metadata that you may want to attach to this item. */
7: optional map<string, string> metadata
}
/* A batch of Item records carried together as one message. */
struct ItemSet {
/* The items in this batch. */
1: required list<Item> new_events
}
== 100 iterations serializing and deserializing a large object ==
THRIFT:
- 27.6038601398 seconds
- 91009 bytes
COMPACT THRIFT:
- 44.0193760395 seconds
- 34005 bytes
JSON:
- 1.46023511887 seconds
- 123016 bytes
LZ4 COMPRESSED JSON:
- 1.53419780731 seconds
- 34556 bytes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment