Created
December 15, 2015 00:45
-
-
Save etscrivner/fd314fe39fa0a24b360e to your computer and use it in GitHub Desktop.
Simple JSON Benchmarking For Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
json_bench
~~~~~~~~~~
JSON encoding benchmark
"""
import collections
import datetime
import itertools
import json
import random
import string
import sys
import timeit
import uuid
# Shared payload encoded by the benchmark. Starts empty, is filled in by
# test() on its first call, and is measured by the __main__ block.
fixture_data = {}
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally knows how to serialize datetimes.

    ``datetime.datetime`` values are emitted as their ``isoformat()`` string;
    any other value the base encoder cannot handle is passed on to it.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: A value the standard encoder could not serialize itself.

        Returns:
            str: The ISO formatted timestamp when ``obj`` is a datetime,
                otherwise whatever the base class ``default`` produces.
        """
        if not isinstance(obj, datetime.datetime):
            # Not a datetime: defer to the stock JSONEncoder behaviour.
            return super(CustomJSONEncoder, self).default(obj)
        return obj.isoformat()
def random_str(length=100):
    """Return a random string of uppercase ASCII letters and digits.

    Args:
        length (int): Number of characters to generate. Defaults to 100,
            the size the benchmark fixture has always used.

    Returns:
        str: ``length`` characters drawn independently with
            ``random.choice``; the empty string when ``length`` is 0.
    """
    # Build the alphabet once instead of re-concatenating it per character.
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(length))
def generate_random_object():
    """Build one randomized record to feed the serializer.

    Returns:
        dict: A record holding a fresh UUID string, a random name, two fixed
            booleans, the current UTC time, a small integer list and a
            nested dict of flags.
    """
    # Evaluate the varying pieces first, in the same order as the fields.
    identifier = str(uuid.uuid4())
    label = random_str()
    created_at = datetime.datetime.utcnow()

    nested_flags = {'points': True, 'lines': False}
    return dict(
        id=identifier,
        name=label,
        b_true=True,
        b_false=False,
        entry_time=created_at,
        array=[1, 2, 3, 4],
        sub_object=nested_flags,
    )
def encode_object(obj):
    """Serialize ``obj`` to JSON with ``CustomJSONEncoder``.

    Args:
        obj: The value to encode; datetimes inside it are handled by
            ``CustomJSONEncoder``.

    Returns:
        str: The encoded JSON text. Returning it (rather than discarding the
            result) lets callers inspect or measure the output; callers that
            ignore the return value are unaffected.
    """
    return json.dumps(obj, cls=CustomJSONEncoder)
def test():
    """Benchmark body: encode the shared fixture payload once.

    The payload is created on the first call only -- a ``'code'`` of 200 and
    100 random objects under ``'data'`` -- and stored in the module-level
    ``fixture_data`` so later calls reuse the same data.
    """
    global fixture_data
    if not fixture_data:
        fixture_data = {'code': 200, 'data': []}
        fixture_data['data'].extend(
            generate_random_object() for _ in range(100)
        )
    encode_object(fixture_data)
def total_size(o, handlers=None, verbose=False):
    """Return the approximate memory footprint of an object and its contents.

    Automatically finds the contents of the following builtin containers and
    their subclasses: tuple, list, deque, dict, set and frozenset.
    To search other containers, add handlers to iterate over their contents:

        handlers = {SomeContainerClass: iter,
                    OtherContainerClass: OtherContainerClass.get_elements}

    Args:
        o: Root object to measure.
        handlers (dict): Optional mapping of container type to a callable
            that returns an iterable of that container's contents. These are
            consulted before the builtin handlers, so they also win for
            subclasses of builtin containers.
        verbose (bool): When true, write the size, type and repr of every
            visited object to ``sys.stderr``.

    Returns:
        int: Sum of ``sys.getsizeof`` over every distinct object reached
            from ``o``; an object reachable several times is counted once.
    """
    def dict_handler(d):
        # Flatten (key, value) pairs so keys and values are both measured.
        return itertools.chain.from_iterable(d.items())

    builtin_handlers = [
        (tuple, iter),
        (list, iter),
        (collections.deque, iter),
        (dict, dict_handler),
        (set, iter),
        (frozenset, iter),
    ]
    # Copy instead of sharing a mutable default; user handlers go first so
    # the first isinstance() match below really gives them precedence.
    user_handlers = dict(handlers) if handlers else {}
    dispatch = list(user_handlers.items()) + builtin_handlers

    seen = set()                     # ids of objects already counted
    default_size = sys.getsizeof(0)  # estimate for objects without __sizeof__

    def sizeof(obj):
        if id(obj) in seen:          # do not double count the same object
            return 0
        seen.add(id(obj))
        size = sys.getsizeof(obj, default_size)

        if verbose:
            # Explicit write works on both Python 2 and Python 3.
            sys.stderr.write('%s %s %s\n' % (size, type(obj), repr(obj)))

        for typ, handler in dispatch:
            if isinstance(obj, typ):
                size += sum(map(sizeof, handler(obj)))
                break
        return size

    return sizeof(o)
if __name__ == '__main__':
    # Number of timed test() invocations.
    num_times = 1000
    result = timeit.timeit(
        'test()', setup='from __main__ import test', number=num_times)

    # print(<single expression>) behaves the same as a Python 2 print
    # statement and as the Python 3 print function, so the script runs on
    # either interpreter.
    print('Time: {:0.03f} secs'.format(result))

    # test() has filled fixture_data by now; "object" below means that whole
    # payload, i.e. what one test() call encodes.
    single_object_size = total_size(fixture_data)
    objects_per_second = num_times / result
    print('{:0.03f} objects / second'.format(objects_per_second))
    print('{} bytes / object'.format(single_object_size))
    print('{:0.03f} bytes / second'.format(
        objects_per_second * single_object_size))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment