Last active
May 24, 2016 13:59
-
-
Save hbldh/4b87521f3b24db87e8318a9958473c0a to your computer and use it in GitHub Desktop.
Code for blog post "JSON and the Python dictionary"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
dict_memory_foray | |
~~~~~~~~~~~~~~~~~ | |
:copyright: 2016 by Henrik Blidh <henrik.blidh@nedomkull.com> | |
""" | |
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import

import json
import os
import sys
import tempfile
# True when the running interpreter's major version is 3 or higher.
is_py3 = sys.version_info[0] >= 3

if is_py3:
    # Python 2 builtin names, aliased to their Python 3 counterparts.
    unicode = basestring = str
    xrange = range
    long = int
def calculate_document_size_in_memory(doc):
    """A "size in memory" estimator for JSON documents/dicts.

    The estimate is ``sys.getsizeof`` of the object itself plus, for
    containers, the recursively estimated size of every element (lists and
    tuples) or of every key and value (dicts).  An object that is referenced
    several times is counted once per reference.  ``None`` contributes 0.

    :param doc: The document or list of documents to find size of.
    :type doc: dict or list
    :return: The size of the input document(s) in bytes.
    :rtype: int
    :raises ValueError: If ``doc``, or anything nested inside it, is not a
        list, tuple, dict, float, int, long, string or ``None``.

    """
    if isinstance(doc, (list, tuple)):
        # Size the actual instance, not an empty one: getsizeof(doc) includes
        # the per-element pointer storage, and it also works for subclasses
        # that cannot be constructed without arguments (e.g. namedtuples).
        return sys.getsizeof(doc) + sum(
            calculate_document_size_in_memory(item) for item in doc)

    if isinstance(doc, dict):
        # Same reasoning as above: the populated dict's own hash table is
        # part of its footprint.
        size = sys.getsizeof(doc)
        for key, value in doc.items():
            size += calculate_document_size_in_memory(key)
            size += calculate_document_size_in_memory(value)
        return size

    # `long` and `basestring` are Python 2 builtins; on Python 3 they are
    # expected to be provided as module-level aliases.
    if isinstance(doc, (float, int, long, basestring)):
        # Base type which can be evaluated directly with sys.getsizeof.
        return sys.getsizeof(doc)

    if doc is None:
        # None is deliberately not counted.
        return 0

    raise ValueError("Unsizable object: {0}".format(type(doc)))
# Sample JSON document used for the first size comparison.
doc = """
{
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
}
"""

# Parse the sample, then serialize it again without the original whitespace.
as_dict = json.loads(doc)
as_minified_json = json.dumps(as_dict)

# d_1: estimated footprint of the parsed structure.
# d_2: footprint of the serialized string object.
d_1 = calculate_document_size_in_memory(as_dict)
d_2 = sys.getsizeof(as_minified_json)

# d_3: size of the document when dumped to a file.  A uniquely named temporary
# file is used instead of a fixed path under /tmp so the script is portable
# and cannot collide with an existing file; it is removed even if the dump
# fails.
tmp_fd, d_3_tmp_file = tempfile.mkstemp(suffix='.json')
try:
    with os.fdopen(tmp_fd, 'w') as f:
        json.dump(as_dict, f)
    d_3 = os.path.getsize(d_3_tmp_file)
finally:
    os.remove(d_3_tmp_file)

print("Size in memory as dict: {0:>6d} B".format(d_1))
print("Size in memory as json.dumps str: {0:>6d} B".format(d_2))
print("Size on disc as json.dump in file: {0:>6d} B".format(d_3))
# ------------------------------- | |
from xmlr import xmlparse

# Second comparison: an XML file parsed with xmlr.  The parsed result is fed
# to json.dumps and to the size estimator below.
filepath = '/home/hbldh/Downloads/google-renewals-all-20080624.xml'
doc = xmlparse(filepath)
as_minified_json = json.dumps(doc)

# d_0: size of the source XML file on disk.
# d_1: estimated footprint of the parsed structure.
# d_2: footprint of the serialized string object.
d_0 = os.path.getsize(filepath)
d_1 = calculate_document_size_in_memory(doc)
d_2 = sys.getsizeof(as_minified_json)

# d_3: size of the document when dumped to a file.  A uniquely named temporary
# file is used instead of a fixed path under /tmp so the script is portable
# and cannot collide with an existing file; it is removed even if the dump
# fails.
tmp_fd, d_3_tmp_file = tempfile.mkstemp(suffix='.json')
try:
    with os.fdopen(tmp_fd, 'w') as f:
        json.dump(doc, f)
    d_3 = os.path.getsize(d_3_tmp_file)
finally:
    os.remove(d_3_tmp_file)

print("Size on disc as xml: {0:>10d} B".format(d_0))
print("Size in memory as dict: {0:>10d} B".format(d_1))
print("Size in memory as json.dumps str: {0:>10d} B".format(d_2))
print("Size on disc as json.dump in file : {0:>10d} B".format(d_3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment