Skip to content

Instantly share code, notes, and snippets.

@hbldh
Last active May 24, 2016 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hbldh/4b87521f3b24db87e8318a9958473c0a to your computer and use it in GitHub Desktop.
Save hbldh/4b87521f3b24db87e8318a9958473c0a to your computer and use it in GitHub Desktop.
Code for blog post "JSON and the Python dictionary"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
dict_memory_foray
~~~~~~~~~~~~~~~~~
:copyright: 2016 by Henrik Blidh <henrik.blidh@nedomkull.com>
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
import json
import os
import sys
import tempfile
is_py3 = (sys.version_info[0] > 2)
if is_py3:
    # Python 3 removed these Python 2 builtins; alias them so the code
    # below can use one spelling on both major versions.
    unicode = str
    basestring = str
    xrange = range
    long = int


def calculate_document_size_in_memory(doc):
    """A "size in memory" estimator for JSON documents/dicts.

    Each container (``dict``, ``list``, ``tuple``) is measured with
    ``sys.getsizeof`` on the container itself, which accounts for the
    container's own storage but not for the objects it refers to. The
    sizes of all keys, values and items are then added recursively.

    An object that is referenced several times is counted once per
    reference and ``None`` contributes nothing, so the result is an
    estimate rather than an exact figure.

    :param doc: The document or list of documents to find size of.
    :type doc: dict or list
    :return: The estimated size of the input document(s) in bytes.
    :rtype: int
    :raises ValueError: If an object of an unsupported type is found.

    """
    if isinstance(doc, (list, tuple)):
        # Measure the actual container, not an empty instance of its type:
        # an empty instance ignores the element slots and cannot even be
        # built for tuple subclasses that require constructor arguments
        # (e.g. namedtuples).
        return sys.getsizeof(doc) + sum(
            calculate_document_size_in_memory(item) for item in doc)
    if isinstance(doc, dict):
        # Same reasoning as above; then add every key and every value.
        return sys.getsizeof(doc) + sum(
            calculate_document_size_in_memory(key) +
            calculate_document_size_in_memory(doc[key])
            for key in doc)
    if isinstance(doc, (float, int, long, basestring)):
        # Base type which can be evaluated with sys.getsizeof directly.
        return sys.getsizeof(doc)
    if doc is None:
        # None is deliberately counted as zero bytes.
        return 0
    raise ValueError("Unsizable object: {0}".format(type(doc)))
# Example 1: a small JSON document, compared as a parsed dict, as a JSON
# string in memory and as a JSON file on disc.
doc = """
{
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
}
"""
as_dict = json.loads(doc)
as_minified_json = json.dumps(as_dict)

# d_1: recursive estimate of the parsed dict.
d_1 = calculate_document_size_in_memory(as_dict)
# d_2: size of the serialized string object itself.
d_2 = sys.getsizeof(as_minified_json)

# d_3: size of the document when dumped to a file. A uniquely named
# temporary file is used instead of a fixed path under /tmp, so the script
# works on any platform, cannot clobber an existing file, and the file is
# removed even if json.dump raises.
with tempfile.NamedTemporaryFile(mode='w', suffix='.json') as f:
    json.dump(as_dict, f)
    # Flush buffered output so the size reported by the OS is complete.
    f.flush()
    d_3 = os.fstat(f.fileno()).st_size

print("Size in memory as dict: {0:>6d} B".format(d_1))
print("Size in memory as json.dumps str: {0:>6d} B".format(d_2))
print("Size on disc as json.dump in file: {0:>6d} B".format(d_3))
# -------------------------------
# Example 2: the same comparison for an XML file parsed with xmlr.
from xmlr import xmlparse

# NOTE(review): machine-specific path; point it at a local XML file
# before running this section.
filepath = '/home/hbldh/Downloads/google-renewals-all-20080624.xml'
# Presumably xmlparse returns a dict/list structure, since the result is
# passed straight to json.dumps -- confirm against the xmlr documentation.
doc = xmlparse(filepath)
as_minified_json = json.dumps(doc)

# d_0: size of the source XML file on disc.
d_0 = os.path.getsize(filepath)
# d_1: recursive estimate of the parsed document.
d_1 = calculate_document_size_in_memory(doc)
# d_2: size of the serialized string object itself.
d_2 = sys.getsizeof(as_minified_json)

# d_3: size of the document when dumped to a file. A uniquely named
# temporary file is used instead of a fixed path under /tmp, so the script
# works on any platform, cannot clobber an existing file, and the file is
# removed even if json.dump raises.
with tempfile.NamedTemporaryFile(mode='w', suffix='.json') as f:
    json.dump(doc, f)
    # Flush buffered output so the size reported by the OS is complete.
    f.flush()
    d_3 = os.fstat(f.fileno()).st_size

print("Size on disc as xml: {0:>10d} B".format(d_0))
print("Size in memory as dict: {0:>10d} B".format(d_1))
print("Size in memory as json.dumps str: {0:>10d} B".format(d_2))
print("Size on disc as json.dump in file : {0:>10d} B".format(d_3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment