Skip to content

Instantly share code, notes, and snippets.

@Suor
Last active Oct 30, 2021
Embed
What would you like to do?
Different ways to calculate struct size
import gc
import random
import sys
import types
from collections import deque
from datetime import datetime
def getsize(obj):
    """Return the summed ``sys.getsizeof`` of ``obj`` and everything it references.

    The object graph is walked level by level through ``gc.get_referents``.
    Each distinct object (by ``id``) is counted once, and classes, modules
    and functions met along the way are neither counted nor followed.

    Raises:
        TypeError: if ``obj`` itself is a class, module or function.
    """
    excluded = (type, types.ModuleType, types.FunctionType)
    if isinstance(obj, excluded):
        raise TypeError("getsize() does not take argument of type: " + str(type(obj)))

    visited = set()
    total = 0
    frontier = [obj]
    while frontier:
        fresh = []
        for item in frontier:
            if isinstance(item, excluded):
                continue
            ident = id(item)
            if ident in visited:
                continue
            visited.add(ident)
            total += sys.getsizeof(item)
            fresh.append(item)
        # Only objects counted for the first time are expanded, so shared
        # or cyclic references cannot make the walk loop forever.
        frontier = gc.get_referents(*fresh)
    return total
def json_size_d(obj):
    """Estimate ``len(json.dumps(obj))`` iteratively, without recursion.

    Nested containers are pushed onto an explicit stack instead of being
    handled by recursive calls, so deeply nested input cannot exhaust the
    interpreter's recursion limit.

    Counting rules:
      * bool / int / float: ``len(str(value))``.
      * str: ``len(value) + 2`` for the quotes; escaping is not accounted for.
      * list / tuple / set: 2 per element (``", "`` separators plus the
        brackets), or 2 for an empty container (``[]``).
      * dict: 6 per item (key quotes, ``": "`` and ``", "`` / braces) plus
        ``len(str(key))`` per key, or 2 for an empty dict (``{}``).

    Raises:
        TypeError: if a value of any other type is encountered.
    """
    size = 0
    # A plain list is all that is needed for a LIFO stack.
    todo = [obj]
    while todo:
        item = todo.pop()
        if isinstance(item, (bool, int, float)):
            size += len(str(item))
        elif isinstance(item, str):
            size += len(item) + 2
        elif isinstance(item, (list, tuple, set)):
            n = len(item)
            if n == 0:
                # "[]" still takes two characters.
                size += 2
            else:
                size += n * 2
                todo.extend(item)
        elif isinstance(item, dict):
            n = len(item)
            if n == 0:
                # "{}" still takes two characters.
                size += 2
            else:
                size += n * 6 + sum(len(str(k)) for k in item)
                todo.extend(item.values())
        else:
            raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size_dd(obj, cache=None):
    """Estimate ``len(json.dumps(obj))`` without building the JSON string.

    Slower than actually serialising, but needs little extra memory. The
    walk is iterative and detects circular references.

    Counting rules:
      * bool / int / float / None / datetime: ``len(str(value))``.
      * str: ``len(value) + 2`` for the quotes; escaping is not accounted for.
      * list / tuple / set: 2 per element, or 2 when empty (``[]``).
      * dict: 6 per item plus ``len(str(key))`` per key, or 2 when empty
        (``{}``).

    Args:
        obj: the value to measure.
        cache: optional mapping of ``id(container)`` to an already known
            size. When a container's id is present, the cached size is
            added and the container is not traversed. This function only
            reads the cache; it never writes to it.

    Raises:
        ValueError: if a container is reachable from itself.
        TypeError: if a value of an unsupported type is encountered.
    """
    literals = (bool, int, float, type(None), datetime)
    size = 0
    # Each stack entry pairs the ids of the containers enclosing a group of
    # values with the values themselves; the id set is what lets a cycle be
    # recognised when a value turns out to be one of its own ancestors.
    todo = [(set(), [obj])]
    while todo:
        path, objects = todo.pop()
        for item in objects:
            if isinstance(item, literals):
                size += len(str(item))
            elif isinstance(item, str):
                size += len(item) + 2
            else:
                item_id = id(item)
                if item_id in path:
                    raise ValueError("Circular reference found")
                elif cache and item_id in cache:
                    size += cache[item_id]
                elif isinstance(item, (list, tuple, set)):
                    n = len(item)
                    if n == 0:
                        # "[]" still takes two characters.
                        size += 2
                    else:
                        size += n * 2
                        todo.append((path | {item_id}, item))
                elif isinstance(item, dict):
                    n = len(item)
                    if n == 0:
                        # "{}" still takes two characters.
                        size += 2
                    else:
                        size += n * 6 + sum(len(str(k)) for k in item)
                        todo.append((path | {item_id}, item.values()))
                else:
                    raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size(obj):
    """Recursively estimate ``len(json.dumps(obj))``.

    Numbers and booleans count as ``len(str(value))`` and strings as their
    length plus two quotes. An empty list/tuple/set/dict counts as 2;
    otherwise a sequence costs 2 per element and a dict 6 per item plus
    ``len(str(key))`` per key, each on top of the sizes of the contained
    values.

    Raises:
        TypeError: if a value of any other type is encountered.
    """
    if isinstance(obj, str):
        return len(obj) + 2
    if isinstance(obj, (bool, int, float)):
        return len(str(obj))
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    count = len(obj)
    if count == 0:
        return 2
    if isinstance(obj, dict):
        keys_total = sum(len(str(key)) for key in obj)
        values_total = sum(json_size(value) for value in obj.values())
        return count * 6 + keys_total + values_total
    return count * 2 + sum(json_size(member) for member in obj)
def json_size_hint(obj, sample_size=10):
    """Quickly estimate ``len(json.dumps(obj))`` by sampling large containers.

    Scalars and strings are sized like in ``json_size``. For a container
    with more than ``sample_size`` members only a random sample of that
    many members is measured, and their total is scaled up to the full
    length. Containers that fit within ``sample_size`` are measured in full.

    Args:
        obj: the value to measure.
        sample_size: maximum number of members inspected per container;
            it applies at every nesting level.

    Returns:
        An int for scalars, strings and empty containers; a float for
        non-empty containers (the scaling step uses true division).

    Raises:
        TypeError: if a value of an unsupported type is encountered.
        ValueError: if a non-empty container is met while ``sample_size``
            is smaller than 1.
    """
    if isinstance(obj, (bool, int, float)):
        return len(str(obj))
    if isinstance(obj, str):
        return len(obj) + 2
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    n = len(obj)
    if n == 0:
        return 2
    if sample_size < 1:
        # An empty sample would lead to a division by zero below.
        raise ValueError("sample_size must be at least 1")

    if isinstance(obj, dict):
        if n <= sample_size:
            sample = obj.items()
        else:
            # random.sample() needs a sequence; a dict view is not one.
            sample = random.sample(list(obj.items()), sample_size)
        sampled = sum(len(str(k)) + json_size_hint(v, sample_size) for k, v in sample)
        return n * 6 + sampled * n / len(sample)

    if n <= sample_size:
        sample = obj
    else:
        # Sets are not sequences either, so copy them before sampling.
        population = list(obj) if isinstance(obj, set) else obj
        sample = random.sample(population, sample_size)
    sampled = sum(json_size_hint(member, sample_size) for member in sample)
    return n * 2 + sampled * n / len(sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment