Skip to content

Instantly share code, notes, and snippets.

@Erotemic
Created October 13, 2020 23:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save Erotemic/3f017de31529dc64c1a54948f37da1d5 to your computer and use it in GitHub Desktop.
"""
Experiment Script Related to Pytorch Memory Leak Issue
References:
https://github.com/pytorch/pytorch/issues/13246
https://gist.github.com/mprostock/2850f3cd465155689052f0fa3a177a50
"""
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
import psutil
import ubelt as ub
import sys
class DataIter(Dataset):
    """
    Toy dataset for reproducing DataLoader worker memory growth.

    The backend storage can be either a single numpy array or a plain Python
    list, and each item can be wrapped in several container types, so the
    memory behavior of each combination can be compared.

    Args:
        storage_mode (str): backend storage type, 'numpy' (one flat array)
            or 'python' (a list of Python ints).
        return_mode (str): container each item is returned in, one of
            'tensor', 'dict', 'tuple', or 'list'.
        total (int | float): number of items held in the backend storage.

    Raises:
        ValueError: if ``return_mode`` is unknown.
        KeyError: if ``storage_mode`` is unknown.
    """

    def __init__(self, storage_mode='numpy', return_mode='tensor', total=24e7):
        self.return_mode = return_mode
        # Validate eagerly with a real exception (asserts vanish under -O).
        if self.return_mode not in {'tensor', 'dict', 'tuple', 'list'}:
            raise ValueError('unknown return_mode={!r}'.format(return_mode))
        if storage_mode == 'numpy':
            # np.arange builds the array directly instead of materializing a
            # temporary Python list of `total` int objects first.
            self.data = np.arange(int(total))
        elif storage_mode == 'python':
            self.data = list(range(int(total)))
        else:
            raise KeyError(storage_mode)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        data = np.array([data], dtype=np.int64)
        data_pt = torch.tensor(data)
        if self.return_mode == 'tensor':
            item = data_pt
        elif self.return_mode == 'dict':
            item = {
                'data': data_pt
            }
        elif self.return_mode == 'tuple':
            item = (data_pt,)
        elif self.return_mode == 'list':
            item = [data_pt]
        else:
            # Unreachable when __init__ validated, but keeps `item` from
            # ever being referenced while unbound.
            raise ValueError('unknown return_mode={!r}'.format(self.return_mode))
        return item
def getsize(obj):
    """
    Recursively sum the size in bytes of ``obj`` and everything it references.

    Unlike a bare ``sys.getsizeof``, this walks the whole reference graph
    (breadth first, deduplicated by object identity) so container contents
    are counted too.

    https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python
    """
    import sys
    from types import ModuleType, FunctionType
    from gc import get_referents
    # Classes, modules, and functions drag in far too much interpreter state
    # (a function knows its module, etc.), so they are never traversed.
    blocklist = (type, ModuleType, FunctionType)
    if isinstance(obj, blocklist):
        raise TypeError('getsize() does not take argument of type: ' + str(type(obj)))
    total = 0
    seen = set()
    frontier = [obj]
    while frontier:
        next_frontier = []
        for candidate in frontier:
            # Skip blocked types and anything already counted.
            if isinstance(candidate, blocklist) or id(candidate) in seen:
                continue
            seen.add(id(candidate))
            total += sys.getsizeof(candidate)
            next_frontier.append(candidate)
        frontier = get_referents(*next_frontier)
    return total
def byte_str(num, unit='auto', precision=2):
    """
    Automatically chooses relevant unit (KB, MB, or GB) for displaying some
    number of bytes.

    Args:
        num (int): number of bytes
        unit (str): which unit to use, can be auto, B, KB, MB, GB, TB, PB, EB,
            ZB, or YB. In auto mode the unit is chosen from the magnitude of
            ``num`` and is never smaller than KB (see the example).
        precision (int): decimal places in the rendered number

    References:
        https://en.wikipedia.org/wiki/Orders_of_magnitude_(data)

    Returns:
        str: string representing the number of bytes with appropriate units

    Example:
        >>> num_list = [1, 100, 1024, 1048576, 1073741824, 1099511627776]
        >>> result = ub.repr2(list(map(byte_str, num_list)), nl=0)
        >>> print(result)
        ['0.00 KB', '0.10 KB', '1.00 KB', '1.00 MB', '1.00 GB', '1.00 TB']
    """
    if unit == 'auto':
        abs_num = abs(num)
        # Auto mode starts at KB: anything under 1 MiB still renders as KB.
        unit = 'YB'
        for name, exp in [('KB', 20), ('MB', 30), ('GB', 40), ('TB', 50),
                          ('PB', 60), ('EB', 70), ('ZB', 80)]:
            if abs_num < 2.0 ** exp:
                unit = name
                break
    # First letter of the unit determines the power-of-two divisor.
    exponents = {'b': 0, 'k': 10, 'm': 20, 'g': 30, 't': 40,
                 'p': 50, 'e': 60, 'z': 70, 'y': 80}
    prefix = unit.lower()[:1]
    if prefix not in exponents:
        raise ValueError('unknown num={!r} unit={!r}'.format(num, unit))
    if prefix == 'b':
        # Plain bytes pass through unscaled (an int stays an int).
        num_unit = num
    else:
        num_unit = num / (2.0 ** exponents[prefix])
    return ub.repr2(num_unit, precision=precision) + ' ' + unit
def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=False):
    """
    Measure resident memory while iterating a DataLoader over a DataIter.

    Prints the starting memory baseline, the backend storage size, memory
    after dataset / loader construction, and a live per-batch memory readout
    in the progress bar.

    Args:
        storage_mode (str): how the dataset is stored in backend datasets
            ('numpy' or 'python')
        return_mode (str): how each data item is returned
            ('tensor', 'dict', 'tuple', or 'list')
        total (int | float): size of backend storage
        shuffle (bool): whether the DataLoader shuffles indices
    """
    mem = psutil.virtual_memory()
    start_mem = mem.used
    print('Starting memory = {!r}'.format(byte_str(start_mem)))

    train_data = DataIter(
        storage_mode=storage_mode,
        return_mode=return_mode,
        total=total)

    # Report the backend storage size so growth beyond it can be attributed
    # to the loader / workers rather than the data itself.
    if storage_mode == 'numpy':
        total_storage_bytes = train_data.data.dtype.itemsize * train_data.data.size
    else:
        # NOTE: sys.getsizeof is shallow — it counts the list object but not
        # the int objects it references (getsize() gives the deep total).
        total_storage_bytes = sys.getsizeof(train_data.data)
    print('total_storage_size = {!r}'.format(byte_str(total_storage_bytes)))

    mem = psutil.virtual_memory()
    print('After init DataIter memory = {!r}'.format(byte_str(mem.used - start_mem)))

    print('shuffle = {!r}'.format(shuffle))
    train_loader = DataLoader(train_data, batch_size=300,
                              shuffle=shuffle,
                              drop_last=True,
                              pin_memory=False,
                              num_workers=18)
    mem = psutil.virtual_memory()
    print('After init DataLoader memory = {!r}'.format(byte_str(mem.used - start_mem)))

    # Kept for interactive inspection (not returned: fire.Fire would dump
    # the full list to stdout if main returned it).
    mem_usage = []
    prog = ub.ProgIter(train_loader)
    for item in prog:
        mem = psutil.virtual_memory()
        used = mem.used - start_mem
        prog.set_extra(' Mem=' + byte_str(used))
        mem_usage.append(used)
if __name__ == '__main__':
    """
    CommandLine:
        python debug_memory.py numpy tensor --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=numpy --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=numpy --total=24e5 --shuffle=False
        python debug_memory.py --storage_mode=python --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=python --total=24e5 --shuffle=False
        python debug_memory.py numpy dict 24e5
        python debug_memory.py python dict 24e7
    Conclusions:
        * It seems like it is ok if the return type is a dictionary
          the problem seems to be localized to the storage type.
    """
    # fire generates a CLI from main's signature: positional or --flag args
    # map onto storage_mode / return_mode / total / shuffle.
    import fire
    fire.Fire(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment