Created
October 13, 2020 23:39
-
-
Save Erotemic/3f017de31529dc64c1a54948f37da1d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Experiment Script Related to Pytorch Memory Leak Issue | |
References: | |
https://github.com/pytorch/pytorch/issues/13246 | |
https://gist.github.com/mprostock/2850f3cd465155689052f0fa3a177a50 | |
""" | |
from torch.utils.data import Dataset, DataLoader | |
import numpy as np | |
import torch | |
import psutil | |
import ubelt as ub | |
import sys | |
class DataIter(Dataset):
    """
    Toy dataset for reproducing DataLoader worker memory growth.

    Backing storage is either a numpy array or a Python list (the variable
    under investigation in pytorch#13246); each item is wrapped per
    ``return_mode`` so the collation path can also be varied.

    Args:
        storage_mode (str): 'numpy' or 'python' — backend container type
        return_mode (str): 'tensor', 'dict', 'tuple', or 'list' — item wrapper
        total (float | int): number of elements in the backing storage

    Raises:
        KeyError: if ``storage_mode`` is not recognized
    """

    def __init__(self, storage_mode='numpy', return_mode='tensor', total=24e7):
        self.return_mode = return_mode
        assert self.return_mode in {'tensor', 'dict', 'tuple', 'list'}
        size = int(total)  # default is a float literal (24e7); coerce once
        if storage_mode == 'numpy':
            # np.arange builds the array directly instead of materializing a
            # temporary Python list first (same values and default int dtype)
            self.data = np.arange(size)
        elif storage_mode == 'python':
            self.data = list(range(size))
        else:
            raise KeyError(storage_mode)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Always return int64 so items collate identically in both modes
        data = np.array([self.data[idx]], dtype=np.int64)
        data_pt = torch.tensor(data)
        if self.return_mode == 'tensor':
            item = data_pt
        elif self.return_mode == 'dict':
            item = {'data': data_pt}
        elif self.return_mode == 'tuple':
            item = (data_pt,)
        elif self.return_mode == 'list':
            item = [data_pt]
        else:
            # Unreachable given the __init__ assert, but keeps the method
            # total if asserts are stripped under `python -O`
            raise KeyError(self.return_mode)
        return item
def getsize(obj):
    """
    Recursively sum the size in bytes of ``obj`` and everything it references.

    Unlike ``sys.getsizeof`` (which is shallow), this walks the reference
    graph via ``gc.get_referents`` and counts each object exactly once.

    Raises:
        TypeError: if ``obj`` is itself a class, module, or function.

    References:
        https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python
    """
    import sys
    from gc import get_referents
    from types import FunctionType, ModuleType

    # Classes, functions, and modules reference far too much (entire modules
    # worth of state), so they are rejected as roots and skipped as members.
    blocklist = (type, ModuleType, FunctionType)
    if isinstance(obj, blocklist):
        raise TypeError('getsize() does not take argument of type: ' + str(type(obj)))

    seen_ids = set()
    total = 0
    frontier = [obj]
    while frontier:
        counted = []
        for member in frontier:
            # Skip blocklisted types and anything already counted (cycles,
            # shared references)
            if isinstance(member, blocklist) or id(member) in seen_ids:
                continue
            seen_ids.add(id(member))
            total += sys.getsizeof(member)
            counted.append(member)
        frontier = get_referents(*counted)
    return total
def byte_str(num, unit='auto', precision=2):
    """
    Automatically chooses relevant unit (KB, MB, or GB) for displaying some
    number of bytes.

    Args:
        num (int): number of bytes
        unit (str): which unit to use, can be auto, B, KB, MB, GB, TB, PB, EB,
            ZB, or YB.
        precision (int): number of decimal places in the formatted value

    References:
        https://en.wikipedia.org/wiki/Orders_of_magnitude_(data)

    Returns:
        str: string representing the number of bytes with appropriate units

    Example:
        >>> num_list = [1, 100, 1024, 1048576, 1073741824, 1099511627776]
        >>> result = ub.repr2(list(map(byte_str, num_list)), nl=0)
        >>> print(result)
        ['0.00 KB', '0.10 KB', '1.00 KB', '1.00 MB', '1.00 GB', '1.00 TB']
    """
    abs_num = abs(num)
    if unit == 'auto':
        # NOTE: there is no auto 'B' tier — anything under 1 MB reports in KB,
        # which matches the doctest above.
        auto_units = ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        unit = 'YB'
        for exp, candidate in enumerate(auto_units, start=2):
            if abs_num < 2.0 ** (10 * exp):
                unit = candidate
                break
    # Dispatch on the first letter of the unit, mirroring the 1024-based
    # power ladder ('b' is the identity case: keep num as-is, no float cast).
    shift_by_prefix = {'k': 10, 'm': 20, 'g': 30, 't': 40,
                       'p': 50, 'e': 60, 'z': 70, 'y': 80}
    prefix = unit.lower()[:1]
    if prefix == 'b':
        num_unit = num
    elif prefix in shift_by_prefix:
        num_unit = num / (2.0 ** shift_by_prefix[prefix])
    else:
        raise ValueError('unknown num={!r} unit={!r}'.format(num, unit))
    return ub.repr2(num_unit, precision=precision) + ' ' + unit
def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=False):
    """
    Run one memory-leak experiment: build the dataset, wrap it in a
    DataLoader with worker processes, and report memory growth per batch.

    Args:
        storage_mode : how the dataset is stored in backend datasets
        return_mode : how each data item is returned
        total : size of backend storage
        shuffle : whether the DataLoader shuffles indices
    """
    baseline = psutil.virtual_memory().used

    def _growth():
        # System memory growth (bytes) since the start of this run
        return psutil.virtual_memory().used - baseline

    print('Starting memory = {!r}'.format(byte_str(baseline)))

    train_data = DataIter(storage_mode=storage_mode,
                          return_mode=return_mode,
                          total=total)

    if storage_mode == 'numpy':
        total_storage_bytes = train_data.data.dtype.itemsize * train_data.data.size
    else:
        # NOTE: sys.getsizeof is shallow — counts the list object only, not
        # the int objects it references
        total_storage_bytes = sys.getsizeof(train_data.data)
    print('total_storage_size = {!r}'.format(byte_str(total_storage_bytes)))

    print('After init DataIter memory = {!r}'.format(byte_str(_growth())))
    print('shuffle = {!r}'.format(shuffle))

    train_loader = DataLoader(train_data, batch_size=300,
                              shuffle=shuffle,
                              drop_last=True,
                              pin_memory=False,
                              num_workers=18)
    print('After init DataLoader memory = {!r}'.format(byte_str(_growth())))

    mem_usage = []
    prog = ub.ProgIter(train_loader)
    for item in prog:
        used = _growth()
        prog.set_extra(' Mem=' + byte_str(used))
        mem_usage.append(used)
if __name__ == '__main__':
    """
    CommandLine:
        python debug_memory.py numpy tensor --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=numpy --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=numpy --total=24e5 --shuffle=False
        python debug_memory.py --storage_mode=python --total=24e5 --shuffle=True
        python debug_memory.py --storage_mode=python --total=24e5 --shuffle=False
        python debug_memory.py numpy dict 24e5
        python debug_memory.py python dict 24e7

    Conclusions:
        * It seems like it is ok if the return type is a dictionary
          the problem seems to be localized to the storage type.
    """
    # fire exposes main's keyword arguments as CLI flags / positionals
    import fire
    fire.Fire(main)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment