PyTorch memory usage
# from https://discuss.pytorch.org/t/how-to-debug-causes-of-gpu-memory-leaks/6741/19
# Collect all live tensors via the garbage collector, then sum up their
# memory usage, as in the following:
import torch
import gc
from functools import reduce
import operator as op


def get_tensors(only_cuda=False, omit_objs=()):
    """
    :return: list of active PyTorch tensors

    Note: `del_object` and `get_all_tensor_names` in the doctest below are
    helpers from the original forum thread and are not defined in this gist,
    so the doctest is illustrative rather than directly runnable.

    >>> import torch
    >>> from torch import tensor
    >>> clean_gc_return = map((lambda obj: del_object(obj)), gc.get_objects())
    >>> device = "cuda" if torch.cuda.is_available() else "cpu"
    >>> device = torch.device(device)
    >>> only_cuda = torch.cuda.is_available()
    >>> t1 = tensor([1], device=device)
    >>> a3 = tensor([[1, 2], [3, 4]], device=device)
    >>> # print(get_all_tensor_names())
    >>> tensors = [tensor_obj for tensor_obj in get_tensors(only_cuda=only_cuda)]
    >>> # print(tensors)
    >>> # Each of t1 and a3 is referenced twice: by its name and by `tensors`.
    >>> expected_tensor_length = 2
    >>> assert len(tensors) == expected_tensor_length, f"Expected length of tensors {expected_tensor_length}, but got {len(tensors)}, the tensors: {tensors}"
    >>> exp_size = (2, 2)
    >>> act_size = tensors[1].size()
    >>> assert exp_size == act_size, f"Expected size {exp_size} but got: {act_size}"
    >>> del t1
    >>> del a3
    >>> clean_gc_return = map((lambda obj: del_object(obj)), tensors)
    """
    # If only_cuda is False, collect tensors on every device.
    add_all_tensors = not only_cuda
    # To avoid counting the same tensor twice, create a dictionary of tensors,
    # each one identified by its id (its in-memory address).
    tensors = {}
    omit_obj_ids = [id(obj) for obj in omit_objs]

    def add_tensor(obj):
        if torch.is_tensor(obj):
            tensor = obj
        elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
            tensor = obj.data
        else:
            return
        if (only_cuda and tensor.is_cuda) or add_all_tensors:
            tensors[id(tensor)] = tensor

    for obj in gc.get_objects():
        try:
            # Add the obj if it is a tensor (or wraps one, e.g. nn.Parameter).
            add_tensor(obj)
            # Some tensors are "saved & hidden" for the backward pass.
            if hasattr(obj, 'saved_tensors') and (id(obj) not in omit_obj_ids):
                for tensor_obj in obj.saved_tensors:
                    add_tensor(tensor_obj)
        except Exception:
            # gc.get_objects() can yield objects whose attribute access
            # raises; skip them.
            pass
    return list(tensors.values())  # the unique tensors that were found
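

# Illustrative helper built on top of get_tensors() (an addition, not part of
# the original snippet): group live tensors by device, dtype and shape to see
# at a glance which allocations are accumulating.
def summarize_tensors(only_cuda=False):
    from collections import Counter
    counts = Counter(
        (str(t.device), str(t.dtype), tuple(t.size()))
        for t in get_tensors(only_cuda=only_cuda)
    )
    for key, count in counts.most_common():
        print('{} x {}'.format(count, key))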


def get_tensor_mem_usage(only_cuda=False, omit_objs=()):
    """Total memory, in bytes, held by the currently live tensors."""
    tensors = get_tensors(only_cuda, omit_objs)
    total_size = 0
    for obj in tensors:
        try:
            if torch.is_tensor(obj):
                # Product of the dimensions is the element count; multiply by
                # element_size() (bytes per element) to get the tensor's size.
                total_size += reduce(op.mul, obj.size(), 1) * obj.element_size()
        except Exception:
            # e.g. "dlopen(data, 6)" errors observed on macOS; skip the object
            pass
    return total_size


a = torch.Tensor(10, 20)  # 200 uninitialized float32 elements -> 800 bytes
print('total tensor memory: {} bytes'.format(get_tensor_mem_usage()))
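
# Illustrative leak hunt (an addition, not part of the original snippet):
# keep references in a list to simulate a leak and watch the total grow
# between iterations. On a GPU, torch.cuda.memory_allocated() reports the
# caching allocator's own byte count and makes a useful cross-check.
leaked = []
prev = get_tensor_mem_usage()
for step in range(3):
    leaked.append(torch.ones(1000, 1000))  # ~4 MB of float32 kept alive
    cur = get_tensor_mem_usage()
    print('step {}: {} bytes live (delta {})'.format(step, cur, cur - prev))
    prev = cur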