Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A script to generate per-line GPU memory usage trace. For more meaningful results set `CUDA_LAUNCH_BLOCKING=1`.
import datetime
import linecache
import os
import pynvml3
import torch
print_tensor_sizes = True
last_tensor_sizes = set()
gpu_profile_fn = f'{}-gpu_mem_prof.txt'
if 'GPU_DEBUG' in os.environ:
print('profiling gpu usage to ', gpu_profile_fn)
lineno = None
func_name = None
filename = None
module_name = None
def gpu_profile(frame, event, arg):
# it is _about to_ execute (!)
global last_tensor_sizes
global lineno, func_name, filename, module_name
if event == 'line':
# about _previous_ line (!)
if lineno is not None:
handle = pynvml3.nvmlDeviceGetHandleByIndex(int(os.environ['GPU_DEBUG']))
meminfo = pynvml3.nvmlDeviceGetMemoryInfo(handle)
line = linecache.getline(filename, lineno)
where_str = module_name+' '+func_name+':'+str(lineno)
with open(gpu_profile_fn, 'a+') as f:
f":{meminfo.used/1024**2:<7.1f}Mb "
if print_tensor_sizes is True:
for tensor in get_tensors():
if not hasattr(tensor, 'dbg_alloc_where'):
tensor.dbg_alloc_where = where_str
new_tensor_sizes = {(type(x), tuple(x.size()), x.dbg_alloc_where)
for x in get_tensors()}
for t, s, loc in new_tensor_sizes - last_tensor_sizes:
f.write(f'+ {loc:<50} {str(s):<20} {str(t):<10}\n')
for t, s, loc in last_tensor_sizes - new_tensor_sizes:
f.write(f'- {loc:<50} {str(s):<20} {str(t):<10}\n')
last_tensor_sizes = new_tensor_sizes
# save details about line _to be_ executed
lineno = None
func_name = frame.f_code.co_name
filename = frame.f_globals["__file__"]
if (filename.endswith(".pyc") or
filename = filename[:-1]
module_name = frame.f_globals["__name__"]
lineno = frame.f_lineno
if 'gmwda-pytorch' not in os.path.dirname(os.path.abspath(filename)):
lineno = None # skip current line evaluation
if ('car_datasets' in filename
or '_exec_config' in func_name
or 'gpu_profile' in module_name
or 'tee_stdout' in module_name):
lineno = None # skip current
return gpu_profile
except (KeyError, AttributeError):
return gpu_profile
def get_tensors(gpu_only=True):
import gc
for obj in gc.get_objects():
if torch.is_tensor(obj):
tensor = obj
elif hasattr(obj, 'data') and torch.is_tensor(
tensor =
if tensor.is_cuda:
yield tensor
except Exception as e:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment