Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A script that generates a per-line GPU memory usage trace. For more meaningful results, set `CUDA_LAUNCH_BLOCKING=1`.
import datetime
import linecache
import os
import pynvml3
import torch
# When True, every traced line is followed by a "+"/"-" listing of the
# tensors that appeared or disappeared since the previously traced line.
print_tensor_sizes = True
# (type, shape, allocation site) triples recorded after the last traced line.
last_tensor_sizes = set()
# Trace output file, stamped with the import time so that separate runs do not
# append to the same file.
gpu_profile_fn = f'{datetime.datetime.now():%d-%b-%y-%H:%M:%S}-gpu_mem_prof.txt'
if 'GPU_DEBUG' in os.environ:
    print('profiling gpu usage to ', gpu_profile_fn)

# Details of the line that is about to execute, saved by gpu_profile() so the
# memory measured on the *next* trace event can be attributed to it.
# lineno is None whenever there is no pending line to report on.
lineno = None
func_name = None
filename = None
module_name = None
def gpu_profile(frame, event, arg):
    """Trace function that appends per-line GPU memory usage to a log file.

    Intended to be installed with ``sys.settrace(gpu_profile)``; the
    interpreter then calls it with the standard trace-function arguments.

    Args:
        frame: the frame that is currently executing; its code name, line
            number and module globals identify the line about to run.
        event: the trace event name. Only ``'line'`` events are profiled;
            every other event is ignored.
        arg: event-dependent payload supplied by the interpreter; unused.

    Returns:
        ``gpu_profile`` itself, so tracing continues in the current scope.

    The GPU that is queried is the one whose index is stored in the
    ``GPU_DEBUG`` environment variable. A ``KeyError`` or ``AttributeError``
    raised while profiling (for example a missing ``GPU_DEBUG`` variable or
    a frame whose globals have no ``__file__``) is deliberately swallowed so
    that tracing never breaks the traced program.
    """
    global last_tensor_sizes
    global lineno, func_name, filename, module_name

    if event != 'line':
        return gpu_profile

    try:
        # A 'line' event fires when a line is _about to_ execute (!), so the
        # memory measured now is attributed to the _previous_ line (!), whose
        # details were saved on the preceding call.
        if lineno is not None:
            # Read the index first so a missing variable never leaves NVML
            # initialised.
            device_index = int(os.environ['GPU_DEBUG'])
            # NVML has to be initialised before any device query; the
            # init/shutdown calls are paired so nothing is left open.
            pynvml3.nvmlInit()
            try:
                handle = pynvml3.nvmlDeviceGetHandleByIndex(device_index)
                meminfo = pynvml3.nvmlDeviceGetMemoryInfo(handle)
            finally:
                pynvml3.nvmlShutdown()
            line = linecache.getline(filename, lineno)
            where_str = module_name+' '+func_name+':'+str(lineno)

            with open(gpu_profile_fn, 'a+') as f:
                f.write(f"{where_str:<50}"
                        f":{meminfo.used/1024**2:<7.1f}Mb "
                        f"{line.rstrip()}\n")

                if print_tensor_sizes is True:
                    # Single pass: tag tensors seen for the first time with
                    # the line that was executing, and record them in the
                    # same step so a tag set on a temporary view is not lost.
                    new_tensor_sizes = set()
                    for tensor in get_tensors():
                        if not hasattr(tensor, 'dbg_alloc_where'):
                            tensor.dbg_alloc_where = where_str
                        new_tensor_sizes.add((type(tensor),
                                              tuple(tensor.size()),
                                              tensor.dbg_alloc_where))

                    for t, s, loc in new_tensor_sizes - last_tensor_sizes:
                        f.write(f'+ {loc:<50} {str(s):<20} {str(t):<10}\n')
                    for t, s, loc in last_tensor_sizes - new_tensor_sizes:
                        f.write(f'- {loc:<50} {str(s):<20} {str(t):<10}\n')
                    last_tensor_sizes = new_tensor_sizes

        # save details about line _to be_ executed
        lineno = None

        func_name = frame.f_code.co_name
        filename = frame.f_globals["__file__"]
        if filename.endswith((".pyc", ".pyo")):
            # map the compiled file back to its .py source for linecache
            filename = filename[:-1]
        module_name = frame.f_globals["__name__"]
        lineno = frame.f_lineno

        # only lines that live under the project directory are profiled
        if 'gmwda-pytorch' not in os.path.dirname(os.path.abspath(filename)):
            lineno = None  # skip current line evaluation

        # never profile the data loaders, config execution, or the
        # profiling/logging helpers themselves
        if ('car_datasets' in filename
                or '_exec_config' in func_name
                or 'gpu_profile' in module_name
                or 'tee_stdout' in module_name):
            lineno = None  # skip current

    except (KeyError, AttributeError):
        # best effort: a line that cannot be profiled is simply not reported
        pass

    return gpu_profile
def get_tensors(gpu_only=True):
    """Yield the tensors currently tracked by the garbage collector.

    Every object returned by ``gc.get_objects()`` is inspected: an object
    that is itself a tensor is yielded as-is, and an object exposing a
    tensor through a ``data`` attribute yields that ``data`` tensor.

    Args:
        gpu_only: when True (the default) only tensors whose ``is_cuda`` is
            true are yielded; when False every tensor found is yielded.

    Yields:
        The matching tensor objects, in garbage-collector order.
    """
    import gc

    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor = obj
            elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
                tensor = obj.data
            else:
                continue
            wanted = not gpu_only or tensor.is_cuda
        except Exception:
            # Best effort: arbitrary live objects may raise anything from
            # attribute access, and one bad object must not abort the scan.
            continue

        if wanted:
            yield tensor

This comment has been minimized.

Show comment Hide comment

vardaan123 Mar 1, 2018

Could you please explain the arguments `frame`, `event`, and `arg` that are passed to your function? Thanks.

Could you please explain the arguments `frame`, `event`, and `arg` that are passed to your function? Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment