Skip to content

Instantly share code, notes, and snippets.

@MInner
Created September 12, 2017 16:11
Show Gist options
  • Star 48 You must be signed in to star a gist
  • Fork 9 You must be signed in to fork a gist
  • Save MInner/8968b3b120c95d3f50b8a22a74bf66bc to your computer and use it in GitHub Desktop.
Save MInner/8968b3b120c95d3f50b8a22a74bf66bc to your computer and use it in GitHub Desktop.
A script to generate per-line GPU memory usage trace. For more meaningful results set `CUDA_LAUNCH_BLOCKING=1`.
import datetime
import linecache
import os
import pynvml3
import torch
# --- profiler configuration and cross-call trace state ---

# When True, the per-line trace also diffs the set of live CUDA tensors.
print_tensor_sizes = True

# Snapshot of (type, size, alloc-site) triples from the previous traced line.
last_tensor_sizes = set()

# Timestamped output file for the memory trace (note: ':' in the name is
# not portable to Windows filesystems).
gpu_profile_fn = f'{datetime.datetime.now():%d-%b-%y-%H:%M:%S}-gpu_mem_prof.txt'

if 'GPU_DEBUG' in os.environ:
    print('profiling gpu usage to ', gpu_profile_fn)

# State describing the *previous* traced line, filled in by gpu_profile()
# on one 'line' event and consumed on the next.
lineno = func_name = filename = module_name = None
def gpu_profile(frame, event, arg):
    """Trace callback (for sys.settrace) that logs GPU memory use per line.

    On each 'line' event it first reports memory for the *previous* line
    (stored in the module-level globals), then records the location of the
    line that is about to execute. Appends to ``gpu_profile_fn``.

    NOTE(review): the page comments suggest ``pynvml3`` is a typo for the
    ``py3nvml`` package — confirm the import name before running.
    """
    # it is _about to_ execute (!)
    global last_tensor_sizes
    global lineno, func_name, filename, module_name
    if event == 'line':
        try:
            # about _previous_ line (!)
            if lineno is not None:
                # Query NVML for current memory usage on the GPU selected
                # by the GPU_DEBUG environment variable (a device index).
                pynvml3.nvmlInit()
                handle = pynvml3.nvmlDeviceGetHandleByIndex(int(os.environ['GPU_DEBUG']))
                meminfo = pynvml3.nvmlDeviceGetMemoryInfo(handle)
                line = linecache.getline(filename, lineno)
                where_str = module_name+' '+func_name+':'+str(lineno)
                with open(gpu_profile_fn, 'a+') as f:
                    # One trace line: location, used MB, source text.
                    f.write(f"{where_str:<50}"
                            f":{meminfo.used/1024**2:<7.1f}Mb "
                            f"{line.rstrip()}\n")
                    if print_tensor_sizes is True:
                        # Tag each live CUDA tensor with the first location
                        # at which the profiler saw it.
                        for tensor in get_tensors():
                            if not hasattr(tensor, 'dbg_alloc_where'):
                                tensor.dbg_alloc_where = where_str
                        new_tensor_sizes = {(type(x), tuple(x.size()), x.dbg_alloc_where)
                                            for x in get_tensors()}
                        # Log tensors that appeared (+) or vanished (-)
                        # since the previous traced line.
                        for t, s, loc in new_tensor_sizes - last_tensor_sizes:
                            f.write(f'+ {loc:<50} {str(s):<20} {str(t):<10}\n')
                        for t, s, loc in last_tensor_sizes - new_tensor_sizes:
                            f.write(f'- {loc:<50} {str(s):<20} {str(t):<10}\n')
                        last_tensor_sizes = new_tensor_sizes
                pynvml3.nvmlShutdown()

            # save details about line _to be_ executed
            lineno = None

            func_name = frame.f_code.co_name
            filename = frame.f_globals["__file__"]
            if (filename.endswith(".pyc") or
                    filename.endswith(".pyo")):
                # Point at the .py source so linecache can read it.
                filename = filename[:-1]
            module_name = frame.f_globals["__name__"]
            lineno = frame.f_lineno

            # Only profile lines inside the project tree; everything else
            # is skipped by leaving lineno unset.
            if 'gmwda-pytorch' not in os.path.dirname(os.path.abspath(filename)):
                lineno = None  # skip current line evaluation

            if ('car_datasets' in filename
                    or '_exec_config' in func_name
                    or 'gpu_profile' in module_name
                    or 'tee_stdout' in module_name):
                lineno = None  # skip current

            return gpu_profile

        except (KeyError, AttributeError):
            # Frames without __file__/__name__ (e.g. builtins) are ignored.
            pass

    return gpu_profile
def get_tensors(gpu_only=True):
    """Yield live torch tensors found via the garbage collector.

    Walks ``gc.get_objects()`` and yields each tensor (or the ``.data``
    tensor of wrapper objects such as Parameters/Variables).

    Bug fix: the original accepted ``gpu_only`` but ignored it, always
    yielding only CUDA tensors. Now ``gpu_only=False`` yields CPU tensors
    too; the default behavior is unchanged.
    """
    import gc
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor = obj
            elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
                # e.g. nn.Parameter / legacy Variable wrappers
                tensor = obj.data
            else:
                continue
            if tensor.is_cuda or not gpu_only:
                yield tensor
        except Exception:
            # Best-effort: some gc-tracked objects raise on attribute
            # access; skip them rather than abort the scan.
            pass
@vardaan123
Copy link

Could you please provide an explanation of the arguments `frame`, `event`, and `arg` passed to your function? Thanks.

@hoxmark
Copy link

hoxmark commented Jun 16, 2018

@zhanwenchen
Copy link

Shouldn't pynvml3 be py3nvml?

@zhanwenchen
Copy link

There is a better version of this file without all the typos and with some printing: https://github.com/li-js/gpu_memory_profiling/blob/master/gpu_profile.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment