Skip to content

Instantly share code, notes, and snippets.

@MInner
Created September 12, 2017 16:11
Show Gist options
  • Star 48 You must be signed in to star a gist
  • Fork 9 You must be signed in to fork a gist
  • Save MInner/8968b3b120c95d3f50b8a22a74bf66bc to your computer and use it in GitHub Desktop.
Save MInner/8968b3b120c95d3f50b8a22a74bf66bc to your computer and use it in GitHub Desktop.
A script to generate per-line GPU memory usage trace. For more meaningful results set `CUDA_LAUNCH_BLOCKING=1`.
import datetime
import linecache
import os
import pynvml3
import torch
# --- profiler configuration and cross-call trace state ---

# When True, the per-line trace also diffs the set of live CUDA tensors.
print_tensor_sizes = True

# Snapshot of (type, size, alloc-site) triples from the previous traced line.
last_tensor_sizes = set()

# Timestamped output file for the memory trace (note: ':' in the name is
# not portable to Windows filesystems).
gpu_profile_fn = f'{datetime.datetime.now():%d-%b-%y-%H:%M:%S}-gpu_mem_prof.txt'

if 'GPU_DEBUG' in os.environ:
    print('profiling gpu usage to ', gpu_profile_fn)

# State describing the *previous* traced line, filled in by gpu_profile()
# on one 'line' event and consumed on the next.
lineno = func_name = filename = module_name = None
def gpu_profile(frame, event, arg):
    """Trace callback (for sys.settrace) that logs GPU memory use per line.

    On each 'line' event it first reports memory for the *previous* line
    (stored in the module-level globals), then records the location of the
    line that is about to execute. Appends to ``gpu_profile_fn``.

    NOTE(review): the page comments suggest ``pynvml3`` is a typo for the
    ``py3nvml`` package — confirm the import name before running.
    """
    # it is _about to_ execute (!)
    global last_tensor_sizes
    global lineno, func_name, filename, module_name
    if event == 'line':
        try:
            # about _previous_ line (!)
            if lineno is not None:
                # Query NVML for current memory usage on the GPU selected
                # by the GPU_DEBUG environment variable (a device index).
                pynvml3.nvmlInit()
                handle = pynvml3.nvmlDeviceGetHandleByIndex(int(os.environ['GPU_DEBUG']))
                meminfo = pynvml3.nvmlDeviceGetMemoryInfo(handle)
                line = linecache.getline(filename, lineno)
                where_str = module_name+' '+func_name+':'+str(lineno)
                with open(gpu_profile_fn, 'a+') as f:
                    # One trace line: location, used MB, source text.
                    f.write(f"{where_str:<50}"
                            f":{meminfo.used/1024**2:<7.1f}Mb "
                            f"{line.rstrip()}\n")
                    if print_tensor_sizes is True:
                        # Tag each live CUDA tensor with the first location
                        # at which the profiler saw it.
                        for tensor in get_tensors():
                            if not hasattr(tensor, 'dbg_alloc_where'):
                                tensor.dbg_alloc_where = where_str
                        new_tensor_sizes = {(type(x), tuple(x.size()), x.dbg_alloc_where)
                                            for x in get_tensors()}
                        # Log tensors that appeared (+) or vanished (-)
                        # since the previous traced line.
                        for t, s, loc in new_tensor_sizes - last_tensor_sizes:
                            f.write(f'+ {loc:<50} {str(s):<20} {str(t):<10}\n')
                        for t, s, loc in last_tensor_sizes - new_tensor_sizes:
                            f.write(f'- {loc:<50} {str(s):<20} {str(t):<10}\n')
                        last_tensor_sizes = new_tensor_sizes
                pynvml3.nvmlShutdown()

            # save details about line _to be_ executed
            lineno = None

            func_name = frame.f_code.co_name
            filename = frame.f_globals["__file__"]
            if (filename.endswith(".pyc") or
                    filename.endswith(".pyo")):
                # Point at the .py source so linecache can read it.
                filename = filename[:-1]
            module_name = frame.f_globals["__name__"]
            lineno = frame.f_lineno

            # Only profile lines inside the project tree; everything else
            # is skipped by leaving lineno unset.
            if 'gmwda-pytorch' not in os.path.dirname(os.path.abspath(filename)):
                lineno = None  # skip current line evaluation

            if ('car_datasets' in filename
                    or '_exec_config' in func_name
                    or 'gpu_profile' in module_name
                    or 'tee_stdout' in module_name):
                lineno = None  # skip current

            return gpu_profile

        except (KeyError, AttributeError):
            # Frames without __file__/__name__ (e.g. builtins) are ignored.
            pass

    return gpu_profile
def get_tensors(gpu_only=True):
    """Yield live torch tensors found via the garbage collector.

    Walks ``gc.get_objects()`` and yields each tensor (or the ``.data``
    tensor of wrapper objects such as Parameters/Variables).

    Bug fix: the original accepted ``gpu_only`` but ignored it, always
    yielding only CUDA tensors. Now ``gpu_only=False`` yields CPU tensors
    too; the default behavior is unchanged.
    """
    import gc
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor = obj
            elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
                # e.g. nn.Parameter / legacy Variable wrappers
                tensor = obj.data
            else:
                continue
            if tensor.is_cuda or not gpu_only:
                yield tensor
        except Exception:
            # Best-effort: some gc-tracked objects raise on attribute
            # access; skip them rather than abort the scan.
            pass
@vardaan123
Copy link

Could you please provide an explanation of the arguments `frame`, `event`, and `arg` passed to your function? Thanks.

@hoxmark
Copy link

hoxmark commented Jun 16, 2018

@zhanwenchen
Copy link

Shouldn't pynvml3 be py3nvml?

@zhanwenchen
Copy link

There is a better version of this file without all the typos and with some printing: https://github.com/li-js/gpu_memory_profiling/blob/master/gpu_profile.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment