Utility for logging a system profile to tensorboardX during PyTorch training.
import torch | |
import psutil | |
import numpy as np | |
def log_profile(summaryWriter, step, scope='profile', cpu=True, mem=True, gpu=torch.cuda.is_available(), disk=('read_time', 'write_time'), network=False):
    """Log a snapshot of system resource usage through ``summaryWriter``.

    Every enabled section issues one or more
    ``summaryWriter.add_scalars(tag, values, step)`` calls, with all tags
    prefixed by ``scope``.

    Args:
        summaryWriter: Object exposing ``add_scalars(tag, dict, step)``.
        step: Step value forwarded unchanged to every ``add_scalars`` call.
        scope: Prefix for all tags (``'{scope}/cpu/percent'``, ...).
        cpu: Log min/avg/max of the per-CPU utilisation percentages.
        mem: Log the fields of ``psutil.virtual_memory()`` under
            ``'{scope}/ram'`` and of ``psutil.swap_memory()`` under
            ``'{scope}/swap'``.
        gpu: Log CUDA allocator statistics under ``'{scope}/cuda'``. The
            default is computed once, when this function is defined.
        disk: Falsy to skip disk logging, ``True`` to log every per-disk
            I/O counter field, or a collection of field names to log only
            those. One tag is written per field, holding one value per
            disk. Devices whose name starts with ``'ram'`` or ``'loop'``
            are ignored.
        network: Log the fields of ``psutil.net_io_counters()`` under
            ``'{scope}/network'``.
    """
    if cpu:
        per_core = np.array(psutil.cpu_percent(percpu=True))
        summaryWriter.add_scalars(f'{scope}/cpu/percent', {
            'min': per_core.min(),
            'avg': per_core.mean(),
            'max': per_core.max(),
        }, step)

    if mem:
        summaryWriter.add_scalars(f'{scope}/ram', psutil.virtual_memory()._asdict(), step)
        summaryWriter.add_scalars(f'{scope}/swap', psutil.swap_memory()._asdict(), step)

    if disk:
        # `or {}` tolerates a backend that reports no disks at all.
        raw_counters = psutil.disk_io_counters(perdisk=True) or {}
        per_disk = {
            dname: counters._asdict()
            for dname, counters in raw_counters.items()
            if not dname.startswith(('ram', 'loop'))
        }
        # Nothing left after filtering: skip instead of indexing an empty list.
        if per_disk:
            log_all_fields = disk is True
            field_names = next(iter(per_disk.values())).keys()
            # Walk the data "across" the disks: one tag per counter field,
            # carrying that field's value for every disk.
            for vname in field_names:
                if log_all_fields or vname in disk:
                    value_by_disk = {dname: counters[vname] for dname, counters in per_disk.items()}
                    summaryWriter.add_scalars(f'{scope}/disk/{vname}', value_by_disk, step)

    if network:
        summaryWriter.add_scalars(f'{scope}/network', psutil.net_io_counters()._asdict(), step)

    if gpu:
        # memory_cached / max_memory_cached are deprecated aliases of
        # memory_reserved / max_memory_reserved. Prefer the new names and
        # fall back to the old ones when they are missing. The logged keys
        # stay the same so existing charts keep their tags.
        reserved = getattr(torch.cuda, 'memory_reserved', None)
        if reserved is None:
            reserved = torch.cuda.memory_cached
        max_reserved = getattr(torch.cuda, 'max_memory_reserved', None)
        if max_reserved is None:
            max_reserved = torch.cuda.max_memory_cached
        summaryWriter.add_scalars(f'{scope}/cuda', {
            'memory_allocated': torch.cuda.memory_allocated(),
            'max_memory_allocated': torch.cuda.max_memory_allocated(),
            'memory_cached': reserved(),
            'max_memory_cached': max_reserved(),
        }, step)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment