Last active
February 4, 2018 22:09
-
-
Save jmansour/17c9d4e6767fab22a317ba795e171df1 to your computer and use it in GitHub Desktop.
This script extracts basic gpu usage metrics from data generated by nvidia-smi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Extract basic GPU usage metrics from runtime utilisation data.

The input data is generated using:

    $ nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,nounits -l 10 > gpu-${SLURM_JOBID}.out

Usage:

    $ python <this script> gpu-<jobid>.out
"""
import sys

import numpy as np

# Column positions, fixed by the order of the --query-gpu fields above.
COL_INDEX = 0
COL_GPU_UTIL = 1
COL_MEM_UTIL = 2
COL_MEM_USED = 5
NUM_COLUMNS = 6

# Sampling period in seconds; must match the `-l 10` passed to nvidia-smi.
SAMPLE_INTERVAL_SECONDS = 10.

# A sample whose GPU utilisation (percent) is below this counts as idle.
IDLE_THRESHOLD_PERCENT = 0.1


def load_samples(filename):
    """Load the nvidia-smi CSV log at *filename* into a 2-D float array.

    The first line (the CSV header) is skipped.

    Raises:
        IOError/OSError: the file cannot be opened.
        ValueError: the file cannot be parsed, holds no samples, or has
            fewer columns than the query above produces.
    """
    # ndmin=2 keeps a log with a single sample row two-dimensional, so the
    # column indexing used by compute_metrics() still works.
    mat = np.loadtxt(filename, delimiter=",", skiprows=1, ndmin=2)
    if mat.shape[0] == 0 or mat.shape[1] < NUM_COLUMNS:
        raise ValueError(
            "expected at least one sample row with {} columns, got shape {}"
            .format(NUM_COLUMNS, mat.shape))
    return mat


def compute_metrics(mat, sample_interval=SAMPLE_INTERVAL_SECONDS):
    """Compute summary metrics from an array of samples.

    Args:
        mat: 2-D array with one row per sample and the columns listed in
            the module docstring. Must contain at least one row.
        sample_interval: seconds between successive nvidia-smi samples.

    Returns:
        A dict with the keys ``total_gpus``, ``runtime_minutes``,
        ``gpu_runtime_fraction``, ``gpu_mean_utilisation``,
        ``bandwidth_mean_utilisation``, ``peak_bandwidth_fraction`` and
        ``peak_memory_mb``. Utilisation values are fractions in [0, 1].
    """
    gpu_util = mat[:, COL_GPU_UTIL]
    mem_util = mat[:, COL_MEM_UTIL]
    total_rows = len(mat)

    # One mask drives both the runtime fraction and the busy-time means, so
    # every row is either idle or busy (no sample falls between the two).
    busy = gpu_util >= IDLE_THRESHOLD_PERCENT
    busy_rows = int(np.count_nonzero(busy))

    if busy_rows > 0:
        gpu_mean = float(np.mean(gpu_util[busy])) / 100.
        band_mean = float(np.mean(mem_util[busy])) / 100.
    else:
        gpu_mean = 0
        band_mean = 0

    # GPU indices are zero-based, so the highest index + 1 is the GPU count.
    total_gpus = int(np.max(mat[:, COL_INDEX])) + 1

    return {
        "total_gpus": total_gpus,
        # Each sampling tick writes one row per GPU, hence the division.
        "runtime_minutes": total_rows * sample_interval / 60. / total_gpus,
        "gpu_runtime_fraction": float(busy_rows) / float(total_rows),
        "gpu_mean_utilisation": gpu_mean,
        "bandwidth_mean_utilisation": band_mean,
        # Reported as a fraction, like the other utilisation figures.
        "peak_bandwidth_fraction": float(mem_util.max()) / 100.,
        "peak_memory_mb": float(mat[:, COL_MEM_USED].max()),
    }


def main(argv=None):
    """Read the log named by the first argument and print the metrics.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv``.

    Raises:
        RuntimeError: no filename was given, or the file could not be
            read as a valid nvidia-smi log.
    """
    if argv is None:
        argv = sys.argv
    try:
        filename = argv[1]
        mat = load_samples(filename)
    except (IndexError, IOError, OSError, ValueError):
        raise RuntimeError("You must provide a valid filename as the first argument to this script.")

    metrics = compute_metrics(mat)

    print("{} = Total GPUs used".format(metrics["total_gpus"]))
    print("{} = Total runtime (mins)".format(metrics["runtime_minutes"]))
    print("{} = GPU runtime fraction".format(metrics["gpu_runtime_fraction"]))
    print("{} = GPU mean utilisation during GPU usage".format(metrics["gpu_mean_utilisation"]))
    print("{} = Memory bandwith mean utilisation during GPU usage".format(metrics["bandwidth_mean_utilisation"]))
    print("{} = Peak bandwith utilisation fraction".format(metrics["peak_bandwidth_fraction"]))
    print("{} = Peak memory usage (mb)".format(metrics["peak_memory_mb"]))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment