Skip to content

Instantly share code, notes, and snippets.

@jmansour
Last active February 4, 2018 22:09
Show Gist options
  • Save jmansour/17c9d4e6767fab22a317ba795e171df1 to your computer and use it in GitHub Desktop.
Save jmansour/17c9d4e6767fab22a317ba795e171df1 to your computer and use it in GitHub Desktop.
This script extracts basic gpu usage metrics from data generated by nvidia-smi
#!/usr/bin/env python
"""Extract summary metrics from runtime GPU utilisation data.

The input is the CSV log generated using:

    $ nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,nounits -l 10 > gpu-${SLURM_JOBID}.out

Usage:

    python <this script> gpu-<jobid>.out
"""
import numpy as np
import sys

# Column positions, following the order of the --query-gpu fields above.
COL_INDEX = 0      # index
COL_GPU_UTIL = 1   # utilization.gpu (percent)
COL_MEM_UTIL = 2   # utilization.memory (percent), reported as "bandwidth"
COL_MEM_USED = 5   # memory.used

# The logging command above samples with "-l 10", i.e. every 10 seconds.
SAMPLE_INTERVAL_SECONDS = 10.

# A sample counts as "GPU busy" when its utilisation exceeds this percentage.
BUSY_THRESHOLD_PERCENT = 0.1

USAGE_ERROR = "You must provide a valid filename as the first argument to this script."


def load_samples(filename):
    """Load the nvidia-smi CSV log *filename* as a 2-D float array.

    The first line (the CSV header) is skipped.  ``ndmin=2`` keeps the result
    two-dimensional even when the log holds a single data row; without it
    ``np.loadtxt`` returns a 1-D array and column indexing fails.

    Raises whatever ``np.loadtxt`` raises for a missing or unparsable file.
    """
    return np.loadtxt(filename, delimiter=",", skiprows=1, ndmin=2)


def compute_metrics(mat, sample_interval=SAMPLE_INTERVAL_SECONDS):
    """Compute the summary metrics for a table of samples.

    Parameters:
        mat: 2-D array-like, one row per logged line, columns in the order
            produced by the logging command in the module docstring.
        sample_interval: seconds between consecutive samples (the ``-l``
            value passed to nvidia-smi).

    Returns:
        dict with the keys ``total_gpus``, ``runtime_minutes``,
        ``gpu_runtime_fraction``, ``gpu_mean_utilisation``,
        ``bandwidth_mean_utilisation``, ``peak_bandwidth_fraction`` and
        ``peak_memory_mb``.  Utilisation values are fractions, i.e. the
        logged percentages divided by 100.

    Raises:
        ValueError: if *mat* has no rows or fewer columns than required.
    """
    mat = np.asarray(mat, dtype=float)
    if mat.ndim != 2 or mat.shape[0] == 0 or mat.shape[1] <= COL_MEM_USED:
        raise ValueError(
            "expected at least one data row with {} or more columns".format(COL_MEM_USED + 1)
        )

    gpu_util = mat[:, COL_GPU_UTIL]
    mem_util = mat[:, COL_MEM_UTIL]
    n_samples = mat.shape[0]

    # One mask drives both the runtime fraction and the busy-time means, so
    # the two statistics always agree on which rows are "GPU time".
    busy = gpu_util > BUSY_THRESHOLD_PERCENT
    n_busy = int(np.count_nonzero(busy))

    if n_busy > 0:
        gpu_mean = float(gpu_util[busy].mean()) / 100.
        band_mean = float(mem_util[busy].mean()) / 100.
    else:
        # The GPU was never busy; avoid taking the mean of an empty selection.
        gpu_mean = 0
        band_mean = 0

    # NOTE(review): assumes GPU indices are contiguous and start at 0.
    total_gpus = int(mat[:, COL_INDEX].max()) + 1

    return {
        "total_gpus": total_gpus,
        # Rows from all GPUs are interleaved in the log, so the row count is
        # divided by the GPU count to get the number of sampling instants.
        # NOTE(review): assumes every GPU logs one row per sample - confirm.
        "runtime_minutes": n_samples * sample_interval / 60. / total_gpus,
        "gpu_runtime_fraction": n_busy / float(n_samples),
        "gpu_mean_utilisation": gpu_mean,
        "bandwidth_mean_utilisation": band_mean,
        # Logged as a percentage; scaled like the other fractions.
        "peak_bandwidth_fraction": float(mem_util.max()) / 100.,
        "peak_memory_mb": float(mat[:, COL_MEM_USED].max()),
    }


def main(argv=None):
    """Print the metrics for the log file named by ``argv[1]``.

    Parameters:
        argv: argument list; defaults to ``sys.argv``.

    Raises:
        RuntimeError: if no filename was given, the file cannot be read or
            parsed, or it does not contain usable samples.
    """
    if argv is None:
        argv = sys.argv

    # Only the failures that a missing argument or a bad file can produce are
    # translated; KeyboardInterrupt and friends propagate untouched.
    try:
        filename = argv[1]
        mat = load_samples(filename)
    except (IndexError, IOError, OSError, ValueError):
        raise RuntimeError(USAGE_ERROR)

    try:
        metrics = compute_metrics(mat)
    except ValueError as err:
        raise RuntimeError("{}: {}".format(filename, err))

    print("{} = Total GPUs used".format(metrics["total_gpus"]))
    print("{} = Total runtime (mins)".format(metrics["runtime_minutes"]))
    print("{} = GPU runtime fraction".format(metrics["gpu_runtime_fraction"]))
    print("{} = GPU mean utilisation during GPU usage".format(metrics["gpu_mean_utilisation"]))
    print("{} = Memory bandwith mean utilisation during GPU usage".format(metrics["bandwidth_mean_utilisation"]))
    print("{} = Peak bandwith utilisation fraction".format(metrics["peak_bandwidth_fraction"]))
    print("{} = Peak memory usage (mb)".format(metrics["peak_memory_mb"]))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment