Skip to content

Instantly share code, notes, and snippets.

@chr5tphr
Last active February 9, 2022 10:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chr5tphr/209860ce30768f4f58d676f23630f76c to your computer and use it in GitHub Desktop.
Save chr5tphr/209860ce30768f4f58d676f23630f76c to your computer and use it in GitHub Desktop.
Print CUDA info as JSON using the NVIDIA Management Library (NVML), avoiding the need to parse nvidia-smi output
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <nvml.h>
#include <time.h>
#define MAXINFO 32
#define MAXCBUF 64
// compile with:
// gcc cudamemstat.c -I /usr/local/cuda/include -lnvidia-ml -L /usr/lib/nvidia-384 -o cudamemstat
int main(int argc, char* argv[]){
nvmlDevice_t handle;
nvmlReturn_t retval;
nvmlMemory_t meminfo;
nvmlUtilization_t utilinfo;
nvmlProcessInfo_t pinfo[MAXINFO];
struct stat sbuf;
char cbuf[MAXCBUF];
retval = nvmlInit();
if (retval != NVML_SUCCESS) {
fprintf(stderr, "%s\n",nvmlErrorString(retval));
return 1;
}
int version;
retval = nvmlSystemGetCudaDriverVersion(&version);
if (retval != NVML_SUCCESS) {
fprintf(stderr, "%s\n",nvmlErrorString(retval));
return 1;
}
unsigned int numdev = 0;
retval = nvmlDeviceGetCount(&numdev);
if (retval != NVML_SUCCESS) {
fprintf(stderr, "%s\n",nvmlErrorString(retval));
return 1;
}
gethostname(cbuf, MAXCBUF);
// WARNING: hostname is not escaped here
printf("{\"hostname\": \"%s\"", cbuf);
printf(", \"cuda_version\": %d", version);
printf(", \"time\": %lu", time(0));
printf(", \"gpu\": [");
unsigned int i = 0;
for (i=0;i<numdev;i++){
if (i > 0) {
printf(", ");
}
printf("{\"device\": %u", i);
nvmlDeviceGetHandleByIndex(i,&handle);
// device memory
retval = nvmlDeviceGetMemoryInfo(handle,&meminfo);
if (retval == NVML_SUCCESS) {
printf(", \"memused\": %llu, \"memtotal\": %llu", meminfo.used, meminfo.total);
}
else {
fprintf(stderr, "Error in device memory: %s\n", nvmlErrorString(retval));
}
// device utilization
retval = nvmlDeviceGetUtilizationRates(handle, &utilinfo);
if (retval == NVML_SUCCESS) {
printf(", \"gpuutil\": %u, \"memutil\": %u", utilinfo.gpu, utilinfo.memory);
}
else {
fprintf(stderr, "Error in device util: %s\n", nvmlErrorString(retval));
}
// processes on device
printf(", \"proc\": [");
unsigned int infoCount = MAXINFO;
retval = nvmlDeviceGetComputeRunningProcesses(handle, &infoCount, pinfo);
if (retval == NVML_SUCCESS) {
unsigned int j = 0;
for (j=0;j<infoCount;j++) {
if (j > 0) {
printf(", ");
}
printf("{");
// pid
printf("\"pid\": %u", pinfo[j].pid);
// uid
snprintf(cbuf, MAXCBUF, "/proc/%u", pinfo[j].pid);
if (!stat(cbuf, &sbuf)) {
printf(", \"uid\": %u", sbuf.st_uid);
}
// mem used
printf(", \"memused\": %llu", pinfo[j].usedGpuMemory);
printf("}");
}
}
else {
fprintf(stderr, "Error in process info: %s, count %u\n", nvmlErrorString(retval), infoCount);
}
printf("]");
printf("}");
}
printf("]");
printf("}\n");
nvmlShutdown();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment