Skip to content

Instantly share code, notes, and snippets.

@kwen2501
Created December 2, 2021 18:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kwen2501/186641efe951f344cd9a6d39353d19d6 to your computer and use it in GitHub Desktop.
Save kwen2501/186641efe951f344cd9a6d39353d19d6 to your computer and use it in GitHub Desktop.
A test that creates and destroys a NCCL communicator in a loop
#include <stdio.h>
#include "cuda_runtime.h"
#include "nccl.h"
#include "mpi.h"
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#define MPICHECK(cmd) do { \
int e = cmd; \
if( e != MPI_SUCCESS ) { \
printf("Failed: MPI error %s:%d '%d'\n", \
__FILE__,__LINE__, e); \
exit(EXIT_FAILURE); \
} \
} while(0)
#define CUDACHECK(cmd) do { \
cudaError_t e = cmd; \
if( e != cudaSuccess ) { \
printf("Failed: Cuda error %s:%d '%s'\n", \
__FILE__,__LINE__,cudaGetErrorString(e)); \
exit(EXIT_FAILURE); \
} \
} while(0)
#define NCCLCHECK(cmd) do { \
ncclResult_t r = cmd; \
if (r!= ncclSuccess) { \
printf("Failed, NCCL error %s:%d '%s'\n", \
__FILE__,__LINE__,ncclGetErrorString(r)); \
exit(EXIT_FAILURE); \
} \
} while(0)
static uint64_t getHostHash(const char* string) {
// Based on DJB2a, result = result * 33 ^ char
uint64_t result = 5381;
for (int c = 0; string[c] != '\0'; c++){
result = ((result << 5) + result) ^ string[c];
}
return result;
}
static void getHostName(char* hostname, int maxlen) {
gethostname(hostname, maxlen);
for (int i=0; i< maxlen; i++) {
if (hostname[i] == '.') {
hostname[i] = '\0';
return;
}
}
}
static void getMemInfo() {
int id;
CUDACHECK(cudaGetDevice(&id));
size_t free, total;
CUDACHECK(cudaMemGetInfo(&free, &total));
printf("GPU %d: total %ld MiB free %ld MiB\n", id, total/1024/1024, free/1024/1024);
}
int main(int argc, char* argv[])
{
int myRank, nRanks, localRank = 0;
//initializing MPI
MPICHECK(MPI_Init(&argc, &argv));
MPICHECK(MPI_Comm_rank(MPI_COMM_WORLD, &myRank));
MPICHECK(MPI_Comm_size(MPI_COMM_WORLD, &nRanks));
//calculating localRank based on hostname which is used in selecting a GPU
uint64_t hostHashs[nRanks];
char hostname[1024];
getHostName(hostname, 1024);
hostHashs[myRank] = getHostHash(hostname);
MPICHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD));
for (int p=0; p<nRanks; p++) {
if (p == myRank) break;
if (hostHashs[p] == hostHashs[myRank]) localRank++;
}
CUDACHECK(cudaSetDevice(localRank));
// Make sure CUDA runtime is initialized
CUDACHECK(cudaFree(NULL));
for (int i = 0; i < 10; i++) {
MPI_Barrier(MPI_COMM_WORLD);
if (myRank == 0) {
printf("========== After %2d (init, destroy) ==========\n", i);
fflush(stdout);
}
sleep(1);
MPI_Barrier(MPI_COMM_WORLD);
getMemInfo();
fflush(stdout);
ncclUniqueId id;
ncclComm_t comm;
//get NCCL unique ID at rank 0 and broadcast it to all others
if (myRank == 0) ncclGetUniqueId(&id);
MPICHECK(MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD));
//initializing NCCL
NCCLCHECK(ncclCommInitRank(&comm, nRanks, id, myRank));
//finalizing NCCL
ncclCommDestroy(comm);
}
//finalizing MPI
MPICHECK(MPI_Finalize());
printf("[MPI Rank %d] Success \n", myRank);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment