This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ | |
cudaError_t e = cmd; \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ | |
cudaError_t e = cmd; \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time accl --topo NVLinkOnly --chunks 2 --size 4 --steps 4 --from-step 1 --collectives _all_reduce --features cuda_ipc --prefix /home/t-liuzhe/work/collcc/out | |
send 0 from 0 to 3 at time 1 | |
send 1 from 3 to 0 at time 1 | |
send 1 from 1 to 3 at time 0 | |
send 2 from 2 to 0 at time 0 | |
send 2 from 0 to 3 at time 1 | |
send 3 from 3 to 0 at time 1 | |
send 3 from 3 to 1 at time 0 | |
send 4 from 3 to 2 at time 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time accl --topo NVLinkOnly --chunks 2 --size 4 --steps 4 --from-step 1 --collectives _all_reduce --features cuda_ipc --prefix /home/t-liuzhe/work/collcc/out | |
send 0 from 3 to 1 at time 1 | |
send 0 from 0 to 3 at time 0 | |
send 2 from 2 to 1 at time 0 | |
send 3 from 3 to 1 at time 1 | |
send 5 from 1 to 0 at time 1 | |
send 6 from 1 to 0 at time 1 | |
send 6 from 2 to 1 at time 0 | |
send 7 from 3 to 0 at time 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ | |
cudaError_t e = cmd; \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
NORTHAMERICA.t-liuzhe@GCR-DGX-01:~/work/omb/mpi/collective$ mpirun -np 8 osu_allreduce_accl -d cuda | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 | |
Allreduce ACCL a52f3e2-dirty Mon Jun 22 12:49:29 PDT 2020 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chunks == 1 | |
bandwidth at time 0 | |
0 1 0 0 0 0 0 0 | |
0 0 0 0 0 0 0 0 | |
0 0 0 1 0 0 0 0 | |
0 0 0 0 0 0 0 0 | |
0 0 0 0 0 0 0 1 | |
0 0 0 0 0 0 0 0 | |
0 0 0 0 0 1 0 0 | |
0 0 0 0 0 0 0 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
NORTHAMERICA.t-liuzhe@GCR-DGX-01:~/work/omb/mpi/collective$ mpirun -np 8 osu_allreduce_nccl -m 48:10000000 -d cuda | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version | |
Allreduce NCCL version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# OSU MPI-CUDA Allgather Latency Test v5.6.2 | |
# Size Avg Latency(us) | |
192 75.68 | |
384 76.04 | |
768 75.72 | |
1536 81.04 | |
3072 80.32 | |
6144 82.26 | |
12288 94.67 | |
24576 100.29 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ | |
cudaError_t e = cmd; \ |