Created
April 20, 2019 00:41
-
-
Save cswinter/6193721676f6a7903cca74ad660db975 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 | |
# | |
# Using devices | |
# Rank 0 Pid 7938 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB | |
# Rank 1 Pid 8021 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB | |
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0> | |
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/IB : No device found. | |
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0> | |
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/IB : No device found. | |
NCCL version 2.4.2+cuda10.0 | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO comm 0x7fcce0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-l83z:7938:7944 [0] NCCL INFO comm 0x7ff118002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:7938:7944 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-l83z:7938:7944 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1. | |
managed-worker-l83z:7938:7944 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1. | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Limiting to 1 rings per user request. | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Channel 00 : 0 1 | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1. | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1. | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7938:7944 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled | |
managed-worker-jbk7:8021:8026 [0] NCCL INFO comm 0x7fcce0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
managed-worker-l83z:7938:7944 [0] NCCL INFO comm 0x7ff118002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
# | |
# out-of-place in-place | |
# size count type redop time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
managed-worker-l83z:7938:7938 [0] NCCL INFO Launch mode Parallel | |
1073741824 268435456 float sum 578867 1.85 1.85 N/A 606879 1.77 1.77 N/A | |
managed-worker-l83z:7938:7938 [0] NCCL INFO Destroyed comm 0x7ff118002560 rank 0 | |
managed-worker-jbk7:8021:8021 [0] NCCL INFO Destroyed comm 0x7fcce0002560 rank 1 | |
# Out of bounds values : 0 OK | |
# Avg bus bandwidth : 1.81209 | |
# | |
Limitingroot@managed-worker-^C | |
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 | |
# | |
# Using devices | |
# Rank 0 Pid 7952 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB | |
# Rank 1 Pid 8048 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB | |
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0> | |
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/IB : No device found. | |
NCCL version 2.4.2+cuda10.0 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0> | |
managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/IB : No device found. | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-l83z:7952:7958 [0] NCCL INFO comm 0x7fb75c002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO comm 0x7f1c2c002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:7952:7958 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1. | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1. | |
managed-worker-l83z:7952:7958 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1. | |
managed-worker-l83z:7952:7958 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1. | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Limiting to 1 rings per user request. | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Channel 00 : 0 1 | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:7952:7958 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled | |
managed-worker-l83z:7952:7958 [0] NCCL INFO comm 0x7fb75c002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
managed-worker-jbk7:8048:8053 [0] NCCL INFO comm 0x7f1c2c002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
# | |
# out-of-place in-place | |
# size count type redop time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
managed-worker-l83z:7952:7952 [0] NCCL INFO Launch mode Parallel | |
1073741824 268435456 float sum 601911 1.78 1.78 N/A 612369 1.75 1.75 N/A | |
managed-worker-l83z:7952:7952 [0] NCCL INFO Destroyed comm 0x7fb75c002560 rank 0 | |
managed-worker-jbk7:8048:8048 [0] NCCL INFO Destroyed comm 0x7f1c2c002560 rank 1 | |
# Out of bounds values : 0 OK | |
# Avg bus bandwidth : 1.76865 | |
# | |
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=2 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 | |
# | |
# Using devices | |
# Rank 0 Pid 7968 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB | |
# Rank 1 Pid 8075 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB | |
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0> | |
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/IB : No device found. | |
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0> | |
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/IB : No device found. | |
NCCL version 2.4.2+cuda10.0 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO comm 0x7fd4c4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=managed-worker-l83z:7968:7974 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO comm 0x7fca48002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 2. | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 2. | |
managed-worker-l83z:7968:7974 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 2. | |
managed-worker-l83z:7968:7974 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 2. | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Limiting to 2 rings per user request. | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Channel 00 : 0 1 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Channel 01 : 0 1 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:7968:7974 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled | |
managed-worker-l83z:7968:7974 [0] NCCL INFO comm 0x7fca48002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
managed-worker-jbk7:8075:8080 [0] NCCL INFO comm 0x7fd4c4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
# | |
# out-of-place in-place | |
# size count type redop time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
managed-worker-l83z:7968:7968 [0] NCCL INFO Launch mode Parallel | |
1073741824 268435456 float sum 549783 1.95 1.95 N/A 553811 1.94 1.94 N/A | |
managed-worker-l83z:7968:7968 [0] NCCL INFO Destroyed comm 0x7fca48002560 rank 0 | |
managed-worker-jbk7:8075:8075 [0] NCCL INFO Destroyed comm 0x7fd4c4002560 rank 1 | |
# Out of bounds values : 0 OK | |
# Avg bus bandwidth : 1.94593 | |
# | |
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=2 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=4 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 | |
# | |
# Using devices | |
# Rank 0 Pid 7984 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB | |
# Rank 1 Pid 8102 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB | |
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0> | |
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/IB : No device found. | |
NCCL version 2.4.2+cuda10.0 | |
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0> | |
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/IB : No device found. | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO comm 0x7fe0dc002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO comm 0x7fcfa4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-l83z:7984:7990 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 4. | |
managed-worker-l83z:7984:7990 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 4. | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Duplicating rings to 4 per user request. | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 00 : 0 1 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 01 : 0 1 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 02 : 0 1 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=8 -x NCCL_MAX_NRINGS=managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 03 : 0 1 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 4. | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 4. | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 02 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 02 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 02 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 02 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 03 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 03 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 03 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 03 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:7984:7990 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled | |
managed-worker-l83z:7984:7990 [0] NCCL INFO comm 0x7fe0dc002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
managed-worker-jbk7:8102:8107 [0] NCCL INFO comm 0x7fcfa4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
# | |
# out-of-place in-place | |
# size count type redop time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
managed-worker-l83z:7984:7984 [0] NCCL INFO Launch mode Parallel | |
1073741824 268435456 float sum 621741 1.73 1.73 N/A 631218 1.70 1.70 N/A | |
managed-worker-jbk7:8102:8102 [0] NCCL INFO Destroyed comm 0x7fcfa4002560 rank 1 | |
managed-worker-l83z:7984:7984 [0] NCCL INFO Destroyed comm 0x7fe0dc002560 rank 0 | |
# Out of bounds values : 0 OK | |
# Avg bus bandwidth : 1.71403 | |
# | |
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=4 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=8 -x NCCL_MAX_NRINGS=8 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0 | |
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 | |
# | |
# Using devices | |
# Rank 0 Pid 8000 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB | |
# Rank 1 Pid 8129 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB | |
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0> | |
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/IB : No device found. | |
NCCL version 2.4.2+cuda10.0 | |
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0> | |
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so). | |
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/IB : No device found. | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO comm 0x7fcad4002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO comm 0x7fcad0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB | |
managed-worker-l83z:8000:8006 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 8. | |
managed-worker-l83z:8000:8006 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 8. | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Duplicating rings to 8 per user request. | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 00 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 01 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 02 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 03 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 04 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 05 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 06 : 0 1 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 07 : 0 1 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 8. | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 8. | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 02 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 02 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 02 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 02 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 03 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 03 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 03 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 03 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 04 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 04 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 04 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 04 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 05 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 05 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 05 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 05 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 06 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 06 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 06 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 06 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 07 : 1 -> 0 [receive] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 07 : 0 -> 1 [receive] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 07 : 0 -> 1 [send] via NET/Socket/0 | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 07 : 1 -> 0 [send] via NET/Socket/0 | |
managed-worker-l83z:8000:8006 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled | |
managed-worker-l83z:8000:8006 [0] NCCL INFO comm 0x7fcad4002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
# | |
# out-of-place in-place | |
# size count type redop time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
managed-worker-l83z:8000:8000 [0] NCCL INFO Launch mode Parallel | |
managed-worker-jbk7:8129:8134 [0] NCCL INFO comm 0x7fcad0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE | |
1073741824 268435456 float sum 742440 1.45 1.45 N/A 741982 1.45 1.45 N/A | |
managed-worker-l83z:8000:8000 [0] NCCL INFO Destroyed comm 0x7fcad4002560 rank 0 | |
managed-worker-jbk7:8129:8129 [0] NCCL INFO Destroyed comm 0x7fcad0002560 rank 1 | |
# Out of bounds values : 0 OK | |
# Avg bus bandwidth : 1.44668 | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment