Skip to content

Instantly share code, notes, and snippets.

@cswinter
Created April 20, 2019 00:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cswinter/6193721676f6a7903cca74ad660db975 to your computer and use it in GitHub Desktop.
Save cswinter/6193721676f6a7903cca74ad660db975 to your computer and use it in GitHub Desktop.
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0
#
# Using devices
# Rank 0 Pid 7938 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB
# Rank 1 Pid 8021 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0>
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-l83z:7938:7938 [0] NCCL INFO NET/IB : No device found.
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0>
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-jbk7:8021:8021 [0] NCCL INFO NET/IB : No device found.
NCCL version 2.4.2+cuda10.0
managed-worker-jbk7:8021:8026 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-jbk7:8021:8026 [0] NCCL INFO comm 0x7fcce0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:7938:7944 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-l83z:7938:7944 [0] NCCL INFO comm 0x7ff118002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:7938:7944 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8021:8026 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-l83z:7938:7944 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1.
managed-worker-l83z:7938:7944 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1.
managed-worker-l83z:7938:7944 [0] NCCL INFO Limiting to 1 rings per user request.
managed-worker-l83z:7938:7944 [0] NCCL INFO Channel 00 : 0 1
managed-worker-jbk7:8021:8026 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1.
managed-worker-jbk7:8021:8026 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1.
managed-worker-l83z:7938:7944 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8021:8026 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7938:7944 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8021:8026 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7938:7944 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled
managed-worker-jbk7:8021:8026 [0] NCCL INFO comm 0x7fcce0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
managed-worker-l83z:7938:7944 [0] NCCL INFO comm 0x7ff118002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
#
# out-of-place in-place
# size count type redop time algbw busbw error time algbw busbw error
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)
managed-worker-l83z:7938:7938 [0] NCCL INFO Launch mode Parallel
1073741824 268435456 float sum 578867 1.85 1.85 N/A 606879 1.77 1.77 N/A
managed-worker-l83z:7938:7938 [0] NCCL INFO Destroyed comm 0x7ff118002560 rank 0
managed-worker-jbk7:8021:8021 [0] NCCL INFO Destroyed comm 0x7fcce0002560 rank 1
# Out of bounds values : 0 OK
# Avg bus bandwidth : 1.81209
#
Limitingroot@managed-worker-^C
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0
#
# Using devices
# Rank 0 Pid 7952 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB
# Rank 1 Pid 8048 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0>
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-l83z:7952:7952 [0] NCCL INFO NET/IB : No device found.
NCCL version 2.4.2+cuda10.0
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=
managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0>
managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-jbk7:8048:8048 [0] NCCL INFO NET/IB : No device found.
managed-worker-l83z:7952:7958 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-l83z:7952:7958 [0] NCCL INFO comm 0x7fb75c002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-jbk7:8048:8053 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-jbk7:8048:8053 [0] NCCL INFO comm 0x7f1c2c002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:7952:7958 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8048:8053 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8048:8053 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1.
managed-worker-jbk7:8048:8053 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1.
managed-worker-l83z:7952:7958 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 1.
managed-worker-l83z:7952:7958 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 1.
managed-worker-l83z:7952:7958 [0] NCCL INFO Limiting to 1 rings per user request.
managed-worker-l83z:7952:7958 [0] NCCL INFO Channel 00 : 0 1
managed-worker-jbk7:8048:8053 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7952:7958 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8048:8053 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7952:7958 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:7952:7958 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled
managed-worker-l83z:7952:7958 [0] NCCL INFO comm 0x7fb75c002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
managed-worker-jbk7:8048:8053 [0] NCCL INFO comm 0x7f1c2c002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
#
# out-of-place in-place
# size count type redop time algbw busbw error time algbw busbw error
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)
managed-worker-l83z:7952:7952 [0] NCCL INFO Launch mode Parallel
1073741824 268435456 float sum 601911 1.78 1.78 N/A 612369 1.75 1.75 N/A
managed-worker-l83z:7952:7952 [0] NCCL INFO Destroyed comm 0x7fb75c002560 rank 0
managed-worker-jbk7:8048:8048 [0] NCCL INFO Destroyed comm 0x7f1c2c002560 rank 1
# Out of bounds values : 0 OK
# Avg bus bandwidth : 1.76865
#
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=1 -x NCCL_MAX_NRINGS=1 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=2 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0
#
# Using devices
# Rank 0 Pid 7968 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB
# Rank 1 Pid 8075 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0>
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-l83z:7968:7968 [0] NCCL INFO NET/IB : No device found.
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0>
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-jbk7:8075:8075 [0] NCCL INFO NET/IB : No device found.
NCCL version 2.4.2+cuda10.0
managed-worker-jbk7:8075:8080 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-jbk7:8075:8080 [0] NCCL INFO comm 0x7fd4c4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=
managed-worker-l83z:7968:7974 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-l83z:7968:7974 [0] NCCL INFO comm 0x7fca48002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:7968:7974 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8075:8080 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8075:8080 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 2.
managed-worker-jbk7:8075:8080 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 2.
managed-worker-l83z:7968:7974 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 2.
managed-worker-l83z:7968:7974 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 2.
managed-worker-l83z:7968:7974 [0] NCCL INFO Limiting to 2 rings per user request.
managed-worker-l83z:7968:7974 [0] NCCL INFO Channel 00 : 0 1
managed-worker-l83z:7968:7974 [0] NCCL INFO Channel 01 : 0 1
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8075:8080 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7968:7974 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:7968:7974 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled
managed-worker-l83z:7968:7974 [0] NCCL INFO comm 0x7fca48002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
managed-worker-jbk7:8075:8080 [0] NCCL INFO comm 0x7fd4c4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
#
# out-of-place in-place
# size count type redop time algbw busbw error time algbw busbw error
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)
managed-worker-l83z:7968:7968 [0] NCCL INFO Launch mode Parallel
1073741824 268435456 float sum 549783 1.95 1.95 N/A 553811 1.94 1.94 N/A
managed-worker-l83z:7968:7968 [0] NCCL INFO Destroyed comm 0x7fca48002560 rank 0
managed-worker-jbk7:8075:8075 [0] NCCL INFO Destroyed comm 0x7fd4c4002560 rank 1
# Out of bounds values : 0 OK
# Avg bus bandwidth : 1.94593
#
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=2 -x NCCL_MAX_NRINGS=2 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=4 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0
#
# Using devices
# Rank 0 Pid 7984 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB
# Rank 1 Pid 8102 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0>
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-l83z:7984:7984 [0] NCCL INFO NET/IB : No device found.
NCCL version 2.4.2+cuda10.0
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0>
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-jbk7:8102:8102 [0] NCCL INFO NET/IB : No device found.
managed-worker-l83z:7984:7990 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-l83z:7984:7990 [0] NCCL INFO comm 0x7fe0dc002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-jbk7:8102:8107 [0] NCCL INFO comm 0x7fcfa4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:7984:7990 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8102:8107 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-l83z:7984:7990 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 4.
managed-worker-l83z:7984:7990 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 4.
managed-worker-l83z:7984:7990 [0] NCCL INFO Duplicating rings to 4 per user request.
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 00 : 0 1
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 01 : 0 1
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 02 : 0 1
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=8 -x NCCL_MAX_NRINGS=
managed-worker-l83z:7984:7990 [0] NCCL INFO Channel 03 : 0 1
managed-worker-jbk7:8102:8107 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 4.
managed-worker-jbk7:8102:8107 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 4.
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 02 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 02 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 02 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 02 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 03 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 03 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8102:8107 [0] NCCL INFO Ring 03 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Ring 03 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:7984:7990 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled
managed-worker-l83z:7984:7990 [0] NCCL INFO comm 0x7fe0dc002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
managed-worker-jbk7:8102:8107 [0] NCCL INFO comm 0x7fcfa4002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
#
# out-of-place in-place
# size count type redop time algbw busbw error time algbw busbw error
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)
managed-worker-l83z:7984:7984 [0] NCCL INFO Launch mode Parallel
1073741824 268435456 float sum 621741 1.73 1.73 N/A 631218 1.70 1.70 N/A
managed-worker-jbk7:8102:8102 [0] NCCL INFO Destroyed comm 0x7fcfa4002560 rank 1
managed-worker-l83z:7984:7984 [0] NCCL INFO Destroyed comm 0x7fe0dc002560 rank 0
# Out of bounds values : 0 OK
# Avg bus bandwidth : 1.71403
#
root@managed-worker-l83z:/# mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=4 -x NCCL_MAX_NRINGS=4 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
mpirun --allow-run-as-root -H 10.73.0.52:1,10.73.0.15:1 -np 2 -mca btl_tcp_if_include ens12 -x LD_LIBRARY_PATH -x NCCL_SOCKET_IFNAME=ens12 -x NCCL_MIN_NRINGS=8 -x NCCL_MAX_NRINGS=8 -x NCCL_DEBUG=TRACE /nccl-tests/build/all_reduce_perf -b 1G -e 1G -f 2 -g 1 -c 0
# nThread 1 nGpus 1 minBytes 1073741824 maxBytes 1073741824 step: 2(factor) warmup iters: 5 iters: 20 validation: 0
#
# Using devices
# Rank 0 Pid 8000 on managed-worker-l83z device 0 [0x00] Tesla V100-SXM2-16GB
# Rank 1 Pid 8129 on managed-worker-jbk7 device 0 [0x00] Tesla V100-SXM2-16GB
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.52<0>
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-l83z:8000:8000 [0] NCCL INFO NET/IB : No device found.
NCCL version 2.4.2+cuda10.0
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/Socket : Using [0]ens12:10.73.0.15<0>
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so).
managed-worker-jbk7:8129:8129 [0] NCCL INFO NET/IB : No device found.
managed-worker-l83z:8000:8006 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-l83z:8000:8006 [0] NCCL INFO comm 0x7fcad4002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Setting affinity for GPU 0 to 010000,00000001
managed-worker-jbk7:8129:8134 [0] NCCL INFO comm 0x7fcad0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0
managed-worker-l83z:8000:8006 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-jbk7:8129:8134 [0] NCCL INFO CUDA Dev 0[0], Socket NIC distance : PHB
managed-worker-l83z:8000:8006 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 8.
managed-worker-l83z:8000:8006 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 8.
managed-worker-l83z:8000:8006 [0] NCCL INFO Duplicating rings to 8 per user request.
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 00 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 01 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 02 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 03 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 04 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 05 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 06 : 0 1
managed-worker-l83z:8000:8006 [0] NCCL INFO Channel 07 : 0 1
managed-worker-jbk7:8129:8134 [0] NCCL INFO NCCL_MAX_NRINGS set by environment to 8.
managed-worker-jbk7:8129:8134 [0] NCCL INFO NCCL_MIN_NRINGS set by environment to 8.
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 00 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 00 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 00 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 00 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 01 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 01 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 01 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 01 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 02 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 02 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 02 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 02 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 03 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 03 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 03 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 03 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 04 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 04 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 04 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 04 : 0 -> 1 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 05 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 05 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 05 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 05 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 06 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 06 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 06 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 06 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 07 : 1 -> 0 [receive] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 07 : 0 -> 1 [receive] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Ring 07 : 0 -> 1 [send] via NET/Socket/0
managed-worker-jbk7:8129:8134 [0] NCCL INFO Ring 07 : 1 -> 0 [send] via NET/Socket/0
managed-worker-l83z:8000:8006 [0] NCCL INFO Using 256 threads, Min Comp Cap 7, Trees disabled
managed-worker-l83z:8000:8006 [0] NCCL INFO comm 0x7fcad4002560 rank 0 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
#
# out-of-place in-place
# size count type redop time algbw busbw error time algbw busbw error
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)
managed-worker-l83z:8000:8000 [0] NCCL INFO Launch mode Parallel
managed-worker-jbk7:8129:8134 [0] NCCL INFO comm 0x7fcad0002560 rank 1 nranks 2 cudaDev 0 nvmlDev 0 - Init COMPLETE
1073741824 268435456 float sum 742440 1.45 1.45 N/A 741982 1.45 1.45 N/A
managed-worker-l83z:8000:8000 [0] NCCL INFO Destroyed comm 0x7fcad4002560 rank 0
managed-worker-jbk7:8129:8129 [0] NCCL INFO Destroyed comm 0x7fcad0002560 rank 1
# Out of bounds values : 0 OK
# Avg bus bandwidth : 1.44668
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment