Skip to content

Instantly share code, notes, and snippets.

@surak
Created May 30, 2023 14:05
Show Gist options
  • Save surak/5f3f236616e5db48f19d31df457b4350 to your computer and use it in GitHub Desktop.
Save surak/5f3f236616e5db48f19d31df457b4350 to your computer and use it in GitHub Desktop.
+ '[' -z '' ']'
+ case "$-" in
+ __lmod_vx=x
+ '[' -n x ']'
+ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for this output (/p/software/juwelsbooster/lmod/8.7.12/init/bash)
Shell debugging restarted
+ unset __lmod_vx
+ export SRUN_CPUS_PER_TASK=48
+ SRUN_CPUS_PER_TASK=48
++ scontrol show hostnames 'jwb[0038,0061]'
++ head -n 1
+ MASTER_ADDR=jwb0038
+ MASTER_ADDR=jwb0038i
++ nslookup jwb0038i
++ grep -oP '(?<=Address: ).*'
+ export MASTER_ADDR=10.13.23.40
+ MASTER_ADDR=10.13.23.40
+ export MASTER_PORT=7010
+ MASTER_PORT=7010
+ export GPUS_PER_NODE=4
+ GPUS_PER_NODE=4
+ export NNODES=2
+ NNODES=2
+ export CUDA_LAUNCH_BLOCKING=1
+ CUDA_LAUNCH_BLOCKING=1
+ export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
+ TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
+ export NCCL_ASYNC_ERROR_HANDLING=1
+ NCCL_ASYNC_ERROR_HANDLING=1
+ export NCCL_IB_TIMEOUT=20
+ NCCL_IB_TIMEOUT=20
+ cd /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src
+ source sc_venv_template/activate.sh
++ SOURCE_PATH=sc_venv_template/activate.sh
+++ dirname sc_venv_template/activate.sh
++ RELATIVE_PATH=sc_venv_template
+++ realpath sc_venv_template
++ ABSOLUTE_PATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template
++ [[ /var/spool/parastation/jobs/7791752 != \s\c\_\v\e\n\v\_\t\e\m\p\l\a\t\e\/\a\c\t\i\v\a\t\e\.\s\h ]]
++ echo 'The activation script must be sourced, otherwise the virtual environment will not work.'
++ source /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/config.sh
+++ SOURCE_PATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/config.sh
+++ [[ /var/spool/parastation/jobs/7791752 != \/\p\/\p\r\o\j\e\c\t\/\c\c\s\t\a\o\/\c\s\t\a\o\0\5\/\2\0\2\3\-\m\a\y\-\i\n\t\r\o\-\t\o\-\s\u\p\e\r\c\o\m\p\t\i\n\g\-\j\s\c\/\s\r\c\/\s\c\_\v\e\n\v\_\t\e\m\p\l\a\t\e\/\c\o\n\f\i\g\.\s\h ]]
+++ echo 'Setting vars'
++++ dirname /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/config.sh
+++ RELATIVE_PATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template
++++ realpath /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template
+++ ABSOLUTE_PATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template
++++ basename /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template
+++ export ENV_NAME=sc_venv_template
+++ ENV_NAME=sc_venv_template
+++ export ENV_DIR=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv
+++ ENV_DIR=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv
++ source /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/modules.sh
+++ module purge
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
The following modules were not unloaded:
(Use "module --force purge" to unload all):
1) Stages/2023
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load Stages/2023
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load GCC OpenMPI
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load mpi4py numba tqdm OpenCV matplotlib IPython SciPy-Stack bokeh git
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load Flask Seaborn
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load PyQuil
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ module load PyTorch scikit-learn torchvision PyTorch-Lightning
+++ '[' -z '' ']'
+++ case "$-" in
+++ __lmod_sh_dbg=x
+++ '[' -n x ']'
+++ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for Lmod's output
Shell debugging restarted
+++ unset __lmod_sh_dbg
+++ return 0
+++ echo /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv/lib/python3.10/site-packages
++ export PYTHONPATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyTorch-Lightning/1.8.2-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/tensorboard/2.11.2-foss-2022a/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/torchvision/0.13.1-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/scikit-learn/1.1.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyTorch/1.12.0-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/expecttest/0.1.3-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/protobuf-python/3.19.4-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyQuil/3.3.3-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/networkx/2.8.4-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Seaborn/0.12.1-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Flask/2.2.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/bokeh/2.4.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/xarray/2022.9.0-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/dask/2022.12.0-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/typing-extensions/4.3.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyYAML/6.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/h5py/3.7.0-GCCcore-11.3.0-serial/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/netcdf4-python/1.6.1-GCCcore-11.3.0-serial/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/sympy/1.11.1-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/gmpy2/2.1.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/IPython/8.5.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/BeautifulSoup/4.10.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/lxml/4.9.1-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/matplotlib/3.5.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Pillow-SIMD/9.2.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Tkinter/3.10.4-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/OpenCV/4.7.0-gcccoremkl-11.3.0-2022.1.0-CUDA-11.7-contrib/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/tqdm/4.64.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/numba/0.56.4-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/SciPy-bundle/2022.05-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/pybind11/2.9.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/mpi4py/3.1.4-gompi-2022a/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Python/3.10.4-GCCcore-11.3.0/easybuild/python
++ PYTHONPATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyTorch-Lightning/1.8.2-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/tensorboard/2.11.2-foss-2022a/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/torchvision/0.13.1-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/scikit-learn/1.1.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyTorch/1.12.0-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/expecttest/0.1.3-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/protobuf-python/3.19.4-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyQuil/3.3.3-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/networkx/2.8.4-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Seaborn/0.12.1-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Flask/2.2.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/bokeh/2.4.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/xarray/2022.9.0-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/dask/2022.12.0-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/typing-extensions/4.3.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/PyYAML/6.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/h5py/3.7.0-GCCcore-11.3.0-serial/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/netcdf4-python/1.6.1-GCCcore-11.3.0-serial/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/sympy/1.11.1-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/gmpy2/2.1.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/IPython/8.5.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/BeautifulSoup/4.10.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/lxml/4.9.1-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/matplotlib/3.5.2-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Pillow-SIMD/9.2.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Tkinter/3.10.4-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/OpenCV/4.7.0-gcccoremkl-11.3.0-2022.1.0-CUDA-11.7-contrib/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/tqdm/4.64.0-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/numba/0.56.4-foss-2022a-CUDA-11.7/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/SciPy-bundle/2022.05-gcccoremkl-11.3.0-2022.1.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/pybind11/2.9.2-GCCcore-11.3.0/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/mpi4py/3.1.4-gompi-2022a/lib/python3.10/site-packages:/p/software/juwelsbooster/stages/2023/software/Python/3.10.4-GCCcore-11.3.0/easybuild/python
++ source /p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv/bin/activate
+++ deactivate nondestructive
+++ '[' -n '' ']'
+++ '[' -n '' ']'
+++ '[' -n /bin/bash -o -n '' ']'
+++ hash -r
+++ '[' -n '' ']'
+++ unset VIRTUAL_ENV
+++ unset VIRTUAL_ENV_PROMPT
+++ '[' '!' nondestructive = nondestructive ']'
+++ VIRTUAL_ENV=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv
+++ export VIRTUAL_ENV
+++ _OLD_VIRTUAL_PATH=/p/software/juwelsbooster/stages/2023/software/tensorboard/2.11.2-foss-2022a/bin:/p/software/juwelsbooster/stages/2023/software/PyTorch/1.12.0-foss-2022a-CUDA-11.7/bin:/p/software/juwelsbooster/stages/2023/software/Ninja/1.10.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PyQuil/3.3.3-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/Flask/2.2.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/git/2.36.0-GCCcore-11.3.0-nodocs/bin:/p/software/juwelsbooster/stages/2023/software/BioPerl/1.7.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Perl/5.34.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/DB/18.1.40-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/bokeh/2.4.2-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/dask/2022.12.0-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/netcdf4-python/1.6.1-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/netCDF/4.9.0-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/cURL/7.83.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/sympy/1.11.1-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/IPython/8.5.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libxslt/1.1.34-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ZeroMQ/4.3.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/matplotlib/3.5.2-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/Qhull/2020.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Tk/8.6.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenCV/4.7.0-gcccoremkl-11.3.0-2022.1.0-CUDA-11.7-contrib/bin:/p/software/juwelsbooster/stages/2023/software/protobuf/3.19.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/HDF5/1.12.2-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/ant/1.10.13-Java-11:/p/software/juwelsbooster/stages/2023/software/ant/1.10.13-Java-11/bin:/p/software/juwelsbooster/stages/2023/software/Java/11.0.16:/p/software/juwelsbooster/stages/2023/software/Java/11.0.16/bin:/p/software/juwelsbooster/stages/2023/software/JasPer/2.0.33-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenEXR/3.1.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libwebp/1.2.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/giflib/5.2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenJPEG/2.5.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GStreamer/1.20.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/elfutils/0.187-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libarchive/3.6.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GTK+/3.24.34-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/librsvg/2.55.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GObject-Introspection/1.72.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenGL/2022a-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/nettle/3.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Pango/1.50.7-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/HarfBuzz/4.2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ICU/71.1-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/ICU/71.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/cairo/1.17.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Gdk-Pixbuf/2.42.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/LibTIFF/4.3.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libdeflate/1.10-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/zstd/1.5.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/lz4/1.9.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/gzip/1.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/jbigkit/2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libjpeg-turbo/2.1.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/DBus/1.14.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GLib/2.72.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PCRE/8.45-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/gettext/0.21-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GSL/2.7-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FFmpeg/4.4.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FriBidi/1.0.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/X11/20220504-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/fontconfig/2.14.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/util-linux/2.38-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/util-linux/2.38-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/freetype/2.12.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Brotli/1.0.9-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libpng/1.6.37-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/expat/2.4.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libvpx/1.12.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/x265/3.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/LAME/3.100-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/x264/20220620-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/NASM/2.15.05-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/tqdm/4.64.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/numba/0.56.4-foss-2022a-CUDA-11.7/bin:/p/software/juwelsbooster/stages/2023/software/LLVM/14.0.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/SciPy-bundle/2022.05-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/pybind11/2.9.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FFTW/3.3.10-gompi-2022a/bin:/p/software/juwelsbooster/stages/2023/software/FlexiBLAS/3.2.0-GCC-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Python/3.10.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/SQLite/3.38.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Tcl/8.6.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ncurses/6.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/bzip2/1.0.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenMPI/4.1.4-GCC-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/UCC/default-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PMIx/3.2.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libevent/2.1.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenSSL/1.1/bin:/p/software/juwelsbooster/stages/2023/software/UCX/default-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/CUDA/11.7/nvvm/bin:/p/software/juwelsbooster/stages/2023/software/CUDA/11.7/bin:/p/software/juwelsbooster/stages/2023/software/nvidia-driver/default:/p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/bin:/p/software/juwelsbooster/stages/2023/software/hwloc/2.7.1-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/hwloc/2.7.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libxml2/2.9.13-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/XZ/5.2.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/numactl/2.0.15-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/binutils/2.38-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GCCcore/11.3.0/bin:/p/project/ccstao/cstao05/bin:/p/project/ccstao/cstao05/.local/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/ddn/ime/bin:/opt/jsc/bin:/usr/local/jsc/bin:/opt/parastation/bin:/p/software/juwelsbooster/bin
+++ PATH=/p/project/ccstao/cstao05/2023-may-intro-to-supercompting-jsc/src/sc_venv_template/venv/bin:/p/software/juwelsbooster/stages/2023/software/tensorboard/2.11.2-foss-2022a/bin:/p/software/juwelsbooster/stages/2023/software/PyTorch/1.12.0-foss-2022a-CUDA-11.7/bin:/p/software/juwelsbooster/stages/2023/software/Ninja/1.10.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PyQuil/3.3.3-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/Flask/2.2.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/git/2.36.0-GCCcore-11.3.0-nodocs/bin:/p/software/juwelsbooster/stages/2023/software/BioPerl/1.7.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Perl/5.34.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/DB/18.1.40-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/bokeh/2.4.2-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/dask/2022.12.0-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/netcdf4-python/1.6.1-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/netCDF/4.9.0-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/cURL/7.83.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/sympy/1.11.1-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/IPython/8.5.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libxslt/1.1.34-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ZeroMQ/4.3.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/matplotlib/3.5.2-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/Qhull/2020.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Tk/8.6.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenCV/4.7.0-gcccoremkl-11.3.0-2022.1.0-CUDA-11.7-contrib/bin:/p/software/juwelsbooster/stages/2023/software/protobuf/3.19.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/HDF5/1.12.2-GCCcore-11.3.0-serial/bin:/p/software/juwelsbooster/stages/2023/software/ant/1.10.13-Java-11:/p/software/juwelsbooster/stages/2023/software/ant/1.10.13-Java-11/bin:/p/software/juwelsbooster/stages/2023/software/Java/11.0.16:/p/software/juwelsbooster/stages/2023/software/Java/11.0.16/bin:/p/software/juwelsbooster/stages/2023/software/JasPer/2.0.33-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenEXR/3.1.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libwebp/1.2.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/giflib/5.2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenJPEG/2.5.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GStreamer/1.20.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/elfutils/0.187-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libarchive/3.6.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GTK+/3.24.34-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/librsvg/2.55.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GObject-Introspection/1.72.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenGL/2022a-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/nettle/3.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Pango/1.50.7-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/HarfBuzz/4.2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ICU/71.1-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/ICU/71.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/cairo/1.17.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Gdk-Pixbuf/2.42.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/LibTIFF/4.3.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libdeflate/1.10-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/zstd/1.5.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/lz4/1.9.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/gzip/1.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/jbigkit/2.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libjpeg-turbo/2.1.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/DBus/1.14.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GLib/2.72.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PCRE/8.45-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/gettext/0.21-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GSL/2.7-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FFmpeg/4.4.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FriBidi/1.0.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/X11/20220504-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/fontconfig/2.14.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/util-linux/2.38-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/util-linux/2.38-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/freetype/2.12.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Brotli/1.0.9-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libpng/1.6.37-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/expat/2.4.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libvpx/1.12.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/x265/3.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/LAME/3.100-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/x264/20220620-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/NASM/2.15.05-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/tqdm/4.64.0-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/numba/0.56.4-foss-2022a-CUDA-11.7/bin:/p/software/juwelsbooster/stages/2023/software/LLVM/14.0.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/SciPy-bundle/2022.05-gcccoremkl-11.3.0-2022.1.0/bin:/p/software/juwelsbooster/stages/2023/software/pybind11/2.9.2-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/FFTW/3.3.10-gompi-2022a/bin:/p/software/juwelsbooster/stages/2023/software/FlexiBLAS/3.2.0-GCC-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Python/3.10.4-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/SQLite/3.38.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/Tcl/8.6.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/ncurses/6.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/bzip2/1.0.8-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenMPI/4.1.4-GCC-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/UCC/default-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/PMIx/3.2.3-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libevent/2.1.12-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/OpenSSL/1.1/bin:/p/software/juwelsbooster/stages/2023/software/UCX/default-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/CUDA/11.7/nvvm/bin:/p/software/juwelsbooster/stages/2023/software/CUDA/11.7/bin:/p/software/juwelsbooster/stages/2023/software/nvidia-driver/default:/p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/bin:/p/software/juwelsbooster/stages/2023/software/hwloc/2.7.1-GCCcore-11.3.0/sbin:/p/software/juwelsbooster/stages/2023/software/hwloc/2.7.1-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/libxml2/2.9.13-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/XZ/5.2.5-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/numactl/2.0.15-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/binutils/2.38-GCCcore-11.3.0/bin:/p/software/juwelsbooster/stages/2023/software/GCCcore/11.3.0/bin:/p/project/ccstao/cstao05/bin:/p/project/ccstao/cstao05/.local/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/ddn/ime/bin:/opt/jsc/bin:/usr/local/jsc/bin:/opt/parastation/bin:/p/software/juwelsbooster/bin
+++ export PATH
+++ '[' -n '' ']'
+++ '[' -z '' ']'
+++ _OLD_VIRTUAL_PS1=
+++ PS1='(sc_venv_template) '
+++ export PS1
+++ VIRTUAL_ENV_PROMPT='(sc_venv_template) '
+++ export VIRTUAL_ENV_PROMPT
+++ '[' -n /bin/bash -o -n '' ']'
+++ hash -r
+ export NCCL_DEBUG=INFO NCCL_DEBUG_SUBSYS=ALL
+ NCCL_DEBUG=INFO
+ NCCL_DEBUG_SUBSYS=ALL
+ export LOGLEVEL=INFO
+ LOGLEVEL=INFO
+ srun bash -c 'accelerate launch \
--main_process_ip $MASTER_ADDR \
--main_process_port $MASTER_PORT \
--multi_gpu \
--mixed_precision=no \
--num_processes=$(($NNODES * 4)) \
--dynamo_backend=no \
--num_machines=$NNODES \
--machine_rank=$SLURM_PROCID \
distrib.py'
INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:
entrypoint : distrib.py
min_nodes : 2
max_nodes : 2
nproc_per_node : 4
run_id : none
rdzv_backend : static
rdzv_endpoint : 10.13.23.40:7010
rdzv_configs : {'rank': 1, 'timeout': 900}
max_restarts : 0
monitor_interval : 5
log_dir : None
metrics_cfg : {}
INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:
entrypoint : distrib.py
min_nodes : 2
max_nodes : 2
nproc_per_node : 4
run_id : none
rdzv_backend : static
rdzv_endpoint : 10.13.23.40:7010
rdzv_configs : {'rank': 0, 'timeout': 900}
max_restarts : 0
monitor_interval : 5
log_dir : None
metrics_cfg : {}
INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_twsenhke/none_3_6rk4ef
INFO:torch.distributed.elastic.agent.server.api:[default] starting workers for entrypoint: python
INFO:torch.distributed.elastic.agent.server.api:[default] Rendezvous'ing worker group
[W socket.cpp:401] [c10d] The server socket cannot be initialized on [::]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
INFO:torch.distributed.elastic.agent.server.api:[default] Rendezvous complete for workers. Result:
restart_count=0
master_addr=10.13.23.40
master_port=7010
group_rank=0
group_world_size=2
local_ranks=[0, 1, 2, 3]
role_ranks=[0, 1, 2, 3]
global_ranks=[0, 1, 2, 3]
role_world_sizes=[8, 8, 8, 8]
global_world_sizes=[8, 8, 8, 8]
INFO:torch.distributed.elastic.agent.server.api:[default] Starting worker group
INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_twsenhke/none_3_6rk4ef/attempt_0/0/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker1 reply file to: /tmp/torchelastic_twsenhke/none_3_6rk4ef/attempt_0/1/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker2 reply file to: /tmp/torchelastic_twsenhke/none_3_6rk4ef/attempt_0/2/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker3 reply file to: /tmp/torchelastic_twsenhke/none_3_6rk4ef/attempt_0/3/error.json
INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_h_dj1caf/none__czscsxo
INFO:torch.distributed.elastic.agent.server.api:[default] starting workers for entrypoint: python
INFO:torch.distributed.elastic.agent.server.api:[default] Rendezvous'ing worker group
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
INFO:torch.distributed.elastic.agent.server.api:[default] Rendezvous complete for workers. Result:
restart_count=0
master_addr=10.13.23.40
master_port=7010
group_rank=1
group_world_size=2
local_ranks=[0, 1, 2, 3]
role_ranks=[4, 5, 6, 7]
global_ranks=[4, 5, 6, 7]
role_world_sizes=[8, 8, 8, 8]
global_world_sizes=[8, 8, 8, 8]
INFO:torch.distributed.elastic.agent.server.api:[default] Starting worker group
INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_h_dj1caf/none__czscsxo/attempt_0/0/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker1 reply file to: /tmp/torchelastic_h_dj1caf/none__czscsxo/attempt_0/1/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker2 reply file to: /tmp/torchelastic_h_dj1caf/none__czscsxo/attempt_0/2/error.json
INFO:torch.distributed.elastic.multiprocessing:Setting worker3 reply file to: /tmp/torchelastic_h_dj1caf/none__czscsxo/attempt_0/3/error.json
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
[W socket.cpp:558] [c10d] The client socket cannot be initialized to connect to [jwb0038i.juwels]:7010 (errno: 97 - Address family not supported by protocol).
The activation script must be sourced, otherwise the virtual environment will not work.
Setting vars
jwb0038:16067:16067 [0] NCCL INFO Bootstrap : Using ib0:10.13.23.40<0>
jwb0038:16067:16067 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0038:16067:16067 [0] NCCL INFO cudaDriverVersion 12000
NCCL version 2.15.1+cuda11.7
jwb0038:16067:16067 [0] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x151a74600000
jwb0038:16070:16070 [3] NCCL INFO cudaDriverVersion 12000
jwb0038:16068:16068 [1] NCCL INFO cudaDriverVersion 12000
jwb0038:16069:16069 [2] NCCL INFO cudaDriverVersion 12000
jwb0061:16371:16371 [3] NCCL INFO cudaDriverVersion 12000
jwb0061:16369:16369 [1] NCCL INFO cudaDriverVersion 12000
jwb0061:16370:16370 [2] NCCL INFO cudaDriverVersion 12000
jwb0061:16368:16368 [0] NCCL INFO cudaDriverVersion 12000
jwb0038:16067:16132 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.40<0>
jwb0038:16067:16132 [0] NCCL INFO Using network IB
jwb0038:16070:16070 [3] NCCL INFO Bootstrap : Using ib0:10.13.23.40<0>
jwb0038:16070:16070 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0038:16070:16070 [3] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x148ae6600000
jwb0038:16068:16068 [1] NCCL INFO Bootstrap : Using ib0:10.13.23.40<0>
jwb0038:16069:16069 [2] NCCL INFO Bootstrap : Using ib0:10.13.23.40<0>
jwb0038:16068:16068 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0038:16069:16069 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0038:16068:16068 [1] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x14f22e600000
jwb0038:16069:16069 [2] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x14660c600000
jwb0038:16068:16138 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.40<0>
jwb0038:16068:16138 [1] NCCL INFO Using network IB
jwb0038:16069:16139 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.40<0>
jwb0038:16069:16139 [2] NCCL INFO Using network IB
jwb0061:16368:16368 [0] NCCL INFO Bootstrap : Using ib0:10.13.23.55<0>
jwb0061:16371:16371 [3] NCCL INFO Bootstrap : Using ib0:10.13.23.55<0>
jwb0061:16370:16370 [2] NCCL INFO Bootstrap : Using ib0:10.13.23.55<0>
jwb0061:16369:16369 [1] NCCL INFO Bootstrap : Using ib0:10.13.23.55<0>
jwb0061:16368:16368 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0061:16368:16368 [0] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x145c42600000
jwb0061:16371:16371 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0061:16370:16370 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0061:16369:16369 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
jwb0061:16371:16371 [3] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x15320e600000
jwb0061:16370:16370 [2] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x14ca60600000
jwb0061:16369:16369 [1] NCCL INFO init.cc:1147 Cuda Host Alloc Size 4 pointer 0x14ce1c600000
jwb0038:16070:16137 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.40<0>
jwb0038:16070:16137 [3] NCCL INFO Using network IB
jwb0061:16371:16434 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.55<0>
jwb0061:16371:16434 [3] NCCL INFO Using network IB
jwb0061:16369:16436 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.55<0>
jwb0061:16369:16436 [1] NCCL INFO Using network IB
jwb0061:16368:16432 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.55<0>
jwb0061:16368:16432 [0] NCCL INFO Using network IB
jwb0061:16370:16435 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [1]mlx5_1:1/IB [2]mlx5_2:1/IB [3]mlx5_3:1/IB [RO]; OOB ib0:10.13.23.55<0>
jwb0061:16370:16435 [2] NCCL INFO Using network IB
jwb0061:16368:16432 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0061:16368:16432 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0061:16368:16432 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0061:16368:16432 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16067:16132 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0038:16067:16132 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0038:16067:16132 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16067:16132 [0] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16068:16138 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0038:16068:16138 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0038:16068:16138 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16068:16138 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0061:16368:16432 [0] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x145c43000000
jwb0038:16067:16132 [0] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x151a75000000
jwb0061:16371:16434 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0061:16371:16434 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0038:16068:16138 [1] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x14f22f000000
jwb0038:16069:16139 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0061:16371:16434 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16069:16139 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0061:16371:16434 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16069:16139 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16069:16139 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0038:16070:16137 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0038:16070:16137 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16068:16138 [1] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0038:16068:16138 [1] NCCL INFO CPU/3 (1/2/-1)
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - GPU/3000 (0)
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - NIC/4000
jwb0038:16068:16138 [1] NCCL INFO + NET[25.0] - NET/0 (dcf8bf0100380008/1/25.000000)
jwb0038:16068:16138 [1] NCCL INFO CPU/1 (1/2/-1)
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - GPU/44000 (1)
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - NIC/43000
jwb0038:16068:16138 [1] NCCL INFO + NET[25.0] - NET/1 (d8f8bf0100380008/1/25.000000)
jwb0038:16068:16138 [1] NCCL INFO CPU/7 (1/2/-1)
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - GPU/84000 (2)
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - NIC/83000
jwb0038:16068:16138 [1] NCCL INFO + NET[25.0] - NET/2 (d0f8bf0100380008/1/25.000000)
jwb0038:16068:16138 [1] NCCL INFO CPU/5 (1/2/-1)
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16068:16138 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - GPU/C4000 (3)
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16068:16138 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16068:16138 [1] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0038:16068:16138 [1] NCCL INFO + NET[25.0] - NET/3 (d4f8bf0100380008/1/25.000000)
jwb0038:16068:16138 [1] NCCL INFO ==========================================
jwb0038:16068:16138 [1] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16068:16138 [1] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16068:16138 [1] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0038:16068:16138 [1] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0038:16068:16138 [1] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16068:16138 [1] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16068:16138 [1] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0038:16068:16138 [1] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0038:16068:16138 [1] NCCL INFO Setting affinity for GPU 1 to 0fc00000,00000fc0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x14660d000000
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0061:16368:16432 [0] NCCL INFO CPU/3 (1/2/-1)
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - GPU/3000 (4)
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - NIC/4000
jwb0061:16368:16432 [0] NCCL INFO + NET[25.0] - NET/0 (c4fabf0100380008/1/25.000000)
jwb0061:16368:16432 [0] NCCL INFO CPU/1 (1/2/-1)
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - GPU/44000 (5)
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - NIC/43000
jwb0061:16368:16432 [0] NCCL INFO + NET[25.0] - NET/1 (c0fabf0100380008/1/25.000000)
jwb0061:16368:16432 [0] NCCL INFO CPU/7 (1/2/-1)
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - GPU/84000 (6)
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - NIC/83000
jwb0061:16368:16432 [0] NCCL INFO + NET[25.0] - NET/2 (2092c00100380008/1/25.000000)
jwb0061:16368:16432 [0] NCCL INFO CPU/5 (1/2/-1)
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16368:16432 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - GPU/C4000 (7)
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16368:16432 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16368:16432 [0] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0061:16368:16432 [0] NCCL INFO + NET[25.0] - NET/3 (2492c00100380008/1/25.000000)
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO ==========================================
jwb0061:16368:16432 [0] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16368:16432 [0] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16368:16432 [0] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0061:16368:16432 [0] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16368:16432 [0] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16368:16432 [0] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0061:16368:16432 [0] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0061:16368:16432 [0] NCCL INFO Setting affinity for GPU 0 to fc,00000000,00fc0000
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16069:16139 [2] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0038:16069:16139 [2] NCCL INFO CPU/3 (1/2/-1)
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - GPU/3000 (0)
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - NIC/4000
jwb0038:16069:16139 [2] NCCL INFO + NET[25.0] - NET/0 (dcf8bf0100380008/1/25.000000)
jwb0038:16069:16139 [2] NCCL INFO CPU/1 (1/2/-1)
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - GPU/44000 (1)
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - NIC/43000
jwb0038:16069:16139 [2] NCCL INFO + NET[25.0] - NET/1 (d8f8bf0100380008/1/25.000000)
jwb0038:16069:16139 [2] NCCL INFO CPU/7 (1/2/-1)
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - GPU/84000 (2)
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - NIC/83000
jwb0038:16069:16139 [2] NCCL INFO + NET[25.0] - NET/2 (d0f8bf0100380008/1/25.000000)
jwb0038:16069:16139 [2] NCCL INFO CPU/5 (1/2/-1)
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16069:16139 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0061:16370:16435 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - GPU/C4000 (3)
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16069:16139 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16069:16139 [2] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0038:16069:16139 [2] NCCL INFO + NET[25.0] - NET/3 (d4f8bf0100380008/1/25.000000)
jwb0038:16069:16139 [2] NCCL INFO ==========================================
jwb0038:16069:16139 [2] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16069:16139 [2] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16069:16139 [2] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0038:16069:16139 [2] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0038:16069:16139 [2] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16069:16139 [2] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16069:16139 [2] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0038:16069:16139 [2] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0038:16069:16139 [2] NCCL INFO Setting affinity for GPU 2 to fc000000,0000fc00,00000000
jwb0061:16370:16435 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0038:16070:16137 [3] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x148ae7000000
jwb0061:16370:16435 [2] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16137 [3] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0038:16070:16137 [3] NCCL INFO CPU/3 (1/2/-1)
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - GPU/3000 (0)
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - NIC/4000
jwb0038:16070:16137 [3] NCCL INFO + NET[25.0] - NET/0 (dcf8bf0100380008/1/25.000000)
jwb0038:16070:16137 [3] NCCL INFO CPU/1 (1/2/-1)
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - GPU/44000 (1)
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - NIC/43000
jwb0038:16070:16137 [3] NCCL INFO + NET[25.0] - NET/1 (d8f8bf0100380008/1/25.000000)
jwb0038:16070:16137 [3] NCCL INFO CPU/7 (1/2/-1)
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - GPU/84000 (2)
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - NIC/83000
jwb0038:16070:16137 [3] NCCL INFO + NET[25.0] - NET/2 (d0f8bf0100380008/1/25.000000)
jwb0038:16070:16137 [3] NCCL INFO CPU/5 (1/2/-1)
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16070:16137 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - GPU/C4000 (3)
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16070:16137 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16070:16137 [3] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0038:16070:16137 [3] NCCL INFO + NET[25.0] - NET/3 (d4f8bf0100380008/1/25.000000)
jwb0038:16070:16137 [3] NCCL INFO ==========================================
jwb0038:16070:16137 [3] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16070:16137 [3] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16070:16137 [3] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0038:16070:16137 [3] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0038:16070:16137 [3] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16070:16137 [3] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16070:16137 [3] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0038:16070:16137 [3] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0038:16070:16137 [3] NCCL INFO Setting affinity for GPU 3 to 0fc000,0000000f,c0000000
jwb0061:16371:16434 [3] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x15320f000000
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0038:16067:16132 [0] NCCL INFO CPU/3 (1/2/-1)
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - GPU/3000 (0)
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - NIC/4000
jwb0038:16067:16132 [0] NCCL INFO + NET[25.0] - NET/0 (dcf8bf0100380008/1/25.000000)
jwb0038:16067:16132 [0] NCCL INFO CPU/1 (1/2/-1)
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - GPU/44000 (1)
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - NIC/43000
jwb0038:16067:16132 [0] NCCL INFO + NET[25.0] - NET/1 (d8f8bf0100380008/1/25.000000)
jwb0038:16067:16132 [0] NCCL INFO CPU/7 (1/2/-1)
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/5
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - GPU/84000 (2)
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - NIC/83000
jwb0038:16067:16132 [0] NCCL INFO + NET[25.0] - NET/2 (d0f8bf0100380008/1/25.000000)
jwb0038:16067:16132 [0] NCCL INFO CPU/5 (1/2/-1)
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/3
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/1
jwb0038:16067:16132 [0] NCCL INFO + SYS[5000.0] - CPU/7
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - GPU/C4000 (3)
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/84000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/44000
jwb0038:16067:16132 [0] NCCL INFO + NVL[88.0] - GPU/3000
jwb0038:16067:16132 [0] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0038:16067:16132 [0] NCCL INFO + NET[25.0] - NET/3 (d4f8bf0100380008/1/25.000000)
jwb0038:16067:16132 [0] NCCL INFO ==========================================
jwb0038:16067:16132 [0] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16067:16132 [0] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0038:16067:16132 [0] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0038:16067:16132 [0] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0038:16067:16132 [0] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16067:16132 [0] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0038:16067:16132 [0] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0038:16067:16132 [0] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0038:16067:16132 [0] NCCL INFO Setting affinity for GPU 0 to fc,00000000,00fc0000
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x14ca61000000
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0061:16371:16434 [3] NCCL INFO CPU/3 (1/2/-1)
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - GPU/3000 (4)
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - NIC/4000
jwb0061:16371:16434 [3] NCCL INFO + NET[25.0] - NET/0 (c4fabf0100380008/1/25.000000)
jwb0061:16371:16434 [3] NCCL INFO CPU/1 (1/2/-1)
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - GPU/44000 (5)
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - NIC/43000
jwb0061:16371:16434 [3] NCCL INFO + NET[25.0] - NET/1 (c0fabf0100380008/1/25.000000)
jwb0061:16371:16434 [3] NCCL INFO CPU/7 (1/2/-1)
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - GPU/84000 (6)
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - NIC/83000
jwb0061:16371:16434 [3] NCCL INFO + NET[25.0] - NET/2 (2092c00100380008/1/25.000000)
jwb0061:16371:16434 [3] NCCL INFO CPU/5 (1/2/-1)
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16371:16434 [3] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - GPU/C4000 (7)
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16371:16434 [3] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16371:16434 [3] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0061:16371:16434 [3] NCCL INFO + NET[25.0] - NET/3 (2492c00100380008/1/25.000000)
jwb0061:16371:16434 [3] NCCL INFO ==========================================
jwb0061:16371:16434 [3] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16371:16434 [3] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16371:16434 [3] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0061:16371:16434 [3] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0061:16371:16434 [3] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16371:16434 [3] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16371:16434 [3] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0061:16371:16434 [3] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0061:16371:16434 [3] NCCL INFO Setting affinity for GPU 3 to 0fc000,0000000f,c0000000
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16370:16435 [2] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0061:16370:16435 [2] NCCL INFO CPU/3 (1/2/-1)
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - GPU/3000 (4)
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - NIC/4000
jwb0061:16370:16435 [2] NCCL INFO + NET[25.0] - NET/0 (c4fabf0100380008/1/25.000000)
jwb0061:16370:16435 [2] NCCL INFO CPU/1 (1/2/-1)
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - GPU/44000 (5)
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - NIC/43000
jwb0061:16370:16435 [2] NCCL INFO + NET[25.0] - NET/1 (c0fabf0100380008/1/25.000000)
jwb0061:16370:16435 [2] NCCL INFO CPU/7 (1/2/-1)
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - GPU/84000 (6)
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - NIC/83000
jwb0061:16370:16435 [2] NCCL INFO + NET[25.0] - NET/2 (2092c00100380008/1/25.000000)
jwb0061:16370:16435 [2] NCCL INFO CPU/5 (1/2/-1)
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16370:16435 [2] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - GPU/C4000 (7)
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16370:16435 [2] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16370:16435 [2] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0061:16370:16435 [2] NCCL INFO + NET[25.0] - NET/3 (2492c00100380008/1/25.000000)
jwb0061:16370:16435 [2] NCCL INFO ==========================================
jwb0061:16370:16435 [2] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16370:16435 [2] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16370:16435 [2] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0061:16370:16435 [2] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0061:16370:16435 [2] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16370:16435 [2] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16370:16435 [2] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0061:16370:16435 [2] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0061:16370:16435 [2] NCCL INFO Setting affinity for GPU 2 to fc000000,0000fc00,00000000
jwb0061:16369:16436 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 0 'mlx5_0'
jwb0061:16369:16436 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 1 'mlx5_1'
jwb0061:16369:16436 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 2 'mlx5_2'
jwb0061:16369:16436 [1] NCCL INFO NET/IB : GPU Direct RDMA Enabled for HCA 3 'mlx5_3'
jwb0061:16369:16436 [1] NCCL INFO transport/p2p.cc:151 Cuda Alloc Size 2097152 pointer 0x14ce1d000000
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO === System : maxBw 24.0 totalBw 264.0 ===
jwb0061:16369:16436 [1] NCCL INFO CPU/3 (1/2/-1)
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - PCI/1000 (1000c0101000100b)
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - GPU/3000 (4)
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - NIC/4000
jwb0061:16369:16436 [1] NCCL INFO + NET[25.0] - NET/0 (c4fabf0100380008/1/25.000000)
jwb0061:16369:16436 [1] NCCL INFO CPU/1 (1/2/-1)
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - PCI/41000 (1000c0101000100b)
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - GPU/44000 (5)
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - NIC/43000
jwb0061:16369:16436 [1] NCCL INFO + NET[25.0] - NET/1 (c0fabf0100380008/1/25.000000)
jwb0061:16369:16436 [1] NCCL INFO CPU/7 (1/2/-1)
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/5
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - PCI/81000 (1000c0101000100b)
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - GPU/84000 (6)
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/C4000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - NIC/83000
jwb0061:16369:16436 [1] NCCL INFO + NET[25.0] - NET/2 (2092c00100380008/1/25.000000)
jwb0061:16369:16436 [1] NCCL INFO CPU/5 (1/2/-1)
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/3
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/1
jwb0061:16369:16436 [1] NCCL INFO + SYS[5000.0] - CPU/7
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - PCI/C1000 (1000c0101000100b)
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - GPU/C4000 (7)
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/84000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/44000
jwb0061:16369:16436 [1] NCCL INFO + NVL[88.0] - GPU/3000
jwb0061:16369:16436 [1] NCCL INFO + PCI[24.0] - NIC/C3000
jwb0061:16369:16436 [1] NCCL INFO + NET[25.0] - NET/3 (2492c00100380008/1/25.000000)
jwb0061:16369:16436 [1] NCCL INFO ==========================================
jwb0061:16369:16436 [1] NCCL INFO GPU/3000 :GPU/3000 (0/5000.000000/LOC) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (2/24.000000/PHB) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (3/24.000000/PIX) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16369:16436 [1] NCCL INFO GPU/44000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (0/5000.000000/LOC) GPU/84000 (1/88.000000/NVL) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (2/24.000000/PHB) CPU/7 (3/24.000000/SYS) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (3/24.000000/PIX) NET/2 (4/24.000000/PXN) NET/3 (4/24.000000/PXN)
jwb0061:16369:16436 [1] NCCL INFO GPU/84000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (0/5000.000000/LOC) GPU/C4000 (1/88.000000/NVL) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (2/24.000000/PHB) CPU/5 (3/24.000000/SYS) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (3/24.000000/PIX) NET/3 (4/24.000000/PXN)
jwb0061:16369:16436 [1] NCCL INFO GPU/C4000 :GPU/3000 (1/88.000000/NVL) GPU/44000 (1/88.000000/NVL) GPU/84000 (1/88.000000/NVL) GPU/C4000 (0/5000.000000/LOC) CPU/3 (3/24.000000/SYS) CPU/1 (3/24.000000/SYS) CPU/7 (3/24.000000/SYS) CPU/5 (2/24.000000/PHB) NET/0 (4/24.000000/PXN) NET/1 (4/24.000000/PXN) NET/2 (4/24.000000/PXN) NET/3 (3/24.000000/PIX)
jwb0061:16369:16436 [1] NCCL INFO NET/0 :GPU/3000 (3/24.000000/PIX) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (3/24.000000/PHB) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (0/5000.000000/LOC) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16369:16436 [1] NCCL INFO NET/1 :GPU/3000 (6/24.000000/SYS) GPU/44000 (3/24.000000/PIX) GPU/84000 (6/24.000000/SYS) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (3/24.000000/PHB) CPU/7 (4/24.000000/SYS) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (0/5000.000000/LOC) NET/2 (7/24.000000/SYS) NET/3 (7/24.000000/SYS)
jwb0061:16369:16436 [1] NCCL INFO NET/2 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (3/24.000000/PIX) GPU/C4000 (6/24.000000/SYS) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (3/24.000000/PHB) CPU/5 (4/24.000000/SYS) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (0/5000.000000/LOC) NET/3 (7/24.000000/SYS)
jwb0061:16369:16436 [1] NCCL INFO NET/3 :GPU/3000 (6/24.000000/SYS) GPU/44000 (6/24.000000/SYS) GPU/84000 (6/24.000000/SYS) GPU/C4000 (3/24.000000/PIX) CPU/3 (4/24.000000/SYS) CPU/1 (4/24.000000/SYS) CPU/7 (4/24.000000/SYS) CPU/5 (3/24.000000/PHB) NET/0 (7/24.000000/SYS) NET/1 (7/24.000000/SYS) NET/2 (7/24.000000/SYS) NET/3 (0/5000.000000/LOC)
jwb0061:16369:16436 [1] NCCL INFO Setting affinity for GPU 1 to 0fc00000,00000fc0
jwb0061:16368:16432 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16368:16432 [0] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16368:16432 [0] NCCL INFO 1 : NET/1 GPU/5 GPU/4 GPU/7 GPU/6 NET/1
jwb0061:16368:16432 [0] NCCL INFO 2 : NET/2 GPU/6 GPU/5 GPU/4 GPU/7 NET/2
jwb0061:16368:16432 [0] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16368:16432 [0] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16368:16432 [0] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16368:16432 [0] NCCL INFO 1 : NET/1 GPU/5 GPU/7 GPU/4 GPU/6 NET/1
jwb0061:16368:16432 [0] NCCL INFO 2 : NET/2 GPU/6 GPU/4 GPU/7 GPU/5 NET/2
jwb0061:16368:16432 [0] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16368:16432 [0] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0038:16069:16139 [2] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16069:16139 [2] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16069:16139 [2] NCCL INFO 1 : NET/1 GPU/1 GPU/0 GPU/3 GPU/2 NET/1
jwb0038:16069:16139 [2] NCCL INFO 2 : NET/2 GPU/2 GPU/1 GPU/0 GPU/3 NET/2
jwb0038:16069:16139 [2] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16069:16139 [2] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16069:16139 [2] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16069:16139 [2] NCCL INFO 1 : NET/1 GPU/1 GPU/3 GPU/0 GPU/2 NET/1
jwb0038:16069:16139 [2] NCCL INFO 2 : NET/2 GPU/2 GPU/0 GPU/3 GPU/1 NET/2
jwb0038:16069:16139 [2] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16067:16132 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16067:16132 [0] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16067:16132 [0] NCCL INFO 1 : NET/1 GPU/1 GPU/0 GPU/3 GPU/2 NET/1
jwb0038:16067:16132 [0] NCCL INFO 2 : NET/2 GPU/2 GPU/1 GPU/0 GPU/3 NET/2
jwb0038:16067:16132 [0] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16069:16139 [2] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0038:16067:16132 [0] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16067:16132 [0] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16067:16132 [0] NCCL INFO 1 : NET/1 GPU/1 GPU/3 GPU/0 GPU/2 NET/1
jwb0038:16067:16132 [0] NCCL INFO 2 : NET/2 GPU/2 GPU/0 GPU/3 GPU/1 NET/2
jwb0038:16067:16132 [0] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16070:16137 [3] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16070:16137 [3] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16070:16137 [3] NCCL INFO 1 : NET/1 GPU/1 GPU/0 GPU/3 GPU/2 NET/1
jwb0038:16070:16137 [3] NCCL INFO 2 : NET/2 GPU/2 GPU/1 GPU/0 GPU/3 NET/2
jwb0038:16070:16137 [3] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16067:16132 [0] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0038:16070:16137 [3] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16070:16137 [3] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16070:16137 [3] NCCL INFO 1 : NET/1 GPU/1 GPU/3 GPU/0 GPU/2 NET/1
jwb0038:16070:16137 [3] NCCL INFO 2 : NET/2 GPU/2 GPU/0 GPU/3 GPU/1 NET/2
jwb0038:16070:16137 [3] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0061:16371:16434 [3] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16371:16434 [3] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16371:16434 [3] NCCL INFO 1 : NET/1 GPU/5 GPU/4 GPU/7 GPU/6 NET/1
jwb0061:16371:16434 [3] NCCL INFO 2 : NET/2 GPU/6 GPU/5 GPU/4 GPU/7 NET/2
jwb0061:16371:16434 [3] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16371:16434 [3] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16371:16434 [3] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16371:16434 [3] NCCL INFO 1 : NET/1 GPU/5 GPU/7 GPU/4 GPU/6 NET/1
jwb0061:16371:16434 [3] NCCL INFO 2 : NET/2 GPU/6 GPU/4 GPU/7 GPU/5 NET/2
jwb0061:16371:16434 [3] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0038:16070:16137 [3] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0061:16371:16434 [3] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0061:16370:16435 [2] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16370:16435 [2] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16370:16435 [2] NCCL INFO 1 : NET/1 GPU/5 GPU/4 GPU/7 GPU/6 NET/1
jwb0061:16370:16435 [2] NCCL INFO 2 : NET/2 GPU/6 GPU/5 GPU/4 GPU/7 NET/2
jwb0061:16370:16435 [2] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16370:16435 [2] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16370:16435 [2] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16370:16435 [2] NCCL INFO 1 : NET/1 GPU/5 GPU/7 GPU/4 GPU/6 NET/1
jwb0061:16370:16435 [2] NCCL INFO 2 : NET/2 GPU/6 GPU/4 GPU/7 GPU/5 NET/2
jwb0061:16370:16435 [2] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0038:16068:16138 [1] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16068:16138 [1] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16068:16138 [1] NCCL INFO 1 : NET/1 GPU/1 GPU/0 GPU/3 GPU/2 NET/1
jwb0038:16068:16138 [1] NCCL INFO 2 : NET/2 GPU/2 GPU/1 GPU/0 GPU/3 NET/2
jwb0038:16068:16138 [1] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0038:16068:16138 [1] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0038:16068:16138 [1] NCCL INFO 0 : NET/0 GPU/0 GPU/1 GPU/2 GPU/3 NET/0
jwb0038:16068:16138 [1] NCCL INFO 1 : NET/1 GPU/1 GPU/3 GPU/0 GPU/2 NET/1
jwb0038:16068:16138 [1] NCCL INFO 2 : NET/2 GPU/2 GPU/0 GPU/3 GPU/1 NET/2
jwb0038:16068:16138 [1] NCCL INFO 3 : NET/3 GPU/3 GPU/2 GPU/1 GPU/0 NET/3
jwb0061:16370:16435 [2] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0038:16068:16138 [1] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0061:16369:16436 [1] NCCL INFO Pattern 4, crossNic 0, nChannels 4, bw 24.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16369:16436 [1] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16369:16436 [1] NCCL INFO 1 : NET/1 GPU/5 GPU/4 GPU/7 GPU/6 NET/1
jwb0061:16369:16436 [1] NCCL INFO 2 : NET/2 GPU/6 GPU/5 GPU/4 GPU/7 NET/2
jwb0061:16369:16436 [1] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16369:16436 [1] NCCL INFO Pattern 1, crossNic 0, nChannels 4, bw 48.000000/24.000000, type NVL/PXN, sameChannels 0
jwb0061:16369:16436 [1] NCCL INFO 0 : NET/0 GPU/4 GPU/5 GPU/6 GPU/7 NET/0
jwb0061:16369:16436 [1] NCCL INFO 1 : NET/1 GPU/5 GPU/7 GPU/4 GPU/6 NET/1
jwb0061:16369:16436 [1] NCCL INFO 2 : NET/2 GPU/6 GPU/4 GPU/7 GPU/5 NET/2
jwb0061:16369:16436 [1] NCCL INFO 3 : NET/3 GPU/7 GPU/6 GPU/5 GPU/4 NET/3
jwb0061:16369:16436 [1] NCCL INFO Pattern 3, crossNic 0, nChannels 0, bw 0.000000/0.000000, type NVL/PIX, sameChannels 1
jwb0061:16369:16436 [1] NCCL INFO Tree 0 : 4 -> 5 -> 6/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Tree 4 : 4 -> 5 -> 6/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Tree 1 : 1 -> 5 -> 7/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Tree 5 : -1 -> 5 -> 7/1/-1
jwb0061:16370:16435 [2] NCCL INFO Tree 2 : 2 -> 6 -> 4/-1/-1
jwb0061:16370:16435 [2] NCCL INFO Tree 6 : -1 -> 6 -> 4/2/-1
jwb0061:16370:16435 [2] NCCL INFO Tree 3 : 7 -> 6 -> 5/-1/-1
jwb0061:16370:16435 [2] NCCL INFO Tree 7 : 7 -> 6 -> 5/-1/-1
jwb0061:16371:16434 [3] NCCL INFO Tree 1 : 5 -> 7 -> 4/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Ring 00 : 4 -> 5 -> 6
jwb0061:16371:16434 [3] NCCL INFO Tree 5 : 5 -> 7 -> 4/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Ring 01 : 2 -> 5 -> 4
jwb0061:16371:16434 [3] NCCL INFO Tree 3 : 3 -> 7 -> 6/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Ring 02 : 6 -> 5 -> 4
jwb0061:16371:16434 [3] NCCL INFO Tree 7 : -1 -> 7 -> 6/3/-1
jwb0061:16370:16435 [2] NCCL INFO Ring 00 : 5 -> 6 -> 7
jwb0061:16369:16436 [1] NCCL INFO Ring 03 : 6 -> 5 -> 4
jwb0038:16068:16138 [1] NCCL INFO Tree 0 : 0 -> 1 -> 2/-1/-1
jwb0038:16067:16132 [0] NCCL INFO Tree 0 : -1 -> 0 -> 1/4/-1
jwb0038:16068:16138 [1] NCCL INFO Tree 4 : 0 -> 1 -> 2/-1/-1
jwb0038:16067:16132 [0] NCCL INFO Tree 4 : 4 -> 0 -> 1/-1/-1
jwb0038:16068:16138 [1] NCCL INFO Tree 1 : -1 -> 1 -> 3/5/-1
jwb0038:16067:16132 [0] NCCL INFO Tree 2 : 2 -> 0 -> 3/-1/-1
jwb0038:16070:16137 [3] NCCL INFO Tree 1 : 1 -> 3 -> 0/-1/-1
jwb0038:16068:16138 [1] NCCL INFO Tree 5 : 5 -> 1 -> 3/-1/-1
jwb0038:16067:16132 [0] NCCL INFO Tree 6 : 2 -> 0 -> 3/-1/-1
jwb0038:16070:16137 [3] NCCL INFO Tree 5 : 1 -> 3 -> 0/-1/-1
jwb0038:16069:16139 [2] NCCL INFO Tree 2 : -1 -> 2 -> 0/6/-1
jwb0038:16070:16137 [3] NCCL INFO Tree 3 : -1 -> 3 -> 2/7/-1
jwb0038:16069:16139 [2] NCCL INFO Tree 6 : 6 -> 2 -> 0/-1/-1
jwb0038:16070:16137 [3] NCCL INFO Tree 7 : 7 -> 3 -> 2/-1/-1
jwb0038:16069:16139 [2] NCCL INFO Tree 3 : 3 -> 2 -> 1/-1/-1
jwb0038:16069:16139 [2] NCCL INFO Tree 7 : 3 -> 2 -> 1/-1/-1
jwb0038:16068:16138 [1] NCCL INFO Ring 00 : 0 -> 1 -> 2
jwb0038:16068:16138 [1] NCCL INFO Ring 01 : 6 -> 1 -> 0
jwb0038:16067:16132 [0] NCCL INFO Channel 00/08 : 0 1 2 3 4 5 6 7
jwb0038:16068:16138 [1] NCCL INFO Ring 02 : 2 -> 1 -> 0
jwb0038:16070:16137 [3] NCCL INFO Ring 00 : 2 -> 3 -> 4
jwb0038:16067:16132 [0] NCCL INFO Channel 01/08 : 0 3 2 5 4 7 6 1
jwb0038:16068:16138 [1] NCCL INFO Ring 03 : 2 -> 1 -> 0
jwb0038:16070:16137 [3] NCCL INFO Ring 01 : 0 -> 3 -> 2
jwb0038:16069:16139 [2] NCCL INFO Ring 00 : 1 -> 2 -> 3
jwb0038:16068:16138 [1] NCCL INFO Ring 04 : 0 -> 1 -> 2
jwb0038:16067:16132 [0] NCCL INFO Channel 02/08 : 0 3 6 5 4 7 2 1
jwb0038:16070:16137 [3] NCCL INFO Ring 02 : 0 -> 3 -> 6
jwb0038:16069:16139 [2] NCCL INFO Ring 01 : 3 -> 2 -> 5
jwb0038:16068:16138 [1] NCCL INFO Ring 05 : 6 -> 1 -> 0
jwb0038:16070:16137 [3] NCCL INFO Ring 03 : 4 -> 3 -> 2
jwb0038:16069:16139 [2] NCCL INFO Ring 02 : 7 -> 2 -> 1
jwb0061:16370:16435 [2] NCCL INFO Ring 01 : 7 -> 6 -> 1
jwb0061:16369:16436 [1] NCCL INFO Ring 04 : 4 -> 5 -> 6
jwb0038:16067:16132 [0] NCCL INFO Channel 03/08 : 0 7 6 5 4 3 2 1
jwb0061:16370:16435 [2] NCCL INFO Ring 02 : 3 -> 6 -> 5
jwb0061:16369:16436 [1] NCCL INFO Ring 05 : 2 -> 5 -> 4
jwb0061:16371:16434 [3] NCCL INFO Ring 00 : 6 -> 7 -> 0
jwb0061:16370:16435 [2] NCCL INFO Ring 03 : 7 -> 6 -> 5
jwb0061:16369:16436 [1] NCCL INFO Ring 06 : 6 -> 5 -> 4
jwb0061:16371:16434 [3] NCCL INFO Ring 01 : 4 -> 7 -> 6
jwb0061:16370:16435 [2] NCCL INFO Ring 04 : 5 -> 6 -> 7
jwb0061:16369:16436 [1] NCCL INFO Ring 07 : 6 -> 5 -> 4
jwb0061:16371:16434 [3] NCCL INFO Ring 02 : 4 -> 7 -> 2
jwb0061:16370:16435 [2] NCCL INFO Ring 05 : 7 -> 6 -> 1
jwb0061:16368:16432 [0] NCCL INFO Tree 0 : 0 -> 4 -> 5/-1/-1
jwb0061:16369:16436 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 7/-1/-1->5->1 [2] -1/-1/-1->5->7 [3] 4/-1/-1->5->6 [4] 6/-1/-1->5->4 [5] 7/1/-1->5->-1 [6] -1/-1/-1->5->7 [7] 4/-1/-1->5->6
jwb0038:16068:16138 [1] NCCL INFO Ring 06 : 2 -> 1 -> 0
jwb0038:16070:16137 [3] NCCL INFO Ring 04 : 2 -> 3 -> 4
jwb0038:16069:16139 [2] NCCL INFO Ring 03 : 3 -> 2 -> 1
jwb0038:16067:16132 [0] NCCL INFO Channel 04/08 : 0 1 2 3 4 5 6 7
jwb0038:16070:16137 [3] NCCL INFO Ring 05 : 0 -> 3 -> 2
jwb0038:16068:16138 [1] NCCL INFO Ring 07 : 2 -> 1 -> 0
jwb0038:16069:16139 [2] NCCL INFO Ring 04 : 1 -> 2 -> 3
jwb0038:16068:16138 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 3/5/-1->1->-1 [2] -1/-1/-1->1->3 [3] 0/-1/-1->1->2 [4] 2/-1/-1->1->0 [5] 3/-1/-1->1->5 [6] -1/-1/-1->1->3 [7] 0/-1/-1->1->2
jwb0038:16070:16137 [3] NCCL INFO Ring 06 : 0 -> 3 -> 6
jwb0038:16067:16132 [0] NCCL INFO Channel 05/08 : 0 3 2 5 4 7 6 1
jwb0061:16371:16434 [3] NCCL INFO Ring 03 : 0 -> 7 -> 6
jwb0061:16370:16435 [2] NCCL INFO Ring 06 : 3 -> 6 -> 5
jwb0061:16368:16432 [0] NCCL INFO Tree 4 : -1 -> 4 -> 5/0/-1
jwb0061:16371:16434 [3] NCCL INFO Ring 04 : 6 -> 7 -> 0
jwb0061:16370:16435 [2] NCCL INFO Ring 07 : 7 -> 6 -> 5
jwb0038:16070:16137 [3] NCCL INFO Ring 07 : 4 -> 3 -> 2
jwb0038:16069:16139 [2] NCCL INFO Ring 05 : 3 -> 2 -> 5
jwb0038:16067:16132 [0] NCCL INFO Channel 06/08 : 0 3 6 5 4 7 2 1
jwb0038:16070:16137 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] 0/-1/-1->3->1 [2] 1/-1/-1->3->0 [3] 2/7/-1->3->-1 [4] -1/-1/-1->3->2 [5] 0/-1/-1->3->1 [6] 1/-1/-1->3->0 [7] 2/-1/-1->3->7
jwb0038:16069:16139 [2] NCCL INFO Ring 06 : 7 -> 2 -> 1
jwb0061:16370:16435 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] -1/-1/-1->6->4 [2] 4/-1/-1->6->2 [3] 5/-1/-1->6->7 [4] 7/-1/-1->6->5 [5] -1/-1/-1->6->4 [6] 4/2/-1->6->-1 [7] 5/-1/-1->6->7
jwb0061:16368:16432 [0] NCCL INFO Tree 2 : 6 -> 4 -> 7/-1/-1
jwb0061:16371:16434 [3] NCCL INFO Ring 05 : 4 -> 7 -> 6
jwb0061:16368:16432 [0] NCCL INFO Tree 6 : 6 -> 4 -> 7/-1/-1
jwb0038:16067:16132 [0] NCCL INFO Channel 07/08 : 0 7 6 5 4 3 2 1
jwb0038:16069:16139 [2] NCCL INFO Ring 07 : 3 -> 2 -> 1
jwb0038:16069:16139 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] -1/-1/-1->2->0 [2] 0/6/-1->2->-1 [3] 1/-1/-1->2->3 [4] 3/-1/-1->2->1 [5] -1/-1/-1->2->0 [6] 0/-1/-1->2->6 [7] 1/-1/-1->2->3
jwb0038:16067:16132 [0] NCCL INFO Ring 00 : 7 -> 0 -> 1
jwb0061:16371:16434 [3] NCCL INFO Ring 06 : 4 -> 7 -> 2
jwb0061:16371:16434 [3] NCCL INFO Ring 07 : 0 -> 7 -> 6
jwb0061:16371:16434 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] 4/-1/-1->7->5 [2] 5/-1/-1->7->4 [3] 6/-1/-1->7->3 [4] -1/-1/-1->7->6 [5] 4/-1/-1->7->5 [6] 5/-1/-1->7->4 [7] 6/3/-1->7->-1
jwb0038:16067:16132 [0] NCCL INFO Ring 01 : 1 -> 0 -> 3
jwb0038:16068:16138 [1] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16369:16436 [1] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO Ring 00 : 3 -> 4 -> 5
jwb0061:16370:16435 [2] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16371:16434 [3] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO Ring 01 : 5 -> 4 -> 7
jwb0038:16067:16132 [0] NCCL INFO Ring 02 : 1 -> 0 -> 3
jwb0038:16070:16137 [3] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0038:16067:16132 [0] NCCL INFO Ring 03 : 1 -> 0 -> 7
jwb0038:16069:16139 [2] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO Ring 02 : 5 -> 4 -> 7
jwb0061:16368:16432 [0] NCCL INFO Ring 03 : 5 -> 4 -> 3
jwb0061:16368:16432 [0] NCCL INFO Ring 04 : 3 -> 4 -> 5
jwb0038:16067:16132 [0] NCCL INFO Ring 04 : 7 -> 0 -> 1
jwb0038:16067:16132 [0] NCCL INFO Ring 05 : 1 -> 0 -> 3
jwb0061:16368:16432 [0] NCCL INFO Ring 05 : 5 -> 4 -> 7
jwb0061:16368:16432 [0] NCCL INFO Ring 06 : 5 -> 4 -> 7
jwb0038:16067:16132 [0] NCCL INFO Ring 06 : 1 -> 0 -> 3
jwb0038:16067:16132 [0] NCCL INFO Ring 07 : 1 -> 0 -> 7
jwb0038:16067:16132 [0] NCCL INFO Trees [0] 1/4/-1->0->-1 [1] 2/-1/-1->0->3 [2] 3/-1/-1->0->2 [3] -1/-1/-1->0->1 [4] 1/-1/-1->0->4 [5] 2/-1/-1->0->3 [6] 3/-1/-1->0->2 [7] -1/-1/-1->0->1
jwb0061:16368:16432 [0] NCCL INFO Ring 07 : 5 -> 4 -> 3
jwb0061:16368:16432 [0] NCCL INFO Trees [0] 5/-1/-1->4->0 [1] 6/-1/-1->4->7 [2] 7/-1/-1->4->6 [3] -1/-1/-1->4->5 [4] 5/0/-1->4->-1 [5] 6/-1/-1->4->7 [6] 7/-1/-1->4->6 [7] -1/-1/-1->4->5
jwb0038:16067:16132 [0] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61000000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61000e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43000000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61001000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43000e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61001e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61002000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43001000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61002e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43001e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61003000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43002000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61003e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43002e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61004000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43003000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61004e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43003e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61005000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f000000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43004000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61005e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f000e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43004e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61006000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f001000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43005000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61006e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43005e00
jwb0061:16370:16435 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ca61007000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f001e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f000000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43006000
jwb0061:16370:16435 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ca61007e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f002000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d000000
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43006e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x145c43007000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f000e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f002e00
jwb0061:16368:16432 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x145c43007e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f001000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f001e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f003000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f003e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f002000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d000e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f002e00
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f003000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f004000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f003e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f004e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d001000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f004000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d001e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f005000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f004e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f005e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d002000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d002e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f006000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f006e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d003000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d003e00
jwb0038:16068:16138 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14f22f007000
jwb0038:16068:16138 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14f22f007e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d004000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d004e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d005000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d005e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d006000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d006e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f005000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f005e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f006000
jwb0038:16069:16139 [2] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14660d007000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f006e00
jwb0061:16371:16434 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x15320f007000
jwb0061:16371:16434 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x15320f007e00
jwb0038:16069:16139 [2] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14660d007e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d000000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75000000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75000e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75001000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75001e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75002000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75002e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75003000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75003e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75004000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75004e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75005000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75005e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75006000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75006e00
jwb0038:16067:16132 [0] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x151a75007000
jwb0038:16067:16132 [0] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x151a75007e00
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d000e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d001000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d001e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d002000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d002e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d003000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d003e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d004000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d004e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d005000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d005e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d006000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d006e00
jwb0061:16369:16436 [1] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x14ce1d007000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7000000
jwb0061:16369:16436 [1] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x14ce1d007e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7000e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7001000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7001e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7002000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7002e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7003000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7003e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7004000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7004e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7005000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7005e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7006000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7006e00
jwb0038:16070:16137 [3] NCCL INFO channel.cc:23 Cuda Alloc Size 3456 pointer 0x148ae7007000
jwb0038:16070:16137 [3] NCCL INFO channel.cc:27 Cuda Alloc Size 32 pointer 0x148ae7007e00
jwb0061:16370:16455 [2] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x14ca5c004c70
jwb0061:16368:16453 [0] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x145c34004c70
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004c90
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 0 from local rank 2, transport 0
jwb0061:16368:16453 [0] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-orxzPj
jwb0061:16371:16454 [3] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x153200004c70
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004c90
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 0 from local rank 3, transport 0
jwb0038:16068:16152 [1] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x14f220004c70
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004c90
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 0 from local rank 1, transport 0
jwb0038:16069:16154 [2] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x146608004c70
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004c90
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 0 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca61200000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004cd0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 1 from local rank 2, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x15320f200000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004cd0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 1 from local rank 3, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f22f200000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004cd0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 1 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x151a6c004c70
jwb0038:16067:16155 [0] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-0POx2g
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14660d200000
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca61800000
jwb0061:16370:16435 [2] NCCL INFO Channel 00/0 : 6[84000] -> 7[c4000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x15320f800000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004d10
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 2 from local rank 2, transport 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f22f800000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004cd0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 1 from local rank 2, transport 0
jwb0038:16068:16138 [1] NCCL INFO Channel 00/0 : 1[44000] -> 2[84000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004d10
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 2 from local rank 1, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca56000000
jwb0061:16370:16435 [2] NCCL INFO Channel 04/0 : 6[84000] -> 7[c4000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004d50
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 3 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14660d800000
jwb0038:16069:16139 [2] NCCL INFO Channel 00/0 : 2[84000] -> 3[c4000] via P2P/IPC/read
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f226000000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004d10
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 2 from local rank 2, transport 0
jwb0038:16068:16138 [1] NCCL INFO Channel 04/0 : 1[44000] -> 2[84000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004d50
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 3 from local rank 1, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca56600000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x146602000000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f226600000
jwb0038:16069:16139 [2] NCCL INFO Channel 04/0 : 2[84000] -> 3[c4000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004d50
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 3 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x146602600000
jwb0061:16369:16456 [1] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x14ce18004c70
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004c90
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 0 from local rank 1, transport 0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 0 from local rank 0, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004c90
jwb0061:16368:16432 [0] NCCL INFO Channel 00/0 : 3[c4000] -> 4[3000] [receive] via NET/IB/0/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 1 from local rank 0, transport 2
jwb0038:16070:16153 [3] NCCL INFO Mem Realloc old size 0, new size 8 pointer 0x148ad8004c70
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004c90
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 0 from local rank 3, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce1d200000
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 0 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004c90
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004cd0
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 1 from local rank 1, transport 0
jwb0038:16067:16132 [0] NCCL INFO Channel 00/0 : 7[c4000] -> 0[3000] [receive] via NET/IB/0/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 1 from local rank 0, transport 2
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce1d800000
jwb0061:16369:16436 [1] NCCL INFO Channel 00/0 : 5[44000] -> 6[84000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004d10
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 2 from local rank 1, transport 0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 2 from local rank 3, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004d10
jwb0061:16371:16434 [3] NCCL INFO Channel 00/0 : 7[c4000] -> 0[3000] [send] via NET/IB/0(4)/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ae7200000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004cd0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 1 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 3 from local rank 3, transport 2
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce12000000
jwb0061:16369:16436 [1] NCCL INFO Channel 04/0 : 5[44000] -> 6[84000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004d50
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 3 from local rank 1, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ae7800000
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce12600000
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 2 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004d10
jwb0038:16070:16137 [3] NCCL INFO Channel 00/0 : 3[c4000] -> 4[3000] [send] via NET/IB/0(0)/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 3 from local rank 3, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004cd0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004cd0
jwb0038:16067:16132 [0] NCCL INFO Channel 04/0 : 7[c4000] -> 0[3000] [receive] via NET/IB/0/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO Channel 00/0 : 0[3000] -> 1[44000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004d90
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 4 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Channel 04/0 : 3[c4000] -> 4[3000] [receive] via NET/IB/0/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO Channel 00/0 : 4[3000] -> 5[44000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004d90
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 4 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c43200000
jwb0061:16368:16432 [0] NCCL INFO Channel 04/0 : 4[3000] -> 5[44000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004dd0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 5 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a75200000
jwb0038:16067:16132 [0] NCCL INFO Channel 04/0 : 0[3000] -> 1[44000] via P2P/IPC/read
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c43800000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004dd0
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 5 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a75800000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004d50
jwb0061:16371:16434 [3] NCCL INFO Channel 04/0 : 7[c4000] -> 0[3000] [send] via NET/IB/0(4)/GDRDMA
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x145c34023000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004d50
jwb0038:16070:16137 [3] NCCL INFO Channel 04/0 : 3[c4000] -> 4[3000] [send] via NET/IB/0(0)/GDRDMA
jwb0061:16368:16453 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 31123 mtu 5 LID 5812
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x145c34045000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x151a6c023000
jwb0038:16067:16155 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 20179 mtu 5 LID 5858
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x151a6c045000
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x145c32000000
jwb0061:16369:16456 [1] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-Z9g10P
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16152 [1] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-mb3ePP
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x151a6ac00000
jwb0038:16067:16155 [0] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-5Bbcxj
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 4 from local rank 1, transport 2
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004d90
jwb0038:16067:16155 [0] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x151a6c031010
jwb0061:16369:16436 [1] NCCL INFO Channel 01/0 : 2[84000] -> 5[44000] [receive] via NET/IB/1/GDRDMA
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 5 from local rank 1, transport 2
jwb0061:16370:16455 [2] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-hRKy3B
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 4 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004d90
jwb0038:16068:16138 [1] NCCL INFO Channel 01/0 : 6[84000] -> 1[44000] [receive] via NET/IB/1/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 5 from local rank 1, transport 2
jwb0061:16368:16453 [0] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-uQtt2F
jwb0061:16368:16453 [0] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x145c34031010
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16154 [2] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-6JqO6H
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 4 from local rank 2, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004d90
jwb0061:16370:16435 [2] NCCL INFO Channel 02/0 : 3[c4000] -> 6[84000] [receive] via NET/IB/2/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 5 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x151a6c047000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x151a6c04e000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x145c34047000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x145c3404e000
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 4 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004d90
jwb0038:16069:16139 [2] NCCL INFO Channel 02/0 : 7[c4000] -> 2[84000] [receive] via NET/IB/2/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 5 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0061:16368:16453 [0] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x151a6c06a000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x151a6b600000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x151a74600200
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x145c3406a000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x145c33600000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x145c42600200
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x151a6c06c000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x145c3406c000
jwb0038:16067:16155 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 20182 mtu 5 LID 5858
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x151a6c083000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x151a62000000
jwb0038:16067:16155 [0] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-dXNMuf
jwb0061:16368:16453 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 31126 mtu 5 LID 5812
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x145c34083000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x145c30000000
jwb0061:16368:16453 [0] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-q9QV0x
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x151a6c085000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x151a6c08c000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x145c34085000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x145c3408c000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x151a6c0a7000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004d10
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 2 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x151a62a00000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x151a74602200
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x145c340a7000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ad6000000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004d50
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 3 from local rank 3, transport 0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004d10
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 2 from local rank 3, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ad6600000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004d90
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 4 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x145c30a00000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x145c42602200
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ad6c00000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004dd0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 5 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531fe000000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ad7200000
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004d50
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 3 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531fe600000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004d90
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 4 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531fec00000
jwb0038:16067:16132 [0] NCCL INFO Channel 01/0 : 0[3000] -> 3[c4000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004e10
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 6 from local rank 0, transport 0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004dd0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 5 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a63400000
jwb0038:16067:16132 [0] NCCL INFO Channel 02/0 : 0[3000] -> 3[c4000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ff200000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004e50
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 7 from local rank 0, transport 0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a63a00000
jwb0038:16067:16132 [0] NCCL INFO Channel 05/0 : 0[3000] -> 3[c4000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004e90
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 8 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a60000000
jwb0038:16067:16132 [0] NCCL INFO Channel 06/0 : 0[3000] -> 3[c4000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004ed0
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 9 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a60600000
jwb0061:16368:16432 [0] NCCL INFO Channel 01/0 : 4[3000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004e10
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 6 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c31400000
jwb0061:16368:16432 [0] NCCL INFO Channel 02/0 : 4[3000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004e50
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 7 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c31a00000
jwb0061:16368:16432 [0] NCCL INFO Channel 05/0 : 4[3000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004e90
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 8 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c2e000000
jwb0061:16368:16432 [0] NCCL INFO Channel 06/0 : 4[3000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004ed0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 9 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c2e600000
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 6 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004e10
jwb0038:16070:16137 [3] NCCL INFO Channel 02/0 : 3[c4000] -> 6[84000] [send] via NET/IB/2(2)/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 7 from local rank 3, transport 2
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 6 from local rank 3, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004e10
jwb0061:16371:16434 [3] NCCL INFO Channel 02/0 : 7[c4000] -> 2[84000] [send] via NET/IB/2(6)/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 7 from local rank 3, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004dd0
jwb0061:16369:16436 [1] NCCL INFO Channel 05/0 : 2[84000] -> 5[44000] [receive] via NET/IB/1/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004dd0
jwb0038:16068:16138 [1] NCCL INFO Channel 05/0 : 6[84000] -> 1[44000] [receive] via NET/IB/1/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004dd0
jwb0061:16370:16435 [2] NCCL INFO Channel 06/0 : 3[c4000] -> 6[84000] [receive] via NET/IB/2/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004dd0
jwb0038:16069:16139 [2] NCCL INFO Channel 06/0 : 7[c4000] -> 2[84000] [receive] via NET/IB/2/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 6 from local rank 2, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004e10
jwb0061:16370:16435 [2] NCCL INFO Channel 01/0 : 6[84000] -> 1[44000] [send] via NET/IB/1(5)/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 7 from local rank 2, transport 2
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 6 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004e10
jwb0038:16069:16139 [2] NCCL INFO Channel 01/0 : 2[84000] -> 5[44000] [send] via NET/IB/1(1)/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 7 from local rank 2, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004e50
jwb0038:16070:16137 [3] NCCL INFO Channel 06/0 : 3[c4000] -> 6[84000] [send] via NET/IB/2(2)/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004e50
jwb0061:16371:16434 [3] NCCL INFO Channel 06/0 : 7[c4000] -> 2[84000] [send] via NET/IB/2(6)/GDRDMA
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 10 from local rank 0, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004f10
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5e600000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004f50
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 11 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5ec00000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004f90
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 12 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5f200000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c004fd0
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 13 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5f800000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005010
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 14 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5c000000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005050
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 15 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004f10
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 10 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a5c600000
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2c600000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004f50
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 11 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2cc00000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004f90
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 12 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2d200000
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 13 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34004fd0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2d800000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005010
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 14 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2a000000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005050
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 15 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c2a600000
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16070:16153 [3] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-1i6b16
jwb0061:16371:16454 [3] NCCL INFO Allocated 4194656 bytes of shared memory in /dev/shm/nccl-KboMPu
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 6 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004e10
jwb0038:16067:16132 [0] NCCL INFO Channel 03/0 : 0[3000] -> 7[c4000] [send] via NET/IB/3(3)/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 7 from local rank 0, transport 2
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 6 from local rank 0, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004e10
jwb0061:16368:16432 [0] NCCL INFO Channel 03/0 : 4[3000] -> 3[c4000] [send] via NET/IB/3(7)/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 7 from local rank 0, transport 2
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x146608024000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004e50
jwb0061:16370:16435 [2] NCCL INFO Channel 05/0 : 6[84000] -> 1[44000] [send] via NET/IB/1(5)/GDRDMA
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ce18024000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14f22001f000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14f220026000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004e50
jwb0038:16069:16139 [2] NCCL INFO Channel 05/0 : 2[84000] -> 5[44000] [send] via NET/IB/1(1)/GDRDMA
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14f220032000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ca5c024000
jwb0038:16069:16154 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 18879 mtu 5 LID 5813
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x146608046000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x146600600000
jwb0038:16069:16154 [2] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-BltQnw
jwb0038:16069:16154 [2] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x1466080453b0
jwb0061:16369:16456 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 30060 mtu 5 LID 5803
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ce18046000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14ce10600000
jwb0061:16369:16456 [1] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-POYe1w
jwb0061:16369:16456 [1] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x14ce180453b0
jwb0038:16068:16152 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 17331 mtu 5 LID 5814
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14f220054000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14f21e600000
jwb0038:16068:16152 [1] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-dQDje2
jwb0061:16370:16455 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 30063 mtu 5 LID 5808
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ca5c046000
jwb0038:16068:16152 [1] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x14f220041320
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14ca54600000
jwb0061:16370:16455 [2] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-DZhFyF
jwb0061:16370:16455 [2] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x14ca5c0453b0
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ce18048000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ce1804f000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x146608048000
jwb0061:16369:16456 [1] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0038:16069:16154 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 18880 mtu 5 LID 5813
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14660805f000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x1465fe000000
jwb0038:16069:16154 [2] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-OsqIiL
jwb0038:16068:16152 [1] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ce1806b000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ca5c048000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ce11000000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ce1c600200
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14f22006b000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14f21f000000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14f22e600200
jwb0061:16370:16455 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 30064 mtu 5 LID 5808
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ca5c05f000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ce1806d000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14ca52000000
jwb0061:16370:16455 [2] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-WEXrGv
jwb0061:16369:16456 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 30063 mtu 5 LID 5803
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ce18084000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14ce0e000000
jwb0061:16369:16456 [1] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-KQze9u
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14f22006d000
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 8 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004e90
jwb0038:16070:16137 [3] NCCL INFO Channel 03/0 : 4[3000] -> 3[c4000] [receive] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 9 from local rank 3, transport 2
jwb0038:16068:16152 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 17334 mtu 5 LID 5814
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14f220084000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x14f216000000
jwb0038:16068:16152 [1] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-40NnVL
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 8 from local rank 3, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004e90
jwb0061:16371:16434 [3] NCCL INFO Channel 03/0 : 0[3000] -> 7[c4000] [receive] via NET/IB/3/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 9 from local rank 3, transport 2
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ce18086000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ce1808d000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14f220086000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14f22008d000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ca5c061000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ca5c068000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x146608061000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x146608068000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ce180a8000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14f2200a8000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ce0ea00000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14f216a00000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14f22e602200
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ce1c602200
jwb0061:16370:16455 [2] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0038:16069:16154 [2] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ca5c084000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004e90
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 8 from local rank 1, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ca53400000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004e90
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 8 from local rank 1, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x146608084000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ca60600200
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce0f400000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004ed0
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 9 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f217400000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004ed0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 9 from local rank 1, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1465ff400000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14660c600200
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce0fa00000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f217a00000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004f10
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 10 from local rank 1, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004f10
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 10 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce11a00000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004f50
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 11 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f21fa00000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004f50
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 11 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f214000000
jwb0038:16068:16138 [1] NCCL INFO Channel 01/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004f90
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 12 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce06000000
jwb0061:16369:16436 [1] NCCL INFO Channel 01/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004f90
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 12 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce06600000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f214600000
jwb0038:16068:16138 [1] NCCL INFO Channel 02/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Channel 02/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220004fd0
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 13 from local rank 1, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18004fd0
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 13 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f214c00000
jwb0038:16068:16138 [1] NCCL INFO Channel 03/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 14 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005010
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce06c00000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f215200000
jwb0038:16068:16138 [1] NCCL INFO Channel 05/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Channel 03/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005050
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 15 from local rank 1, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005010
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 14 from local rank 1, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004e50
jwb0038:16067:16132 [0] NCCL INFO Channel 07/0 : 0[3000] -> 7[c4000] [send] via NET/IB/3(3)/GDRDMA
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f215800000
jwb0038:16068:16138 [1] NCCL INFO Channel 06/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005090
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 16 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce07200000
jwb0061:16369:16436 [1] NCCL INFO Channel 05/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005050
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 15 from local rank 1, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ca5c086000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ca5c08d000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce07800000
jwb0061:16369:16436 [1] NCCL INFO Channel 06/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005090
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 16 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f212000000
jwb0038:16068:16138 [1] NCCL INFO Channel 07/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 17 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200050d0
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x146608086000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce04000000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14660808d000
jwb0061:16369:16436 [1] NCCL INFO Channel 07/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 17 from local rank 1, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180050d0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f212600000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce04600000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004e50
jwb0061:16368:16432 [0] NCCL INFO Channel 07/0 : 4[3000] -> 3[c4000] [send] via NET/IB/3(7)/GDRDMA
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ca5c0a8000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ca4a000000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ca60602200
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x1466080a8000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1465f6000000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14660c602200
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004e90
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 8 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca4aa00000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004ed0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 9 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca4b000000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004f10
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 10 from local rank 2, transport 0
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004e90
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 8 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca4b600000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004f50
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 11 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465f6a00000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004ed0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 9 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca55a00000
jwb0061:16370:16435 [2] NCCL INFO Channel 02/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004f90
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 12 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca48000000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465f7000000
jwb0061:16370:16435 [2] NCCL INFO Channel 03/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c004fd0
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004f10
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 10 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 13 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca48600000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465f7600000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004f50
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 11 from local rank 2, transport 0
jwb0061:16370:16435 [2] NCCL INFO Channel 06/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005010
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 14 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca48c00000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x146601a00000
jwb0038:16069:16139 [2] NCCL INFO Channel 02/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004f90
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 12 from local rank 2, transport 0
jwb0061:16370:16435 [2] NCCL INFO Channel 07/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005050
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 15 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca49200000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465f4000000
jwb0038:16069:16139 [2] NCCL INFO Channel 03/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608004fd0
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 13 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465f4600000
jwb0038:16069:16139 [2] NCCL INFO Channel 06/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005010
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 14 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465f4c00000
jwb0038:16069:16139 [2] NCCL INFO Channel 07/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005050
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 15 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465f5200000
jwb0061:16369:16436 [1] NCCL INFO Connected all rings
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005110
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 18 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce00c00000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005150
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 19 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ce01200000
jwb0061:16369:16436 [1] NCCL INFO Channel 03/0 : 5[44000] -> 6[84000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005190
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 20 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connected all rings
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005110
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 18 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ce01800000
jwb0061:16369:16436 [1] NCCL INFO Channel 07/0 : 5[44000] -> 6[84000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180051d0
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 21 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f20ec00000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005150
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 19 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdfe000000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f20f200000
jwb0038:16068:16138 [1] NCCL INFO Channel 03/0 : 1[44000] -> 2[84000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005190
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 20 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20f800000
jwb0038:16068:16138 [1] NCCL INFO Channel 07/0 : 1[44000] -> 2[84000] via P2P/IPC/read
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 21 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200051d0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20c000000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004ed0
jwb0038:16070:16137 [3] NCCL INFO Channel 07/0 : 4[3000] -> 3[c4000] [receive] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO Channel 01/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004f10
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 10 from local rank 3, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148acc600000
jwb0038:16070:16137 [3] NCCL INFO Channel 03/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004f50
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 11 from local rank 3, transport 0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004ed0
jwb0061:16371:16434 [3] NCCL INFO Channel 07/0 : 0[3000] -> 7[c4000] [receive] via NET/IB/3/GDRDMA
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148accc00000
jwb0061:16371:16434 [3] NCCL INFO Channel 01/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004f10
jwb0038:16070:16137 [3] NCCL INFO Channel 05/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 10 from local rank 3, transport 0
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004f90
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 12 from local rank 3, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148acd200000
jwb0038:16070:16137 [3] NCCL INFO Channel 07/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 13 from local rank 3, transport 0
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8004fd0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531f4600000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148acd800000
jwb0061:16371:16434 [3] NCCL INFO Channel 03/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004f50
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 11 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531f4c00000
jwb0061:16371:16434 [3] NCCL INFO Channel 05/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004f90
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 12 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531f5200000
jwb0061:16371:16434 [3] NCCL INFO Channel 07/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x153200028000
jwb0061:16371:16454 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 30056 mtu 5 LID 5809
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x15320004a000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x1531f2000000
jwb0061:16371:16454 [3] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-1jjBtL
jwb0061:16371:16454 [3] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x1532000372e0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200004fd0
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 13 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531f2a00000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x148ad8029000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x15320004c000
jwb0061:16371:16454 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 30057 mtu 5 LID 5809
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x153200063000
jwb0038:16070:16153 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 17333 mtu 5 LID 5857
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x148ad804b000
jwb0038:16069:16139 [2] NCCL INFO Connected all rings
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005090
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 16 from local rank 2, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x148ac8000000
jwb0038:16070:16153 [3] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-dpTnGO
jwb0038:16070:16153 [3] NCCL INFO Mem Realloc old size 0, new size 768 pointer 0x148ad80387c0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465f0c00000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080050d0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 17 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465f1200000
jwb0038:16069:16139 [2] NCCL INFO Channel 03/0 : 2[84000] -> 3[c4000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005110
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 18 from local rank 2, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x1531f3600000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465f1800000
jwb0038:16069:16139 [2] NCCL INFO Channel 07/0 : 2[84000] -> 3[c4000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005150
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 19 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465ee000000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x148ad804b000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x148ad8052000
jwb0038:16070:16153 [3] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0061:16371:16454 [3] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-LF6jQ7
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x148ad806f000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x148ac8a00000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x148ae6600200
jwb0061:16370:16435 [2] NCCL INFO Connected all rings
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005090
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 16 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca44c00000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0050d0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 17 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca45200000
jwb0061:16370:16435 [2] NCCL INFO Channel 03/0 : 6[84000] -> 7[c4000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005110
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 18 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca45800000
jwb0061:16370:16435 [2] NCCL INFO Channel 07/0 : 6[84000] -> 7[c4000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005150
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 19 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca42000000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x148ad8071000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x153200065000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x15320006c000
jwb0038:16070:16153 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 17336 mtu 5 LID 5857
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x148ad8088000
jwb0061:16371:16454 [3] NCCL INFO NCCL_IB_TIMEOUT set by environment to 20.
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 10485760 pointer 0x148ac9400000
jwb0038:16070:16153 [3] NCCL INFO Allocated 532480 bytes of shared memory in /dev/shm/nccl-BylfPW
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x153200088000
jwb0061:16368:16432 [0] NCCL INFO Connected all rings
jwb0061:16368:16432 [0] NCCL INFO Channel 03/0 : 4[3000] -> 5[44000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1531f0c00000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005090
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x15320e600200
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 16 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c26600000
jwb0061:16368:16432 [0] NCCL INFO Channel 07/0 : 4[3000] -> 5[44000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340050d0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 17 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c26c00000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x148ad808a000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x148ad8091000
jwb0061:16368:16432 [0] NCCL INFO Channel 01/0 : 4[3000] -> 6[84000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005110
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 18 from local rank 0, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x148ad80ac000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x148ac6000000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x148ae6602200
jwb0038:16067:16132 [0] NCCL INFO Connected all rings
jwb0038:16067:16132 [0] NCCL INFO Channel 03/0 : 0[3000] -> 1[44000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005090
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 16 from local rank 0, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x15320008a000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x153200091000
jwb0061:16369:16436 [1] NCCL INFO Channel 01/0 : 5[44000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c24000000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005210
jwb0061:16368:16432 [0] NCCL INFO Channel 02/0 : 4[3000] -> 6[84000] via P2P/IPC/read
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 22 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a58600000
jwb0038:16067:16132 [0] NCCL INFO Channel 07/0 : 0[3000] -> 1[44000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005150
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 19 from local rank 0, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0050d0
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 17 from local rank 0, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a58c00000
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c24600000
jwb0061:16368:16432 [0] NCCL INFO Channel 05/0 : 4[3000] -> 6[84000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005190
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 20 from local rank 0, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdfc000000
jwb0061:16369:16436 [1] NCCL INFO Channel 02/0 : 5[44000] -> 7[c4000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005250
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 23 from local rank 1, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c24c00000
jwb0061:16368:16432 [0] NCCL INFO Channel 06/0 : 4[3000] -> 6[84000] via P2P/IPC/read
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340051d0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 21 from local rank 0, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdfc600000
jwb0061:16369:16436 [1] NCCL INFO Channel 05/0 : 5[44000] -> 7[c4000] via P2P/IPC/read
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x145c25200000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005290
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 24 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdfcc00000
jwb0061:16369:16436 [1] NCCL INFO Channel 06/0 : 5[44000] -> 7[c4000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180052d0
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 25 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdfd200000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x1532000ac000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1531f1600000
jwb0038:16067:16132 [0] NCCL INFO Channel 01/0 : 0[3000] -> 2[84000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005110
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 18 from local rank 0, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x15320e602200
jwb0038:16070:16137 [3] NCCL INFO Connected all rings
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005010
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 14 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a56000000
jwb0061:16371:16434 [3] NCCL INFO Connected all rings
jwb0038:16068:16138 [1] NCCL INFO Channel 01/0 : 1[44000] -> 3[c4000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Channel 02/0 : 0[3000] -> 2[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005010
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 14 from local rank 3, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005150
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 22 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 19 from local rank 0, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005210
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac6a00000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005050
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 15 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ee000000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005050
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 15 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a56600000
jwb0038:16067:16132 [0] NCCL INFO Channel 05/0 : 0[3000] -> 2[84000] via P2P/IPC/read
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20a000000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005190
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 20 from local rank 0, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac7000000
jwb0038:16068:16138 [1] NCCL INFO Channel 02/0 : 1[44000] -> 3[c4000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ee600000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005250
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 23 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a56c00000
jwb0038:16067:16132 [0] NCCL INFO Channel 06/0 : 0[3000] -> 2[84000] via P2P/IPC/read
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0051d0
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 21 from local rank 0, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20a600000
jwb0038:16068:16138 [1] NCCL INFO Channel 05/0 : 1[44000] -> 3[c4000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005290
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 24 from local rank 1, transport 0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 16 from local rank 3, transport 0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005090
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ef800000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000050d0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 17 from local rank 3, transport 0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 16 from local rank 3, transport 0
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005090
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20ac00000
jwb0038:16068:16138 [1] NCCL INFO Channel 06/0 : 1[44000] -> 3[c4000] via P2P/IPC/read
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ec000000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200052d0
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 25 from local rank 1, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac4000000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005110
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 18 from local rank 3, transport 0
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005190
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 20 from local rank 2, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x151a57200000
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ec600000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005150
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 19 from local rank 3, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca40000000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0051d0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 21 from local rank 2, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1531ecc00000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80050d0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 17 from local rank 3, transport 0
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005190
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 20 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca40600000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f20b200000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac4600000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005110
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 18 from local rank 3, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465ec000000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080051d0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 21 from local rank 2, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac4c00000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005150
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 19 from local rank 3, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465ec600000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x148ac5200000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005210
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 22 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465ecc00000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005210
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 22 from local rank 2, transport 0
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005250
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 23 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca40c00000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005250
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 23 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465ed200000
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 20 from local rank 3, transport 2
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca41200000
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 26 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 26 from local rank 1, transport 2
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 20 from local rank 3, transport 2
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 22 from local rank 0, transport 2
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 24 from local rank 2, transport 2
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 22 from local rank 0, transport 2
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 24 from local rank 2, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005190
jwb0061:16371:16434 [3] NCCL INFO Channel 03/0 : 3[c4000] -> 7[c4000] [receive] via NET/IB/3/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0061:16371:16454 [3] NCCL INFO New proxy recv connection 21 from local rank 3, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005310
jwb0061:16369:16436 [1] NCCL INFO Channel 01/0 : 1[44000] -> 5[44000] [receive] via NET/IB/1/GDRDMA
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 27 from local rank 1, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005190
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005310
jwb0038:16070:16137 [3] NCCL INFO Channel 03/0 : 7[c4000] -> 3[c4000] [receive] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 0
jwb0038:16068:16138 [1] NCCL INFO Channel 01/0 : 5[44000] -> 1[44000] [receive] via NET/IB/1/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 0
jwb0038:16070:16153 [3] NCCL INFO New proxy recv connection 21 from local rank 3, transport 2
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 27 from local rank 1, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005210
jwb0061:16368:16432 [0] NCCL INFO Channel 00/0 : 0[3000] -> 4[3000] [receive] via NET/IB/0/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 23 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005210
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005290
jwb0038:16069:16139 [2] NCCL INFO Channel 02/0 : 6[84000] -> 2[84000] [receive] via NET/IB/2/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO Channel 00/0 : 4[3000] -> 0[3000] [receive] via NET/IB/0/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 25 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 23 from local rank 0, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005290
jwb0061:16370:16435 [2] NCCL INFO Channel 02/0 : 2[84000] -> 6[84000] [receive] via NET/IB/2/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 25 from local rank 2, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005350
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000051d0
jwb0061:16371:16434 [3] NCCL INFO Channel 07/0 : 3[c4000] -> 7[c4000] [receive] via NET/IB/3/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO Channel 05/0 : 1[44000] -> 5[44000] [receive] via NET/IB/1/GDRDMA
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 22 from local rank 3, transport 2
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 28 from local rank 1, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80051d0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005350
jwb0038:16068:16138 [1] NCCL INFO Channel 05/0 : 5[44000] -> 1[44000] [receive] via NET/IB/1/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 28 from local rank 1, transport 2
jwb0038:16070:16137 [3] NCCL INFO Channel 07/0 : 7[c4000] -> 3[c4000] [receive] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 22 from local rank 3, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005250
jwb0061:16368:16432 [0] NCCL INFO Channel 04/0 : 0[3000] -> 4[3000] [receive] via NET/IB/0/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 24 from local rank 0, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0052d0
jwb0061:16370:16435 [2] NCCL INFO Channel 06/0 : 2[84000] -> 6[84000] [receive] via NET/IB/2/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 26 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080052d0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005250
jwb0038:16069:16139 [2] NCCL INFO Channel 06/0 : 6[84000] -> 2[84000] [receive] via NET/IB/2/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO Channel 04/0 : 4[3000] -> 0[3000] [receive] via NET/IB/0/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 26 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 24 from local rank 0, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005390
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005210
jwb0061:16371:16434 [3] NCCL INFO Channel 03/0 : 7[c4000] -> 3[c4000] [send] via NET/IB/3/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO Channel 01/0 : 5[44000] -> 1[44000] [send] via NET/IB/1/GDRDMA
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 23 from local rank 3, transport 2
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 29 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005390
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005210
jwb0038:16070:16137 [3] NCCL INFO Channel 03/0 : 3[c4000] -> 7[c4000] [send] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO Channel 01/0 : 1[44000] -> 5[44000] [send] via NET/IB/1/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 23 from local rank 3, transport 2
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 29 from local rank 1, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005290
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005310
jwb0061:16368:16432 [0] NCCL INFO Channel 00/0 : 4[3000] -> 0[3000] [send] via NET/IB/0/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO Channel 02/0 : 6[84000] -> 2[84000] [send] via NET/IB/2/GDRDMA
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 27 from local rank 2, transport 2
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 25 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005290
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005310
jwb0038:16067:16132 [0] NCCL INFO Channel 00/0 : 0[3000] -> 4[3000] [send] via NET/IB/0/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO Channel 02/0 : 2[84000] -> 6[84000] [send] via NET/IB/2/GDRDMA
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 27 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 25 from local rank 0, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180053d0
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005250
jwb0061:16369:16436 [1] NCCL INFO Channel 05/0 : 5[44000] -> 1[44000] [send] via NET/IB/1/GDRDMA
jwb0061:16371:16434 [3] NCCL INFO Channel 07/0 : 7[c4000] -> 3[c4000] [send] via NET/IB/3/GDRDMA
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005250
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200053d0
jwb0038:16070:16137 [3] NCCL INFO Channel 07/0 : 3[c4000] -> 7[c4000] [send] via NET/IB/3/GDRDMA
jwb0038:16068:16138 [1] NCCL INFO Channel 05/0 : 1[44000] -> 5[44000] [send] via NET/IB/1/GDRDMA
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x1532000c2000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x148ad80c7000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14f2200c7000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ce180c6000
jwb0061:16371:16454 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 30062 mtu 5 LID 5809
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x1532000da000
jwb0061:16369:16456 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 30066 mtu 5 LID 5803
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ce180de000
jwb0038:16070:16153 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 17339 mtu 5 LID 5857
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x148ad8025000
jwb0038:16068:16152 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 17337 mtu 5 LID 5814
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14f2200df000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x1531eac00000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x15320e604200
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x148ac3200000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14cdfb200000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x148ae6604200
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14f209200000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14ce1c604200
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14f22e604200
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x1532000da000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ce180e0000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x148ad80de000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14f2200e1000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340052d0
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005350
jwb0061:16368:16432 [0] NCCL INFO Channel 04/0 : 4[3000] -> 0[3000] [send] via NET/IB/0/GDRDMA
jwb0061:16370:16435 [2] NCCL INFO Channel 06/0 : 6[84000] -> 2[84000] [send] via NET/IB/2/GDRDMA
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005350
jwb0038:16069:16139 [2] NCCL INFO Channel 06/0 : 2[84000] -> 6[84000] [send] via NET/IB/2/GDRDMA
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x1466080c5000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ca5c0c5000
jwb0061:16371:16454 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 30063 mtu 5 LID 5809
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x1532000f2000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x1531eb600000
jwb0038:16070:16153 [3] NCCL INFO NET/IB: Dev 3 Port 1 qpn 17340 mtu 5 LID 5857
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x148ad8027000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x15320e686200
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0052d0
jwb0038:16067:16132 [0] NCCL INFO Channel 04/0 : 0[3000] -> 4[3000] [send] via NET/IB/0/GDRDMA
jwb0061:16369:16456 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 30067 mtu 5 LID 5803
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ce180f7000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x148ac0000000
jwb0038:16068:16152 [1] NCCL INFO NET/IB: Dev 1 Port 1 qpn 17338 mtu 5 LID 5814
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x151a6c0c3000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14f2200f8000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x148ae6686200
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14cdf8000000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14ce1c686200
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14f206000000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x145c340c3000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14f22e686200
jwb0061:16370:16455 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 30069 mtu 5 LID 5808
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ca5c0dd000
jwb0038:16069:16154 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 18885 mtu 5 LID 5813
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x1466080dd000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14ca3f200000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14ca60604200
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x1465eb200000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14660c604200
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x1532000f4000
jwb0038:16067:16155 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 20185 mtu 5 LID 5858
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x151a6c0db000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x1532000fb000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x148ad80f5000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x148ad80fc000
jwb0061:16368:16453 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 31129 mtu 5 LID 5812
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x145c340db000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x151a55200000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x151a74604200
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ce180f9000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ce18100000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14f2200fa000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14f220101000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x145c23200000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x145c42604200
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x14ca5c0df000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x1466080df000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x153200117000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x151a6c0dd000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x148ad8118000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1531e8000000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x148ac0a00000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x15320e708200
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x148ae6708200
jwb0061:16370:16455 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 30070 mtu 5 LID 5808
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x14ca5c0f6000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:596 Ib Alloc Size 26560 pointer 0x145c340dd000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x14ca3c000000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14ca60686200
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14f22011d000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ce1811c000
jwb0038:16069:16154 [2] NCCL INFO NET/IB: Dev 2 Port 1 qpn 18886 mtu 5 LID 5813
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x1466080f6000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14f206a00000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14f22e708200
jwb0038:16067:16155 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 20186 mtu 5 LID 5858
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x151a6c0f4000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x1465e8000000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14cdf8a00000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ce1c708200
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x14660c686200
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x151a52000000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x151a74686200
jwb0061:16368:16453 [0] NCCL INFO NET/IB: Dev 0 Port 1 qpn 31130 mtu 5 LID 5812
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:653 Ib Alloc Size 552 pointer 0x145c340f5000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x153200119000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x153200120000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:569 Cuda Alloc Size 9109504 pointer 0x145c20000000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:577 Cuda Host Alloc Size 532480 pointer 0x145c42686200
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x148ad811a000
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x148ad8121000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ca5c0f8000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ca5c0ff000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ce1811e000
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ce18125000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x1466080f8000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x1466080ff000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14f22011f000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14f220126000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x151a6c0f6000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x151a6c0fd000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x145c340f7000
jwb0061:16371:16454 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x15320013b000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x145c340fe000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1531e8a00000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x15320e70a200
jwb0038:16070:16153 [3] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x148ad813c000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x148ac1400000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x148ae670a200
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ca5c11b000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ca3ca00000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ca60708200
jwb0061:16369:16456 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ce18140000
jwb0038:16068:16152 [1] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14f220141000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14f207400000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14f22e70a200
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14cdf9400000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ce1c70a200
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x151a6c119000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14660811b000
jwb0061:16371:16434 [3] NCCL INFO Channel 01/0 : 7[c4000] -> 4[3000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005290
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 24 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e9400000
jwb0061:16371:16434 [3] NCCL INFO Channel 02/0 : 7[c4000] -> 4[3000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000052d0
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 25 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x145c3411a000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x151a52a00000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x151a74708200
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1465e8a00000
jwb0038:16070:16137 [3] NCCL INFO Channel 01/0 : 3[c4000] -> 0[3000] via P2P/IPC/read
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14660c708200
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005290
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 24 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e9a00000
jwb0061:16371:16434 [3] NCCL INFO Channel 05/0 : 7[c4000] -> 4[3000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005310
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 26 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x145c20a00000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x145c42708200
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abe000000
jwb0038:16070:16137 [3] NCCL INFO Channel 02/0 : 3[c4000] -> 0[3000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80052d0
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 25 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e6000000
jwb0061:16371:16434 [3] NCCL INFO Channel 06/0 : 7[c4000] -> 4[3000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005350
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 27 from local rank 3, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abe600000
jwb0038:16070:16137 [3] NCCL INFO Channel 05/0 : 3[c4000] -> 0[3000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005310
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 26 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e6600000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14ca5c11d000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x14ca5c124000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abec00000
jwb0038:16070:16137 [3] NCCL INFO Channel 06/0 : 3[c4000] -> 0[3000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005350
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 27 from local rank 3, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005410
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 30 from local rank 1, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf6000000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005450
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 31 from local rank 1, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abf200000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf6600000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005490
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 32 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005410
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 30 from local rank 1, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x145c3411c000
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x145c34123000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf6c00000
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 33 from local rank 1, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180054d0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f204000000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005450
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 31 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x151a6c11b000
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x151a6c122000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf7200000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f204600000
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:683 Ib Alloc Size 21688 pointer 0x14660811d000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005490
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 32 from local rank 1, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:696 Ib Alloc Size 552 pointer 0x146608124000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f204c00000
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200054d0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 33 from local rank 1, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f205200000
jwb0061:16370:16455 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14ca5c13f000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x14ca3d400000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14ca6070a200
jwb0061:16368:16453 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x145c3413e000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x145c21400000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x145c4270a200
jwb0038:16067:16155 [0] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x151a6c13d000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x151a53400000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x151a7470a200
jwb0038:16069:16154 [2] NCCL INFO transport/net_ib.cc:771 Ib Alloc Size 552 pointer 0x14660813f000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:700 Cuda Alloc Size 9633792 pointer 0x1465e9400000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:704 Cuda Host Alloc Size 8192 pointer 0x14660c70a200
jwb0061:16370:16435 [2] NCCL INFO Channel 01/0 : 6[84000] -> 4[3000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005390
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 28 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca3a000000
jwb0061:16370:16435 [2] NCCL INFO Channel 02/0 : 6[84000] -> 4[3000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0053d0
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 29 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca3a600000
jwb0061:16370:16435 [2] NCCL INFO Channel 05/0 : 6[84000] -> 4[3000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005410
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 30 from local rank 2, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca3ac00000
jwb0061:16370:16435 [2] NCCL INFO Channel 06/0 : 6[84000] -> 4[3000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005450
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 31 from local rank 2, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005310
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 26 from local rank 0, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca3b200000
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1e000000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005350
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 27 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1e600000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005310
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 26 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005390
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 28 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1ec00000
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a50000000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340053d0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 29 from local rank 0, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005350
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 27 from local rank 0, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1f200000
jwb0038:16069:16139 [2] NCCL INFO Channel 01/0 : 2[84000] -> 0[3000] via P2P/IPC/read
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a50600000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005390
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 28 from local rank 2, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005390
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 28 from local rank 0, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e6000000
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a50c00000
jwb0038:16069:16139 [2] NCCL INFO Channel 02/0 : 2[84000] -> 0[3000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080053d0
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 29 from local rank 2, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0053d0
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 29 from local rank 0, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e6600000
jwb0038:16069:16139 [2] NCCL INFO Channel 05/0 : 2[84000] -> 0[3000] via P2P/IPC/read
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a51200000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005410
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 30 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e6c00000
jwb0038:16069:16139 [2] NCCL INFO Channel 06/0 : 2[84000] -> 0[3000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005450
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 31 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e7200000
jwb0061:16371:16434 [3] NCCL INFO Channel 01/0 : 7[c4000] -> 5[44000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005390
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 28 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 30 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005410
jwb0038:16070:16137 [3] NCCL INFO Channel 01/0 : 3[c4000] -> 1[44000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005390
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 28 from local rank 3, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e4600000
jwb0061:16371:16434 [3] NCCL INFO Channel 02/0 : 7[c4000] -> 5[44000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000053d0
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 29 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1d200000
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 31 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005450
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e4c00000
jwb0061:16371:16434 [3] NCCL INFO Channel 05/0 : 7[c4000] -> 5[44000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005410
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 30 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1d800000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005490
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 32 from local rank 0, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e5200000
jwb0061:16371:16434 [3] NCCL INFO Channel 06/0 : 7[c4000] -> 5[44000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005450
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 31 from local rank 3, transport 0
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1a000000
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 30 from local rank 0, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005410
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340054d0
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 33 from local rank 0, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e5800000
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abd200000
jwb0038:16070:16137 [3] NCCL INFO Channel 02/0 : 3[c4000] -> 1[44000] via P2P/IPC/read
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c1a600000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80053d0
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 29 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4f200000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005450
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 31 from local rank 0, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148abd800000
jwb0038:16070:16137 [3] NCCL INFO Channel 05/0 : 3[c4000] -> 1[44000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005410
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 30 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4f800000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005490
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 32 from local rank 0, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148aba000000
jwb0038:16070:16137 [3] NCCL INFO Channel 06/0 : 3[c4000] -> 1[44000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005450
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 31 from local rank 3, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4c000000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0054d0
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 33 from local rank 0, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148aba600000
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4c600000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005510
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 34 from local rank 1, transport 0
jwb0061:16371:16434 [3] NCCL INFO Channel 00/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005490
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 32 from local rank 3, transport 0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 32 from local rank 2, transport 0
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005490
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf5200000
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e3800000
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005550
jwb0061:16369:16456 [1] NCCL INFO New proxy recv connection 35 from local rank 1, transport 0
jwb0061:16371:16434 [3] NCCL INFO Channel 04/0 : 7[c4000] -> 6[84000] via P2P/IPC/read
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000054d0
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 33 from local rank 3, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca39200000
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0054d0
jwb0061:16370:16455 [2] NCCL INFO New proxy recv connection 33 from local rank 2, transport 0
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 34 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005510
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14cdf5800000
jwb0038:16070:16137 [3] NCCL INFO Channel 00/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0061:16369:16436 [1] NCCL INFO Channel 00/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005490
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 32 from local rank 3, transport 0
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005590
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 36 from local rank 1, transport 0
jwb0061:16371:16454 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1531e0000000
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 32 from local rank 2, transport 0
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005490
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 34 from local rank 0, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005510
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14ca39800000
jwb0061:16370:16435 [2] NCCL INFO Channel 00/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005510
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 34 from local rank 2, transport 0
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdf2000000
jwb0061:16369:16436 [1] NCCL INFO Channel 04/0 : 5[44000] -> 4[3000] via P2P/IPC/read
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c18600000
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 34 from local rank 0, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005510
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180055d0
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 37 from local rank 1, transport 0
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005550
jwb0061:16368:16453 [0] NCCL INFO New proxy recv connection 35 from local rank 0, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca36000000
jwb0061:16370:16435 [2] NCCL INFO Channel 04/0 : 6[84000] -> 5[44000] via P2P/IPC/read
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005550
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 35 from local rank 2, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f203200000
jwb0061:16369:16456 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14cdf2600000
jwb0061:16368:16453 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x145c18c00000
jwb0038:16068:16152 [1] NCCL INFO New proxy recv connection 35 from local rank 1, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005550
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148ab8600000
jwb0038:16070:16137 [3] NCCL INFO Channel 04/0 : 3[c4000] -> 2[84000] via P2P/IPC/read
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465e5200000
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80054d0
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 33 from local rank 3, transport 0
jwb0061:16370:16455 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14ca36600000
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4a600000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080054d0
jwb0038:16069:16154 [2] NCCL INFO New proxy recv connection 33 from local rank 2, transport 0
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005550
jwb0038:16067:16155 [0] NCCL INFO New proxy recv connection 35 from local rank 0, transport 0
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x14f203800000
jwb0038:16068:16138 [1] NCCL INFO Channel 00/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005590
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 36 from local rank 1, transport 0
jwb0038:16067:16155 [0] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x151a4ac00000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:449 Cuda Alloc Size 6291456 pointer 0x1465e5800000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f200000000
jwb0038:16069:16139 [2] NCCL INFO Channel 00/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16068:16138 [1] NCCL INFO Channel 04/0 : 1[44000] -> 0[3000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005510
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 34 from local rank 2, transport 0
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200055d0
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 37 from local rank 1, transport 0
jwb0038:16070:16153 [3] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x148ab8c00000
jwb0038:16068:16152 [1] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x14f200600000
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e2000000
jwb0038:16069:16139 [2] NCCL INFO Channel 04/0 : 2[84000] -> 1[44000] via P2P/IPC/read
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005550
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 35 from local rank 2, transport 0
jwb0038:16069:16154 [2] NCCL INFO transport/p2p.cc:430 Cuda Alloc Size 6291456 pointer 0x1465e2600000
jwb0038:16067:16132 [0] NCCL INFO Connected all trees
jwb0038:16067:16132 [0] NCCL INFO Latency/AlgBw | Tree/ LL | Tree/ LL128 | Tree/Simple | Ring/ LL | Ring/ LL128 | Ring/Simple | CollNetDirect/ LL | CollNetDirect/ LL128 | CollNetDirect/Simple | CollNetChain/ LL | CollNetChain/ LL128 | CollNetChain/Simple |
jwb0038:16067:16132 [0] NCCL INFO Max NThreads | 512 | 640 | 512 | 512 | 640 | 512 | 0 | 0 | 512 | 0 | 0 | 512 |
jwb0038:16067:16132 [0] NCCL INFO Broadcast | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 25.2/ 22.5 | 42.0/ 0.0 | 232.4/ 96.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 |
jwb0038:16067:16132 [0] NCCL INFO Reduce | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 25.2/ 22.5 | 42.0/ 0.0 | 232.4/ 96.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 |
jwb0038:16067:16132 [0] NCCL INFO AllGather | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 11.4/ 25.7 | 25.4/ 0.0 | 38.4/ 109.7 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 |
jwb0038:16067:16132 [0] NCCL INFO ReduceScatter | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 11.4/ 25.7 | 25.4/ 0.0 | 38.4/ 109.7 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 | 0.0/ 0.0 |
jwb0038:16067:16132 [0] NCCL INFO AllReduce | 20.0/ 11.2 | 28.9/ 0.0 | 224.0/ 47.2 | 22.9/ 12.9 | 45.0/ 0.0 | 80.8/ 54.9 | 7.4/ 0.0 | 7.4/ 0.0 | 29.7/ 0.0 | 4.4/ 0.0 | 4.4/ 0.0 | 48.0/ 0.0 |
jwb0038:16067:16132 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0038:16067:16132 [0] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 36 from local rank 0, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connected all trees
jwb0061:16368:16432 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0061:16368:16432 [0] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 36 from local rank 0, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connected all trees
jwb0061:16371:16434 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0061:16371:16434 [3] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 34 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connected all trees
jwb0038:16070:16137 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0038:16070:16137 [3] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 34 from local rank 3, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connected all trees
jwb0061:16369:16436 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0061:16369:16436 [1] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 38 from local rank 1, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connected all trees
jwb0061:16370:16435 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0061:16370:16435 [2] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 36 from local rank 2, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connected all trees
jwb0038:16068:16138 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0038:16068:16138 [1] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 38 from local rank 1, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connected all trees
jwb0038:16069:16139 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
jwb0038:16069:16139 [2] NCCL INFO 8 coll channels, 8 p2p channels, 2 p2p channels per peer
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 36 from local rank 2, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005590
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x151a7045dfb0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x151a704b6c80
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x151a7045dfb0
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16067:16132 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x151a48000000
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005590
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16368:16432 [0] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x145c3c45dfb0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16368:16432 [0] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x145c3c4b6820
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16368:16432 [0] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x145c3c45dfb0
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16368:16432 [0] NCCL INFO GPU Direct RDMA Enabled for GPU 3000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x145c16000000
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005510
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005510
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16070:16137 [3] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x148ae045dfb0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16070:16137 [3] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x148ae04c2650
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16070:16137 [3] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x148ae045dfb0
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16070:16137 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x15320845dfb0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x1532084c2600
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x15320845dfb0
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO GPU Direct RDMA Enabled for GPU c4000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c340055d0
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 37 from local rank 3, transport 2
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 37 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c0055d0
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 39 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005650
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x145c17000000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x148ab6000000
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x151a49000000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14f1fe600000
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 39 from local rank 0, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005650
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14cdf0600000
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 40 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005690
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 40 from local rank 3, transport 2
jwb0038:16070:16137 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x1466080055d0
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 37 from local rank 3, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005690
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14f1fc000000
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 37 from local rank 3, transport 2
jwb0061:16371:16434 [3] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c0055d0
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1465e0600000
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 38 from local rank 0, transport 2
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005610
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14cdee000000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1531de000000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14ca34600000
jwb0038:16067:16132 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005550
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 35 from local rank 0, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005590
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce18005610
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f220005610
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x14f22845dfb0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x14f2284ab120
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x14f22845dfb0
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 1
jwb0038:16068:16138 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1465de000000
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x14ca5845dfb0
jwb0061:16369:16436 [1] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x14ce1445dfb0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x14ca584b6ea0
jwb0061:16369:16436 [1] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x14ce144ab080
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x14ca5845dfb0
jwb0061:16369:16436 [1] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x14ce1445dfb0
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 0 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 2 (distance 3 <= 4), read 1
jwb0061:16370:16435 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16369:16436 [1] NCCL INFO GPU Direct RDMA Enabled for GPU 44000 / HCA 3 (distance 3 <= 4), read 1
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14cdef000000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14f1fd000000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x148ab7000000
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14ca32000000
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 38 from local rank 0, transport 2
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005610
jwb0061:16368:16432 [0] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005550
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 35 from local rank 0, transport 2
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14ca33000000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1531df000000
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005590
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO Mem Realloc old size 0, new size 4 pointer 0x14660445dfb0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO Mem Realloc old size 4, new size 8 pointer 0x1466044b6ea0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO Mem Realloc old size 8, new size 12 pointer 0x14660445dfb0
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 0 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 1 (distance 3 <= 4), read 1
jwb0038:16069:16139 [2] NCCL INFO GPU Direct RDMA Enabled for GPU 84000 / HCA 3 (distance 3 <= 4), read 1
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1465df000000
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 38 from local rank 2, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005610
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 38 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005610
jwb0061:16368:16453 [0] NCCL INFO New proxy send connection 39 from local rank 1, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 0 -> connection 0x145c34005650
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14ce180056d0
jwb0061:16369:16456 [1] NCCL INFO New proxy send connection 41 from local rank 2, transport 2
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x151a46000000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x145c14000000
jwb0061:16369:16456 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14cdec000000
jwb0061:16368:16453 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x145c15000000
jwb0038:16067:16155 [0] NCCL INFO New proxy send connection 39 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 0 -> connection 0x151a6c005650
jwb0038:16068:16152 [1] NCCL INFO New proxy send connection 41 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 1 -> connection 0x14f2200056d0
jwb0038:16067:16155 [0] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x151a47000000
jwb0038:16068:16152 [1] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14f1fa000000
jwb0038:16069:16154 [2] NCCL INFO New proxy send connection 39 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 2 -> connection 0x146608005650
jwb0061:16370:16455 [2] NCCL INFO New proxy send connection 39 from local rank 1, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 2 -> connection 0x14ca5c005650
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 36 from local rank 2, transport 2
jwb0061:16370:16435 [2] NCCL INFO Connection to proxy localRank 3 -> connection 0x153200005590
jwb0061:16370:16455 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x14ca30000000
jwb0038:16069:16154 [2] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1465dc000000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1531dc000000
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 36 from local rank 2, transport 2
jwb0038:16069:16139 [2] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad8005590
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x148ab4000000
jwb0038:16070:16153 [3] NCCL INFO New proxy send connection 37 from local rank 1, transport 2
jwb0038:16068:16138 [1] NCCL INFO Connection to proxy localRank 3 -> connection 0x148ad80055d0
jwb0061:16371:16454 [3] NCCL INFO New proxy send connection 37 from local rank 1, transport 2
jwb0061:16369:16436 [1] NCCL INFO Connection to proxy localRank 3 -> connection 0x1532000055d0
jwb0038:16069:16139 [2] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x14660d008000
jwb0038:16068:16138 [1] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x14f22f008000
jwb0038:16070:16137 [3] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x148ae7008000
jwb0038:16067:16132 [0] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x151a75008000
jwb0038:16070:16153 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x148ab5000000
jwb0061:16371:16454 [3] NCCL INFO transport/net.cc:381 Cuda Alloc Size 16777216 pointer 0x1531dd000000
jwb0061:16370:16435 [2] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x14ca61008000
jwb0061:16368:16432 [0] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x145c43008000
jwb0061:16369:16436 [1] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x14ce1d008000
jwb0061:16371:16434 [3] NCCL INFO init.cc:367 Cuda Alloc Size 5168 pointer 0x15320f008000
jwb0038:16068:16138 [1] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x14f1f8000000
jwb0038:16068:16138 [1] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x14f22e70c200
jwb0038:16068:16138 [1] NCCL INFO comm 0x37fab430 rank 1 nranks 8 cudaDev 1 busId 44000 - Init COMPLETE
jwb0038:16068:16068 [1] NCCL INFO AllReduce: opCount 0 sendbuff 0x14f266c00000 recvbuff 0x14f266c00000 count 1 datatype 1 op 0 root 0 comm 0x37fab430 [nranks=8] stream 0x37d9e480
jwb0038:16068:16068 [1] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16370:16435 [2] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x14ca2e000000
jwb0061:16370:16435 [2] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x14ca6070c200
jwb0061:16370:16435 [2] NCCL INFO comm 0x3724b850 rank 6 nranks 8 cudaDev 2 busId 84000 - Init COMPLETE
jwb0038:16070:16137 [3] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x148ab2000000
jwb0061:16369:16436 [1] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x14cdea000000
jwb0061:16369:16436 [1] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x14ce1c70c200
jwb0061:16369:16436 [1] NCCL INFO comm 0x37bcc8f0 rank 5 nranks 8 cudaDev 1 busId 44000 - Init COMPLETE
jwb0038:16067:16132 [0] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x151a44000000
jwb0061:16370:16370 [2] NCCL INFO AllReduce: opCount 0 sendbuff 0x14ca96c00000 recvbuff 0x14ca96c00000 count 1 datatype 1 op 0 root 0 comm 0x3724b850 [nranks=8] stream 0x3703fe90
jwb0038:16070:16137 [3] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x148ae670c200
jwb0061:16370:16370 [2] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x145c12000000
jwb0038:16070:16137 [3] NCCL INFO comm 0x38b2ffe0 rank 3 nranks 8 cudaDev 3 busId c4000 - Init COMPLETE
jwb0038:16069:16139 [2] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x1465da000000
jwb0061:16368:16432 [0] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x145c4270c200
jwb0061:16369:16369 [1] NCCL INFO AllReduce: opCount 0 sendbuff 0x14ce52c00000 recvbuff 0x14ce52c00000 count 1 datatype 1 op 0 root 0 comm 0x37bcc8f0 [nranks=8] stream 0x379c0450
jwb0061:16369:16369 [1] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16368:16432 [0] NCCL INFO comm 0x38855390 rank 4 nranks 8 cudaDev 0 busId 3000 - Init COMPLETE
jwb0061:16371:16434 [3] NCCL INFO init.cc:392 Cuda Host Alloc Size 33554432 pointer 0x1531da000000
jwb0038:16069:16139 [2] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x14660c70c200
jwb0038:16069:16139 [2] NCCL INFO comm 0x375f9e90 rank 2 nranks 8 cudaDev 2 busId 84000 - Init COMPLETE
jwb0061:16371:16434 [3] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x15320e70c200
jwb0038:16070:16070 [3] NCCL INFO AllReduce: opCount 0 sendbuff 0x148b1cc00000 recvbuff 0x148b1cc00000 count 1 datatype 1 op 0 root 0 comm 0x38b2ffe0 [nranks=8] stream 0x3891fce0
jwb0038:16070:16070 [3] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0038:16067:16132 [0] NCCL INFO init.cc:398 Cuda Host Alloc Size 128 pointer 0x151a7470c200
jwb0061:16371:16434 [3] NCCL INFO comm 0x36170d70 rank 7 nranks 8 cudaDev 3 busId c4000 - Init COMPLETE
jwb0038:16067:16132 [0] NCCL INFO comm 0x3868f010 rank 0 nranks 8 cudaDev 0 busId 3000 - Init COMPLETE
jwb0061:16368:16368 [0] NCCL INFO AllReduce: opCount 0 sendbuff 0x145c78c00000 recvbuff 0x145c78c00000 count 1 datatype 1 op 0 root 0 comm 0x38855390 [nranks=8] stream 0x386499d0
jwb0061:16368:16368 [0] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0061:16371:16371 [3] NCCL INFO AllReduce: opCount 0 sendbuff 0x153246e00000 recvbuff 0x153246e00000 count 1 datatype 1 op 0 root 0 comm 0x36170d70 [nranks=8] stream 0x35f648d0
jwb0061:16371:16371 [3] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0038:16067:16067 [0] NCCL INFO AllReduce: opCount 0 sendbuff 0x151aaac00000 recvbuff 0x151aaac00000 count 1 datatype 1 op 0 root 0 comm 0x3868f010 [nranks=8] stream 0x3862d2a0
jwb0038:16067:16067 [0] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
jwb0038:16069:16069 [2] NCCL INFO AllReduce: opCount 0 sendbuff 0x146646c00000 recvbuff 0x146646c00000 count 1 datatype 1 op 0 root 0 comm 0x375f9e90 [nranks=8] stream 0x373ed9f0
jwb0038:16069:16069 [2] NCCL INFO misc/utils.cc:235 memory stack hunk malloc(65536)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment