Skip to content

Instantly share code, notes, and snippets.

@pentschev
Last active August 14, 2019 17:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pentschev/b31f3d1f9bce000939754c156689ee4f to your computer and use it in GitHub Desktop.
Save pentschev/b31f3d1f9bce000939754c156689ee4f to your computer and use it in GitHub Desktop.
--- ucx_info_b 2019-08-08 11:01:32.820637475 -0700
+++ ucx_from_source_info_b 2019-08-08 11:02:15.976537113 -0700
@@ -8,6 +8,7 @@
#define HAVE_CUDA 1
#define HAVE_CUDA_H 1
#define HAVE_CUDA_RUNTIME_H 1
+#define HAVE_DC_EXP 1
#define HAVE_DECL_ASPRINTF 1
#define HAVE_DECL_BASENAME 1
#define HAVE_DECL_CPU_ISSET 1
@@ -24,48 +25,58 @@
#define HAVE_DECL_IBV_CREATE_SRQ_EX 1
#define HAVE_DECL_IBV_EVENT_GID_CHANGE 1
#define HAVE_DECL_IBV_EVENT_TYPE_STR 1
-#define HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR 0
-#define HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND 0
+#define HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR 1
+#define HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND 1
#define HAVE_DECL_IBV_EXP_ALLOC_DM 0
-#define HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE 0
-#define HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN 0
-#define HAVE_DECL_IBV_EXP_CQ_MODERATION 0
-#define HAVE_DECL_IBV_EXP_CREATE_QP 0
+#define HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE 1
+#define HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN 1
+#define HAVE_DECL_IBV_EXP_CQ_MODERATION 1
+#define HAVE_DECL_IBV_EXP_CREATE_QP 1
+#define HAVE_DECL_IBV_EXP_CREATE_RES_DOMAIN 1
#define HAVE_DECL_IBV_EXP_CREATE_SRQ 0
#define HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT 0
+#define HAVE_DECL_IBV_EXP_DESTROY_RES_DOMAIN 1
#define HAVE_DECL_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS 0
#define HAVE_DECL_IBV_EXP_DEVICE_ATTR_RESERVED_2 0
-#define HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT 0
-#define HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE 0
+#define HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT 1
+#define HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE 1
#define HAVE_DECL_IBV_EXP_MR_FIXED_BUFFER_SIZE 0
-#define HAVE_DECL_IBV_EXP_MR_INDIRECT_KLMS 0
+#define HAVE_DECL_IBV_EXP_MR_INDIRECT_KLMS 1
#define HAVE_DECL_IBV_EXP_ODP_SUPPORT_IMPLICIT 0
-#define HAVE_DECL_IBV_EXP_POST_SEND 0
-#define HAVE_DECL_IBV_EXP_PREFETCH_MR 0
-#define HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS 0
-#define HAVE_DECL_IBV_EXP_QPT_DC_INI 0
-#define HAVE_DECL_IBV_EXP_QP_CREATE_UMR 0
-#define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG 0
+#define HAVE_DECL_IBV_EXP_POST_SEND 1
+#define HAVE_DECL_IBV_EXP_PREFETCH_MR 1
+#define HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS 1
+#define HAVE_DECL_IBV_EXP_QPT_DC_INI 1
+#define HAVE_DECL_IBV_EXP_QP_CREATE_UMR 1
+#define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG 1
+#define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_RES_DOMAIN 1
#define HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT 0
-#define HAVE_DECL_IBV_EXP_QUERY_DEVICE 0
-#define HAVE_DECL_IBV_EXP_QUERY_GID_ATTR 0
-#define HAVE_DECL_IBV_EXP_REG_MR 0
-#define HAVE_DECL_IBV_EXP_SEND_EXT_ATOMIC_INLINE 0
-#define HAVE_DECL_IBV_EXP_SETENV 0
-#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP 0
-#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD 0
-#define HAVE_DECL_IBV_EXP_WR_NOP 0
+#define HAVE_DECL_IBV_EXP_QUERY_DEVICE 1
+#define HAVE_DECL_IBV_EXP_QUERY_GID_ATTR 1
+#define HAVE_DECL_IBV_EXP_REG_MR 1
+#define HAVE_DECL_IBV_EXP_RES_DOMAIN_THREAD_MODEL 1
+#define HAVE_DECL_IBV_EXP_SEND_EXT_ATOMIC_INLINE 1
+#define HAVE_DECL_IBV_EXP_SETENV 1
+#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP 1
+#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD 1
+#define HAVE_DECL_IBV_EXP_WR_NOP 1
#define HAVE_DECL_IBV_GET_ASYNC_EVENT 1
#define HAVE_DECL_IBV_GET_DEVICE_NAME 1
#define HAVE_DECL_IBV_LINK_LAYER_ETHERNET 1
#define HAVE_DECL_IBV_LINK_LAYER_INFINIBAND 1
+#define HAVE_DECL_IBV_MLX5_EXP_GET_CQ_INFO 1
+#define HAVE_DECL_IBV_MLX5_EXP_GET_QP_INFO 1
+#define HAVE_DECL_IBV_MLX5_EXP_GET_SRQ_INFO 1
+#define HAVE_DECL_IBV_MLX5_EXP_UPDATE_CQ_CI 1
+#define HAVE_DECL_IBV_QUERY_DEVICE_EX 1
#define HAVE_DECL_IBV_QUERY_GID 1
#define HAVE_DECL_IBV_WC_STATUS_STR 1
#define HAVE_DECL_MADV_FREE 0
#define HAVE_DECL_MADV_REMOVE 1
+#define HAVE_DECL_MLX5_WQE_CTRL_SOLICITED 1
#define HAVE_DECL_POSIX_MADV_DONTNEED 1
-#define HAVE_DECL_PR_SET_PTRACER 0
-#define HAVE_DECL_SPEED_UNKNOWN 0
+#define HAVE_DECL_PR_SET_PTRACER 1
+#define HAVE_DECL_SPEED_UNKNOWN 1
#define HAVE_DECL_STRDUPA 1
#define HAVE_DECL_STRERROR_R 1
#define HAVE_DECL_SYS_BRK 1
@@ -78,14 +89,22 @@
#define HAVE_DECL_SYS_SHMDT 1
#define HAVE_DECL___PPC_GET_TIMEBASE_FREQ 0
#define HAVE_DLFCN_H 1
+#define HAVE_EXP_UMR 1
#define HAVE_HW_TIMER 1
#define HAVE_IB 1
+#define HAVE_IBV_EXP_QP_CREATE_UMR 1
+#define HAVE_IBV_EXP_RES_DOMAIN 1
+#define HAVE_IB_EXT_ATOMICS 1
+#define HAVE_INFINIBAND_MLX5_HW_H 1
#define HAVE_INTTYPES_H 1
#define HAVE_LIBRT 1
#define HAVE_MALLOC_GET_STATE 1
#define HAVE_MALLOC_HOOK 1
#define HAVE_MALLOC_SET_STATE 1
+#define HAVE_MASKED_ATOMICS_ENDIANNESS 1
#define HAVE_MEMORY_H 1
+#define HAVE_MLX5_HW 1
+#define HAVE_MLX5_HW_UD 1
#define HAVE_NUMA 1
#define HAVE_NUMAIF_H 1
#define HAVE_NUMA_H 1
@@ -96,12 +115,23 @@
#define HAVE_STRING_H 1
#define HAVE_STRUCT_BITMASK 1
#define HAVE_STRUCT_DL_PHDR_INFO 1
+#define HAVE_STRUCT_IBV_ASYNC_EVENT_ELEMENT_DCT 1
+#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXP_DEVICE_CAP_FLAGS 1
+#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS 1
+#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS_PER_TRANSPORT_CAPS_DC_ODP_CAPS 1
+#define HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_MAX_INL_RECV 1
+#define HAVE_STRUCT_IBV_MLX5_QP_INFO_BF_NEED_LOCK 1
+#define HAVE_STRUCT_MLX5_AH_IBV_AH 1
+#define HAVE_STRUCT_MLX5_GRH_AV_RMAC 1
+#define HAVE_STRUCT_MLX5_WQE_AV_BASE 1
#define HAVE_SYS_STAT_H 1
#define HAVE_SYS_TYPES_H 1
+#define HAVE_TL_DC 1
#define HAVE_TL_RC 1
#define HAVE_TL_UD 1
#define HAVE_UCM_PTMALLOC286 1
#define HAVE_UNISTD_H 1
+#define HAVE_VERBS_EXP_H 1
#define HAVE___CLEAR_CACHE 1
#define HAVE___CURBRK 1
#define LT_OBJDIR ".libs/"
@@ -117,7 +147,7 @@
#define STRERROR_R_CHAR_P 1
#define UCM_BISTRO_HOOKS 1
#define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_TRACE_POLL
-#define UCX_CONFIGURE_FLAGS "--build=x86_64-conda_cos6-linux-gnu --host=x86_64-conda_cos6-linux-gnu --prefix=/home/nfs/pentschev/miniconda3/envs/ucx --disable-cma --enable-mt --with-gnu-ld --with-rdmacm=/usr --with-cuda=/usr/local/cuda"
+#define UCX_CONFIGURE_FLAGS "--prefix=/home/nfs/pentschev/miniconda3/envs/ucx-from-source --enable-debug --with-cuda=/usr/local/cuda-9.2/ --enable-mt --disable-cma CPPFLAGS=-I//usr/local/cuda-9.2/include"
#define UCX_MODULE_SUBDIR "ucx"
#define VERSION "1.7"
#define restrict __restrict
@@ -125,6 +155,6 @@
#define ucm_MODULES ":cuda"
#define uct_MODULES ":cuda:ib:rdmacm"
#define uct_cuda_MODULES ""
-#define uct_ib_MODULES ""
+#define uct_ib_MODULES ":cm"
#define uct_rocm_MODULES ""
#define ucx_perftest_MODULES ":cuda"
--- ucx_info_c 2019-08-08 11:01:44.940609301 -0700
+++ ucx_from_source_info_c 2019-08-08 11:02:20.992525440 -0700
@@ -17,7 +17,7 @@
UCX_PROFILE_FILE=
UCX_PROFILE_LOG_SIZE=4M
UCX_RCACHE_CHECK_PFN=n
-UCX_MODULE_DIR=/home/nfs/pentschev/miniconda3/envs/ucx/lib/ucx
+UCX_MODULE_DIR=/home/nfs/pentschev/miniconda3/envs/ucx-from-source/lib/ucx
UCX_MODULE_LOG_LEVEL=TRACE
UCX_MEM_LOG_LEVEL=WARN
UCX_MEM_ALLOC_ALIGN=16
@@ -116,6 +116,7 @@
UCX_IB_GID_INDEX=auto
UCX_IB_SUBNET_PREFIX=
UCX_IB_GPU_DIRECT_RDMA=try
+UCX_IB_MAX_INLINE_KLM_LIST=inf
UCX_IB_PCI_BW=
UCX_RC_VERBS_MAX_BCOPY=8K
UCX_RC_VERBS_ALLOC=huge,thp,md,mmap,heap
@@ -127,6 +128,10 @@
UCX_RC_VERBS_TX_INLINE_RESP=32
UCX_RC_VERBS_TX_MIN_SGE=3
UCX_RC_VERBS_TX_CQ_MODERATION=64
+UCX_RC_VERBS_TX_EVENT_MOD_COUNT=0
+UCX_RC_VERBS_TX_EVENT_MOD_PERIOD=0.00us
+UCX_RC_VERBS_RX_EVENT_MOD_COUNT=0
+UCX_RC_VERBS_RX_EVENT_MOD_PERIOD=0.00us
UCX_RC_VERBS_TX_MAX_BUFS=-1
UCX_RC_VERBS_TX_BUFS_GROW=1024
UCX_RC_VERBS_RX_QUEUE_LEN=4095
@@ -142,6 +147,7 @@
UCX_RC_VERBS_HOP_LIMIT=255
UCX_RC_VERBS_LID_PATH_BITS=0-17
UCX_RC_VERBS_PKEY=0x7fff
+UCX_RC_VERBS_RESOURCE_DOMAIN=y
UCX_RC_VERBS_PATH_MTU=default
UCX_RC_VERBS_MAX_RD_ATOMIC=4
UCX_RC_VERBS_TIMEOUT=1000000.00us
@@ -156,6 +162,104 @@
UCX_RC_VERBS_TX_MAX_WR=inf
UCX_RC_VERBS_FENCE=y
UCX_RC_VERBS_FC_SOFT_THRESH=0.500
+UCX_RC_MLX5_MAX_BCOPY=8K
+UCX_RC_MLX5_ALLOC=huge,thp,md,mmap,heap
+UCX_RC_MLX5_FAILURE=ERROR
+UCX_RC_MLX5_TX_QUEUE_LEN=256
+UCX_RC_MLX5_TX_MAX_BATCH=16
+UCX_RC_MLX5_TX_MAX_POLL=16
+UCX_RC_MLX5_TX_MIN_INLINE=64
+UCX_RC_MLX5_TX_INLINE_RESP=32
+UCX_RC_MLX5_TX_MIN_SGE=3
+UCX_RC_MLX5_TX_CQ_MODERATION=64
+UCX_RC_MLX5_TX_EVENT_MOD_COUNT=0
+UCX_RC_MLX5_TX_EVENT_MOD_PERIOD=0.00us
+UCX_RC_MLX5_RX_EVENT_MOD_COUNT=0
+UCX_RC_MLX5_RX_EVENT_MOD_PERIOD=0.00us
+UCX_RC_MLX5_TX_MAX_BUFS=-1
+UCX_RC_MLX5_TX_BUFS_GROW=1024
+UCX_RC_MLX5_RX_QUEUE_LEN=4095
+UCX_RC_MLX5_RX_MAX_BATCH=16
+UCX_RC_MLX5_RX_MAX_POLL=16
+UCX_RC_MLX5_RX_INLINE=64
+UCX_RC_MLX5_RX_MAX_BUFS=-1
+UCX_RC_MLX5_RX_BUFS_GROW=0
+UCX_RC_MLX5_ADDR_TYPE=auto
+UCX_RC_MLX5_IS_GLOBAL=n
+UCX_RC_MLX5_SL=0
+UCX_RC_MLX5_TRAFFIC_CLASS=auto
+UCX_RC_MLX5_HOP_LIMIT=255
+UCX_RC_MLX5_LID_PATH_BITS=0-17
+UCX_RC_MLX5_PKEY=0x7fff
+UCX_RC_MLX5_RESOURCE_DOMAIN=y
+UCX_RC_MLX5_PATH_MTU=default
+UCX_RC_MLX5_MAX_RD_ATOMIC=4
+UCX_RC_MLX5_TIMEOUT=1000000.00us
+UCX_RC_MLX5_RETRY_COUNT=7
+UCX_RC_MLX5_RNR_TIMEOUT=1000.00us
+UCX_RC_MLX5_RNR_RETRY_COUNT=7
+UCX_RC_MLX5_TX_CQ_LEN=4096
+UCX_RC_MLX5_FC_ENABLE=y
+UCX_RC_MLX5_FC_WND_SIZE=512
+UCX_RC_MLX5_FC_HARD_THRESH=0.250
+UCX_RC_MLX5_MMIO_MODE=auto
+UCX_RC_MLX5_TX_MAX_BB=inf
+UCX_RC_MLX5_TM_ENABLE=n
+UCX_RC_MLX5_TM_LIST_SIZE=1024
+UCX_RC_MLX5_TM_MAX_BCOPY=48K
+UCX_RC_MLX5_FC_SOFT_THRESH=0.500
+UCX_DC_MLX5_MAX_BCOPY=8K
+UCX_DC_MLX5_ALLOC=huge,thp,md,mmap,heap
+UCX_DC_MLX5_FAILURE=ERROR
+UCX_DC_MLX5_TX_QUEUE_LEN=128
+UCX_DC_MLX5_TX_MAX_BATCH=16
+UCX_DC_MLX5_TX_MAX_POLL=16
+UCX_DC_MLX5_TX_MIN_INLINE=64
+UCX_DC_MLX5_TX_INLINE_RESP=32
+UCX_DC_MLX5_TX_MIN_SGE=3
+UCX_DC_MLX5_TX_CQ_MODERATION=64
+UCX_DC_MLX5_TX_EVENT_MOD_COUNT=0
+UCX_DC_MLX5_TX_EVENT_MOD_PERIOD=0.00us
+UCX_DC_MLX5_RX_EVENT_MOD_COUNT=0
+UCX_DC_MLX5_RX_EVENT_MOD_PERIOD=0.00us
+UCX_DC_MLX5_TX_MAX_BUFS=-1
+UCX_DC_MLX5_TX_BUFS_GROW=1024
+UCX_DC_MLX5_RX_QUEUE_LEN=4095
+UCX_DC_MLX5_RX_MAX_BATCH=16
+UCX_DC_MLX5_RX_MAX_POLL=16
+UCX_DC_MLX5_RX_INLINE=64
+UCX_DC_MLX5_RX_MAX_BUFS=-1
+UCX_DC_MLX5_RX_BUFS_GROW=0
+UCX_DC_MLX5_ADDR_TYPE=auto
+UCX_DC_MLX5_IS_GLOBAL=n
+UCX_DC_MLX5_SL=0
+UCX_DC_MLX5_TRAFFIC_CLASS=auto
+UCX_DC_MLX5_HOP_LIMIT=255
+UCX_DC_MLX5_LID_PATH_BITS=0-17
+UCX_DC_MLX5_PKEY=0x7fff
+UCX_DC_MLX5_RESOURCE_DOMAIN=y
+UCX_DC_MLX5_PATH_MTU=default
+UCX_DC_MLX5_MAX_RD_ATOMIC=4
+UCX_DC_MLX5_TIMEOUT=1000000.00us
+UCX_DC_MLX5_RETRY_COUNT=7
+UCX_DC_MLX5_RNR_TIMEOUT=1000.00us
+UCX_DC_MLX5_RNR_RETRY_COUNT=7
+UCX_DC_MLX5_TX_CQ_LEN=4096
+UCX_DC_MLX5_FC_ENABLE=y
+UCX_DC_MLX5_FC_WND_SIZE=512
+UCX_DC_MLX5_FC_HARD_THRESH=0.250
+UCX_DC_MLX5_MMIO_MODE=auto
+UCX_DC_MLX5_TX_MAX_BB=inf
+UCX_DC_MLX5_TM_ENABLE=n
+UCX_DC_MLX5_TM_LIST_SIZE=1024
+UCX_DC_MLX5_TM_MAX_BCOPY=48K
+UCX_DC_MLX5_RX_QUEUE_LEN_INIT=128
+UCX_DC_MLX5_NUM_DCI=8
+UCX_DC_MLX5_TX_POLICY=dcs_quota
+UCX_DC_MLX5_RAND_DCI_SEED=0
+UCX_DC_MLX5_QUOTA=32
+UCX_DC_MLX5_COMPACT_AV=y
+UCX_DC_MLX5_MMIO_MODE=auto
UCX_UD_VERBS_MAX_BCOPY=8K
UCX_UD_VERBS_ALLOC=huge,thp,md,mmap,heap
UCX_UD_VERBS_FAILURE=ERROR
@@ -166,6 +270,10 @@
UCX_UD_VERBS_TX_INLINE_RESP=32
UCX_UD_VERBS_TX_MIN_SGE=3
UCX_UD_VERBS_TX_CQ_MODERATION=64
+UCX_UD_VERBS_TX_EVENT_MOD_COUNT=0
+UCX_UD_VERBS_TX_EVENT_MOD_PERIOD=0.00us
+UCX_UD_VERBS_RX_EVENT_MOD_COUNT=0
+UCX_UD_VERBS_RX_EVENT_MOD_PERIOD=0.00us
UCX_UD_VERBS_TX_MAX_BUFS=-1
UCX_UD_VERBS_TX_BUFS_GROW=1024
UCX_UD_VERBS_RX_QUEUE_LEN=4096
@@ -181,10 +289,80 @@
UCX_UD_VERBS_HOP_LIMIT=255
UCX_UD_VERBS_LID_PATH_BITS=0-17
UCX_UD_VERBS_PKEY=0x7fff
+UCX_UD_VERBS_RESOURCE_DOMAIN=y
UCX_UD_VERBS_RX_QUEUE_LEN_INIT=128
UCX_UD_VERBS_TIMEOUT=300000000.00us
UCX_UD_VERBS_SLOW_TIMER_BACKOFF=2.000
UCX_UD_VERBS_ETH_DGID_CHECK=y
+UCX_UD_MLX5_MAX_BCOPY=8K
+UCX_UD_MLX5_ALLOC=huge,thp,md,mmap,heap
+UCX_UD_MLX5_FAILURE=ERROR
+UCX_UD_MLX5_TX_QUEUE_LEN=256
+UCX_UD_MLX5_TX_MAX_BATCH=16
+UCX_UD_MLX5_TX_MAX_POLL=16
+UCX_UD_MLX5_TX_MIN_INLINE=64
+UCX_UD_MLX5_TX_INLINE_RESP=32
+UCX_UD_MLX5_TX_MIN_SGE=3
+UCX_UD_MLX5_TX_CQ_MODERATION=64
+UCX_UD_MLX5_TX_EVENT_MOD_COUNT=0
+UCX_UD_MLX5_TX_EVENT_MOD_PERIOD=0.00us
+UCX_UD_MLX5_RX_EVENT_MOD_COUNT=0
+UCX_UD_MLX5_RX_EVENT_MOD_PERIOD=0.00us
+UCX_UD_MLX5_TX_MAX_BUFS=-1
+UCX_UD_MLX5_TX_BUFS_GROW=1024
+UCX_UD_MLX5_RX_QUEUE_LEN=4096
+UCX_UD_MLX5_RX_MAX_BATCH=16
+UCX_UD_MLX5_RX_MAX_POLL=16
+UCX_UD_MLX5_RX_INLINE=0
+UCX_UD_MLX5_RX_MAX_BUFS=-1
+UCX_UD_MLX5_RX_BUFS_GROW=0
+UCX_UD_MLX5_ADDR_TYPE=auto
+UCX_UD_MLX5_IS_GLOBAL=n
+UCX_UD_MLX5_SL=0
+UCX_UD_MLX5_TRAFFIC_CLASS=auto
+UCX_UD_MLX5_HOP_LIMIT=255
+UCX_UD_MLX5_LID_PATH_BITS=0-17
+UCX_UD_MLX5_PKEY=0x7fff
+UCX_UD_MLX5_RESOURCE_DOMAIN=y
+UCX_UD_MLX5_RX_QUEUE_LEN_INIT=128
+UCX_UD_MLX5_TIMEOUT=300000000.00us
+UCX_UD_MLX5_SLOW_TIMER_BACKOFF=2.000
+UCX_UD_MLX5_ETH_DGID_CHECK=y
+UCX_UD_MLX5_MMIO_MODE=auto
+UCX_UD_MLX5_COMPACT_AV=y
UCX_RDMACM_ADDR_RESOLVE_TIMEOUT=500000.00us
UCX_RDMACM_BACKLOG=1024
UCX_RDMACM_CM_ID_QUOTA=64
+UCX_CM_MAX_BCOPY=8K
+UCX_CM_ALLOC=huge,thp,md,mmap,heap
+UCX_CM_FAILURE=ERROR
+UCX_CM_TX_QUEUE_LEN=256
+UCX_CM_TX_MAX_BATCH=16
+UCX_CM_TX_MAX_POLL=16
+UCX_CM_TX_MIN_INLINE=64
+UCX_CM_TX_INLINE_RESP=32
+UCX_CM_TX_MIN_SGE=3
+UCX_CM_TX_CQ_MODERATION=64
+UCX_CM_TX_EVENT_MOD_COUNT=0
+UCX_CM_TX_EVENT_MOD_PERIOD=0.00us
+UCX_CM_RX_EVENT_MOD_COUNT=0
+UCX_CM_RX_EVENT_MOD_PERIOD=0.00us
+UCX_CM_TX_MAX_BUFS=-1
+UCX_CM_TX_BUFS_GROW=1024
+UCX_CM_RX_QUEUE_LEN=4096
+UCX_CM_RX_MAX_BATCH=16
+UCX_CM_RX_MAX_POLL=16
+UCX_CM_RX_INLINE=0
+UCX_CM_RX_MAX_BUFS=-1
+UCX_CM_RX_BUFS_GROW=0
+UCX_CM_ADDR_TYPE=auto
+UCX_CM_IS_GLOBAL=n
+UCX_CM_SL=0
+UCX_CM_TRAFFIC_CLASS=auto
+UCX_CM_HOP_LIMIT=255
+UCX_CM_LID_PATH_BITS=0-17
+UCX_CM_PKEY=0x7fff
+UCX_CM_RESOURCE_DOMAIN=y
+UCX_CM_TIMEOUT=300000.00us
+UCX_CM_RETRY_COUNT=100
+UCX_CM_MAX_OP=1024
--- ucx_info_d 2019-08-08 11:01:08.136694826 -0700
+++ ucx_from_source_info_d 2019-08-08 11:02:32.404498878 -0700
@@ -270,7 +270,7 @@
# put_opt_zcopy_align: <= 512
# put_align_mtu: <= 4K
# get_bcopy: <= 8K
-# get_zcopy: 1..1G, up to 8 iov
+# get_zcopy: 65..1G, up to 8 iov
# get_opt_zcopy_align: <= 512
# get_align_mtu: <= 4K
# am_short: <= 123
@@ -287,6 +287,88 @@
# error handling: peer failure
#
#
+# Transport: rc_mlx5
+#
+# Device: mlx5_3:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 600 nsec + 1 * N
+# overhead: 40 nsec
+# put_short: <= 220
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 234
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 186
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to ep
+# priority: 30
+# device address: 3 bytes
+# ep address: 7 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
+# Transport: dc_mlx5
+#
+# Device: mlx5_3:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 660 nsec
+# overhead: 40 nsec
+# put_short: <= 172
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 186
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 138
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 5 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
# Transport: ud
#
# Device: mlx5_3:1
@@ -309,6 +391,44 @@
# error handling: peer failure
#
#
+# Transport: ud_mlx5
+#
+# Device: mlx5_3:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 610 nsec
+# overhead: 80 nsec
+# am_short: <= 180
+# am_bcopy: <= 4088
+# am_zcopy: <= 4088, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 132
+# connection: to ep, to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 3 bytes
+# ep address: 6 bytes
+# error handling: peer failure
+#
+#
+# Transport: cm
+#
+# Device: mlx5_3:1
+#
+# capabilities:
+# bandwidth: 9329.42 MB/sec
+# latency: 600 nsec
+# overhead: 1200 nsec
+# am_bcopy: <= 214
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 4 bytes
+# error handling: none
+#
+#
# Memory domain: ib/mlx5_2
# component: ib
# register: unlimited, cost: 90 nsec
@@ -329,7 +449,7 @@
# put_opt_zcopy_align: <= 512
# put_align_mtu: <= 4K
# get_bcopy: <= 8K
-# get_zcopy: 1..1G, up to 8 iov
+# get_zcopy: 65..1G, up to 8 iov
# get_opt_zcopy_align: <= 512
# get_align_mtu: <= 4K
# am_short: <= 123
@@ -346,6 +466,88 @@
# error handling: peer failure
#
#
+# Transport: rc_mlx5
+#
+# Device: mlx5_2:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 600 nsec + 1 * N
+# overhead: 40 nsec
+# put_short: <= 220
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 234
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 186
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to ep
+# priority: 30
+# device address: 3 bytes
+# ep address: 7 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
+# Transport: dc_mlx5
+#
+# Device: mlx5_2:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 660 nsec
+# overhead: 40 nsec
+# put_short: <= 172
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 186
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 138
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 5 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
# Transport: ud
#
# Device: mlx5_2:1
@@ -368,6 +570,44 @@
# error handling: peer failure
#
#
+# Transport: ud_mlx5
+#
+# Device: mlx5_2:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 610 nsec
+# overhead: 80 nsec
+# am_short: <= 180
+# am_bcopy: <= 4088
+# am_zcopy: <= 4088, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 132
+# connection: to ep, to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 3 bytes
+# ep address: 6 bytes
+# error handling: peer failure
+#
+#
+# Transport: cm
+#
+# Device: mlx5_2:1
+#
+# capabilities:
+# bandwidth: 9329.42 MB/sec
+# latency: 600 nsec
+# overhead: 1200 nsec
+# am_bcopy: <= 214
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 4 bytes
+# error handling: none
+#
+#
# Memory domain: ib/mlx5_1
# component: ib
# register: unlimited, cost: 90 nsec
@@ -388,7 +628,7 @@
# put_opt_zcopy_align: <= 512
# put_align_mtu: <= 4K
# get_bcopy: <= 8K
-# get_zcopy: 1..1G, up to 8 iov
+# get_zcopy: 65..1G, up to 8 iov
# get_opt_zcopy_align: <= 512
# get_align_mtu: <= 4K
# am_short: <= 123
@@ -405,6 +645,88 @@
# error handling: peer failure
#
#
+# Transport: rc_mlx5
+#
+# Device: mlx5_1:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 600 nsec + 1 * N
+# overhead: 40 nsec
+# put_short: <= 220
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 234
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 186
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to ep
+# priority: 30
+# device address: 3 bytes
+# ep address: 7 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
+# Transport: dc_mlx5
+#
+# Device: mlx5_1:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 660 nsec
+# overhead: 40 nsec
+# put_short: <= 172
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 186
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 138
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 5 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
# Transport: ud
#
# Device: mlx5_1:1
@@ -427,6 +749,44 @@
# error handling: peer failure
#
#
+# Transport: ud_mlx5
+#
+# Device: mlx5_1:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 610 nsec
+# overhead: 80 nsec
+# am_short: <= 180
+# am_bcopy: <= 4088
+# am_zcopy: <= 4088, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 132
+# connection: to ep, to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 3 bytes
+# ep address: 6 bytes
+# error handling: peer failure
+#
+#
+# Transport: cm
+#
+# Device: mlx5_1:1
+#
+# capabilities:
+# bandwidth: 9329.42 MB/sec
+# latency: 600 nsec
+# overhead: 1200 nsec
+# am_bcopy: <= 214
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 4 bytes
+# error handling: none
+#
+#
# Memory domain: ib/mlx5_0
# component: ib
# register: unlimited, cost: 90 nsec
@@ -447,7 +807,7 @@
# put_opt_zcopy_align: <= 512
# put_align_mtu: <= 4K
# get_bcopy: <= 8K
-# get_zcopy: 1..1G, up to 8 iov
+# get_zcopy: 65..1G, up to 8 iov
# get_opt_zcopy_align: <= 512
# get_align_mtu: <= 4K
# am_short: <= 123
@@ -464,6 +824,88 @@
# error handling: peer failure
#
#
+# Transport: rc_mlx5
+#
+# Device: mlx5_0:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 600 nsec + 1 * N
+# overhead: 40 nsec
+# put_short: <= 220
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 234
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 186
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to ep
+# priority: 30
+# device address: 3 bytes
+# ep address: 7 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
+# Transport: dc_mlx5
+#
+# Device: mlx5_0:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 660 nsec
+# overhead: 40 nsec
+# put_short: <= 172
+# put_bcopy: <= 8K
+# put_zcopy: <= 1G, up to 8 iov
+# put_opt_zcopy_align: <= 512
+# put_align_mtu: <= 4K
+# get_bcopy: <= 8K
+# get_zcopy: 65..1G, up to 8 iov
+# get_opt_zcopy_align: <= 512
+# get_align_mtu: <= 4K
+# am_short: <= 186
+# am_bcopy: <= 8190
+# am_zcopy: <= 8190, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 138
+# domain: device
+# atomic_add: 32, 64 bit
+# atomic_and: 32, 64 bit
+# atomic_or: 32, 64 bit
+# atomic_xor: 32, 64 bit
+# atomic_fadd: 32, 64 bit
+# atomic_fand: 32, 64 bit
+# atomic_for: 32, 64 bit
+# atomic_fxor: 32, 64 bit
+# atomic_swap: 32, 64 bit
+# atomic_cswap: 32, 64 bit
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 5 bytes
+# error handling: buffer (zcopy), remote access, peer failure
+#
+#
# Transport: ud
#
# Device: mlx5_0:1
@@ -486,6 +928,44 @@
# error handling: peer failure
#
#
+# Transport: ud_mlx5
+#
+# Device: mlx5_0:1
+#
+# capabilities:
+# bandwidth: 11794.23 MB/sec
+# latency: 610 nsec
+# overhead: 80 nsec
+# am_short: <= 180
+# am_bcopy: <= 4088
+# am_zcopy: <= 4088, up to 3 iov
+# am_opt_zcopy_align: <= 512
+# am_align_mtu: <= 4K
+# am header: <= 132
+# connection: to ep, to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 3 bytes
+# ep address: 6 bytes
+# error handling: peer failure
+#
+#
+# Transport: cm
+#
+# Device: mlx5_0:1
+#
+# capabilities:
+# bandwidth: 9329.42 MB/sec
+# latency: 600 nsec
+# overhead: 1200 nsec
+# am_bcopy: <= 214
+# connection: to iface
+# priority: 30
+# device address: 3 bytes
+# iface address: 4 bytes
+# error handling: none
+#
+#
# Memory domain: rdmacm
# component: rdmacm
# supports client-server connection establishment via sockaddr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment