-
-
Save zarzen/8cef76119dc6008ac5959742cf19f96c to your computer and use it in GitHub Desktop.
fabric-efa.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ec2-user@ip-172-31-11-29:~/efa-bad-practice/build$ FI_EFA_RECVWIN_SIZE=500000000 FI_LOG_LEVEL=Debug ./shm_worker efa-client 1 0 1000000000 | |
libfabric:10452:core:core:fi_param_get_():280<info> variable perf_cntr=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable hook=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_size=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_count=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_monitor=<not set> | |
libfabric:10452:core:mr:ofi_default_cache_size():56<info> default cache size=2754853148 | |
libfabric:10452:core:core:fi_param_get_():280<info> variable provider=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable provider_path=<not set> | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: shm (1.1) | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable tx_size=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable rx_size=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable msg_tx_size=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable msg_rx_size=<not set> | |
libfabric:10452:core:core:fi_param_get_():280<info> variable universe_size=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable cm_progress_interval=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable data_auto_progress=<not set> | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable buffer_size=<not set> | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_rxm (1.0) | |
libfabric:10452:ofi_mrail:core:fi_param_get_():280<info> variable config=<not set> | |
libfabric:10452:ofi_mrail:core:fi_param_get_():280<info> variable addr=<not set> | |
libfabric:10452:ofi_mrail:core:fi_param_get_():280<info> variable addr_strc=<not set> | |
libfabric:10452:ofi_mrail:core:mrail_parse_env_vars():116<info> unable to read FI_OFI_MRAIL_ADDR env variable | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_mrail (1.0) | |
libfabric:10452:ofi_rxd:core:fi_param_get_():280<info> variable spin_count=<not set> | |
libfabric:10452:ofi_rxd:core:fi_param_get_():280<info> variable retry=<not set> | |
libfabric:10452:ofi_rxd:core:fi_param_get_():280<info> variable max_peers=<not set> | |
libfabric:10452:ofi_rxd:core:fi_param_get_():280<info> variable max_unacked=<not set> | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_rxd (1.0) | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable rx_window_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable tx_max_credits=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable tx_min_credits=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable tx_queue_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable enable_sas_ordering=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable enable_shm_transfer=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable shm_av_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable shm_max_medium_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():294<info> read int var recvwin_size=500000000 | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable cq_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable max_memcpy_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable mr_cache_enable=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable mr_max_cached_count=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable mr_max_cached_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable mtu_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable tx_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable rx_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable tx_iov_limit=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable rx_iov_limit=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable rx_copy_unexp=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable rx_copy_ooo=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable max_timeout=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable timeout_interval=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable efa_cq_read_size=<not set> | |
libfabric:10452:efa:core:fi_param_get_():280<info> variable shm_cq_read_size=<not set> | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: efa (2.0) | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: UDP (1.1) | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: sockets (2.0) | |
libfabric:10452:tcp:core:fi_param_get_():280<info> variable port_high_range=<not set> | |
libfabric:10452:tcp:core:fi_param_get_():280<info> variable port_low_range=<not set> | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: tcp (1.0) | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_hook_perf (1.0) | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_hook_debug (1.0) | |
libfabric:10452:core:core:ofi_register_provider():404<info> registering provider: ofi_hook_noop (1.0) | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_MSG_PREFIX | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_LOCAL_MR | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_MSG_PREFIX, FI_LOCAL_MR | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: | |
libfabric:10452:core:core:ofi_layering_ok():885<info> Shm requested, skipping util layering | |
libfabric:10452:core:core:ofi_layering_ok():885<info> Shm requested, skipping util layering | |
libfabric:10452:core:core:ofi_layering_ok():885<info> Shm requested, skipping util layering | |
libfabric:10452:ofi_rxm:core:fi_param_get_():280<info> variable use_srx=<not set> | |
libfabric:10452:efa:core:ofi_check_ep_type():629<info> unsupported endpoint type | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Supported: FI_EP_RDM | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Requested: FI_EP_MSG | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:ofi_check_ep_type():629<info> unsupported endpoint type | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Supported: FI_EP_RDM | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Requested: FI_EP_MSG | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_MSG_PREFIX | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: FI_CONTEXT, FI_RX_CQ_DATA | |
libfabric:10452:core:core:fi_getinfo_():964<warn> fi_getinfo: provider efa returned -61 (No data available) | |
libfabric:10452:core:core:ofi_layering_ok():857<info> Need core provider, skipping ofi_rxd | |
libfabric:10452:core:core:ofi_layering_ok():857<info> Need core provider, skipping ofi_mrail | |
libfabric:10452:core:core:fi_getinfo_():964<warn> fi_getinfo: provider ofi_rxm returned -61 (No data available) | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:ofi_check_ep_type():629<info> unsupported endpoint type | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Supported: FI_EP_RDM | |
libfabric:10452:efa:core:ofi_check_ep_type():630<info> Requested: FI_EP_DGRAM | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:core:core:ofi_layering_ok():857<info> Need core provider, skipping ofi_rxm | |
libfabric:10452:core:core:ofi_layering_ok():857<info> Need core provider, skipping ofi_mrail | |
libfabric:10452:ofi_mrail:fabric:mrail_get_core_info():289<warn> OFI_MRAIL_ADDR_STRC env variable not set! | |
libfabric:10452:core:core:fi_getinfo_():964<warn> fi_getinfo: provider ofi_mrail returned -61 (No data available) | |
Using OFI device: EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:core:core:fi_fabric_():1163<info> Opened fabric: shm | |
libfabric:10452:core:core:fi_fabric_():1163<info> Opened fabric: EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_MSG_PREFIX | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: | |
libfabric:10452:efa:domain:efa_domain_open():168<info> Allocated pd[1]. | |
libfabric:10452:efa:domain:efa_domain_open():196<info> EFA MR cache enabled, max_cnt: 228556 max_size: 92771293593 merge_regions: 1 | |
libfabric:10452:core:core:fi_param_get_():280<info> variable universe_size=<not set> | |
libfabric:10452:efa:av:util_av_init():455<info> AV size 16384 | |
libfabric:10452:shm:av:util_av_init():455<info> AV size 128 | |
--- fi->tx_attr-size: 4096 | |
--- fi->rx_attr->size: 8192 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:fabric:efa_get_matching_info():705<info> found match for interface (null) EFA-fe80::c2:a4ff:fe76:5593 | |
libfabric:10452:efa:core:efa_check_hints():184<info> Required hints mode bits not set | |
libfabric:10452:efa:core:efa_check_hints():185<info> Expected: FI_MSG_PREFIX | |
libfabric:10452:efa:core:efa_check_hints():185<info> Given: | |
-efa-ep-0 rxcq : 0x218abb0 | |
libfabric:10452:efa:ep_ctrl:efa_ep_create_qp():334<info> efa_ep_create_qp(): create QP 0 | |
libfabric:10452:efa:ep_ctrl:efa_ep_getname():61<info> EP addr: GID[fe80::c2:a4ff:fe76:5593] QP[0] (length 32) | |
garbe at instr_ptr | |
instr memory all zeros: 1 | |
libfabric:10452:efa:ep_ctrl:efa_ep_getname():61<info> EP addr: GID[fe80::c2:a4ff:fe76:5593] QP[0] (length 64) | |
Local ep addresses: | |
fi_addr_efa://[fe80::c2:a4ff:fe76:5593]:0 | |
sem_mutex val: 1 | |
instr type 1 | |
task for set efa addr | |
libfabric:10452:efa:av:efa_av_insert_ah():127<info> Insert address: GID[fe80::a3:9ff:fec8:cfbb] QP[0] | |
libfabric:10452:efa:av:efa_av_insert_ah():185<info> av successfully inserted conn[0x21bdbc0] fi_addr[0] | |
verified inserted: fi_addr_efa://[fe80::a3:9ff:fec8:cfbb]:0 | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.1266 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 66.0508 ms | |
2 job cost : 53.592 ms | |
3 job cost : 30.9777 ms | |
4 job cost : 9.68838 ms | |
5 job cost : 0.000238419 ms | |
6 job cost : 25.049 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0424385 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.8674 ms | |
2 job cost : 19.2964 ms | |
3 job cost : 11.3742 ms | |
4 job cost : 3.45922 ms | |
5 job cost : 0.0116825 ms | |
6 job cost : 13.01 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0405312 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.515 ms | |
2 job cost : 18.6818 ms | |
3 job cost : 11.0774 ms | |
4 job cost : 3.46541 ms | |
5 job cost : 0.00619888 ms | |
6 job cost : 12.8081 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0431538 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.4299 ms | |
2 job cost : 18.8479 ms | |
3 job cost : 11.045 ms | |
4 job cost : 3.36385 ms | |
5 job cost : 0.00715256 ms | |
6 job cost : 12.8665 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0391006 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.741 ms | |
2 job cost : 15.429 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 39426048 total_len: 8344576 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
3 job cost : 8.80408 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 44810240 total_len: 39426048 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
4 job cost : 2.6443 ms | |
5 job cost : 0.0932217 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 77922304 total_len: 44810240 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
6 job cost : 11.2352 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0405312 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.4713 ms | |
2 job cost : 18.8553 ms | |
3 job cost : 11.0419 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 44810240 total_len: 39426048 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
4 job cost : 2.51102 ms | |
5 job cost : 0.0214577 ms | |
6 job cost : 12.8634 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0391006 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.4339 ms | |
2 job cost : 18.5425 ms | |
3 job cost : 11.1887 ms | |
4 job cost : 3.24893 ms | |
5 job cost : 0.064373 ms | |
6 job cost : 12.7792 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0388622 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.4849 ms | |
2 job cost : 18.5828 ms | |
3 job cost : 11.2853 ms | |
4 job cost : 3.38268 ms | |
5 job cost : 0.00405312 ms | |
6 job cost : 12.7544 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0405312 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.6044 ms | |
2 job cost : 18.7573 ms | |
3 job cost : 8.82149 ms | |
4 job cost : 2.62427 ms | |
5 job cost : 0.043869 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 77922304 total_len: 26071040 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
6 job cost : 11.2543 ms | |
instr type 3 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 0.0388622 ms | |
instr type 2 | |
task SEND_BATCH/RECV_BATCH | |
1 job cost : 10.5555 ms | |
2 job cost : 18.5368 ms | |
3 job cost : 11.2798 ms | |
libfabric:10452:efa:cq:rxr_cq_write_rx_completion():511<warn> Message truncated: tag: 0 len: 44810240 total_len: 39426048 | |
Completion with error: 265 | |
!!! err: unknown error | |
unknown error | |
4 job cost : 2.46739 ms | |
5 job cost : 0.000238419 ms | |
6 job cost : 12.7378 ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment