Created
September 22, 2020 23:08
-
-
Save froody/d35d7571b1a8df0638867066d96ecc6c to your computer and use it in GitHub Desktop.
segfault creating single-element group
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% TORCH_UCC_COLL_BACKEND=xccl LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/public/apps/ucx/1.9/gcc.7.4.0-cuda.10.1/lib/:/private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/lib/ python hello_ucx.py | |
TorchUCC: Thread mode multi is not supportedTorchUCC: Thread mode multi is not supported[1600816014.600420] [devfair0133:73575:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
[1600816014.600420] [devfair0133:73576:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
rank is = 1 | |
rank is = 0 | |
t is tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2.], device='cuda:0') | |
sending tensor([0.9740, 0.3096, 0.8529, 0.4817, 0.5530, 0.1668, 0.1471, 0.9425, 0.9323, | |
0.9054]) | |
recvd tensor([0.9740, 0.3096, 0.8529, 0.4817, 0.5530, 0.1668, 0.1471, 0.9425, 0.9323, | |
0.9054]) | |
creating group 0,1 | |
creating group 0,1 | |
TorchUCC: Thread mode multi is not supportedTorchUCC: Thread mode multi is not supported[1600816016.758399] [devfair0133:73576:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
[1600816016.758418] [devfair0133:73575:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
creating group 0 | |
creating group 1 | |
creating group 0 | |
TorchUCC: Thread mode multi is not supportedTorchUCC: Thread mode multi is not supported[1600816016.937819] [devfair0133:73575:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
[1600816016.937869] [devfair0133:73576:0] xccl_ucx_context.c:81 TEAM_UCX WARN Thread mode multiple is not supported | |
[devfair0133:73575:0:73575] Caught signal 11 (Segmentation fault: address not mapped to object at address 0xffffffff00000001) | |
[devfair0133:73576:0:73576] Caught signal 11 (Segmentation fault: address not mapped to object at address 0xffffffff00000001) | |
==== backtrace (tid: 73575) ==== | |
0 0x0000000000028905 ucs_debug_print_backtrace() /tmp/ucx/ucx-1.9.0/src/ucs/debug/debug.c:656 | |
1 0x0000000000012890 __funlockfile() ???:0 | |
2 0x000000000008c008 c10d::torch_ucx_req_test() /private/home/tbirch/src/torch-ucc/include/torch_ucc_sendrecv.hpp:237 | |
3 0x000000000008c97d c10d::oob_allgather_test() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:70 | |
4 0x000000000008ca64 c10d::oob_allgather() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:100 | |
5 0x0000000000002f98 xccl_ucx_team_create_post() ???:0 | |
6 0x00000000000039cb xccl_team_create_post() ???:0 | |
7 0x000000000008cd15 c10d::torch_xccl_comm_init() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:182 | |
8 0x00000000000579db c10d::ProcessGroupUCC::ProcessGroupUCC() /private/home/tbirch/src/torch-ucc/src/torch_ucc.cpp:179 | |
9 0x0000000000083e5e __gnu_cxx::new_allocator<c10d::ProcessGroupUCC>::construct<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/ext/new_allocator.h:136 | |
10 0x000000000008336c std::allocator_traits<std::allocator<c10d::ProcessGroupUCC> >::construct<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/alloc_traits.h:475 | |
11 0x0000000000081e99 std::_Sp_counted_ptr_inplace<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, (__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr_inplace<std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:526 | |
12 0x000000000007f213 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:637 | |
13 0x000000000007ba8e std::__shared_ptr<c10d::ProcessGroupUCC, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:1295 | |
14 0x0000000000076f63 std::shared_ptr<c10d::ProcessGroupUCC>::shared_ptr<std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:344 | |
15 0x0000000000071a08 std::allocate_shared<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:691 | |
16 0x000000000006c09c std::make_shared<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:707 | |
17 0x00000000000590a3 c10d::ProcessGroupUCC::createProcessGroupUCC() /private/home/tbirch/src/torch-ucc/src/torch_ucc.cpp:476 | |
18 0x000000000007b011 pybind11::detail::argument_loader<std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>::call_impl<std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), 0ul, 1ul, 2ul, 3ul, pybind11::detail::void_type>() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1931 | |
19 0x00000000000766b9 pybind11::detail::argument_loader<std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>::call<std::shared_ptr<c10d::ProcessGroup>, pybind11::detail::void_type, std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&)>() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1908 | |
20 0x0000000000070c1f pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>(std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup> (*)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&))::{lambda(pybind11::detail::function_call&)#3}::operator()() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:155 | |
21 0x0000000000070f54 pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>(std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup> (*)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&))::{lambda(pybind11::detail::function_call&)#3}::_FUN() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:133 | |
22 0x0000000000065293 pybind11::cpp_function::dispatcher() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:620 | |
23 0x00000000001491f4 _PyMethodDef_RawFastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:693 | |
24 0x00000000001c6bb9 _PyCFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:732 | |
25 0x00000000001c6bb9 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4568 | |
26 0x00000000001c6bb9 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3124 | |
27 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
28 0x0000000000138b63 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
29 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
30 0x00000000001c3aee _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3139 | |
31 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
32 0x0000000000138b01 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
33 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
34 0x00000000001c6ef2 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3093 | |
35 0x0000000000137b68 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
36 0x0000000000137b68 _PyFunction_FastCallDict() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:322 | |
37 0x00000000001c43a3 do_call_core() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4645 | |
38 0x00000000001c43a3 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3191 | |
39 0x0000000000137b68 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
40 0x0000000000137b68 _PyFunction_FastCallDict() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:322 | |
41 0x00000000001c43a3 do_call_core() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4645 | |
42 0x00000000001c43a3 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3191 | |
43 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
44 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
45 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
46 0x00000000001c2d01 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3110 | |
47 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
48 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
49 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
50 0x00000000001c2d01 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3110 | |
51 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
52 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
53 0x00000000001c2ae5 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
54 0x00000000001c2ae5 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3124 | |
55 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
56 0x0000000000138b63 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
57 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
58 0x00000000001c3aee _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3139 | |
================================= | |
==== backtrace (tid: 73576) ==== | |
0 0x0000000000028905 ucs_debug_print_backtrace() /tmp/ucx/ucx-1.9.0/src/ucs/debug/debug.c:656 | |
1 0x0000000000012890 __funlockfile() ???:0 | |
2 0x000000000008c008 c10d::torch_ucx_req_test() /private/home/tbirch/src/torch-ucc/include/torch_ucc_sendrecv.hpp:237 | |
3 0x000000000008c97d c10d::oob_allgather_test() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:70 | |
4 0x000000000008ca64 c10d::oob_allgather() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:100 | |
5 0x0000000000002f98 xccl_ucx_team_create_post() ???:0 | |
6 0x00000000000039cb xccl_team_create_post() ???:0 | |
7 0x000000000008cd15 c10d::torch_xccl_comm_init() /private/home/tbirch/src/torch-ucc/src/torch_xccl.cpp:182 | |
8 0x00000000000579db c10d::ProcessGroupUCC::ProcessGroupUCC() /private/home/tbirch/src/torch-ucc/src/torch_ucc.cpp:179 | |
9 0x0000000000083e5e __gnu_cxx::new_allocator<c10d::ProcessGroupUCC>::construct<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/ext/new_allocator.h:136 | |
10 0x000000000008336c std::allocator_traits<std::allocator<c10d::ProcessGroupUCC> >::construct<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/alloc_traits.h:475 | |
11 0x0000000000081e99 std::_Sp_counted_ptr_inplace<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, (__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr_inplace<std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:526 | |
12 0x000000000007f213 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:637 | |
13 0x000000000007ba8e std::__shared_ptr<c10d::ProcessGroupUCC, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr_base.h:1295 | |
14 0x0000000000076f63 std::shared_ptr<c10d::ProcessGroupUCC>::shared_ptr<std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:344 | |
15 0x0000000000071a08 std::allocate_shared<c10d::ProcessGroupUCC, std::allocator<c10d::ProcessGroupUCC>, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:691 | |
16 0x000000000006c09c std::make_shared<c10d::ProcessGroupUCC, std::shared_ptr<c10d::Store> const&, int&, int&>() /usr/include/c++/7/bits/shared_ptr.h:707 | |
17 0x00000000000590a3 c10d::ProcessGroupUCC::createProcessGroupUCC() /private/home/tbirch/src/torch-ucc/src/torch_ucc.cpp:476 | |
18 0x000000000007b011 pybind11::detail::argument_loader<std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>::call_impl<std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), 0ul, 1ul, 2ul, 3ul, pybind11::detail::void_type>() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1931 | |
19 0x00000000000766b9 pybind11::detail::argument_loader<std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>::call<std::shared_ptr<c10d::ProcessGroup>, pybind11::detail::void_type, std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&)>() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1908 | |
20 0x0000000000070c1f pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>(std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup> (*)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&))::{lambda(pybind11::detail::function_call&)#3}::operator()() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:155 | |
21 0x0000000000070f54 pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup>, std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&>(std::shared_ptr<c10d::ProcessGroup> (*&)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&), std::shared_ptr<c10d::ProcessGroup> (*)(std::shared_ptr<c10d::Store> const&, int, int, std::chrono::duration<float, std::ratio<1l, 1l> > const&))::{lambda(pybind11::detail::function_call&)#3}::_FUN() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:133 | |
22 0x0000000000065293 pybind11::cpp_function::dispatcher() /private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:620 | |
23 0x00000000001491f4 _PyMethodDef_RawFastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:693 | |
24 0x00000000001c6bb9 _PyCFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:732 | |
25 0x00000000001c6bb9 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4568 | |
26 0x00000000001c6bb9 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3124 | |
27 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
28 0x0000000000138b63 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
29 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
30 0x00000000001c3aee _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3139 | |
31 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
32 0x0000000000138b01 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
33 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
34 0x00000000001c6ef2 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3093 | |
35 0x0000000000137b68 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
36 0x0000000000137b68 _PyFunction_FastCallDict() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:322 | |
37 0x00000000001c43a3 do_call_core() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4645 | |
38 0x00000000001c43a3 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3191 | |
39 0x0000000000137b68 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
40 0x0000000000137b68 _PyFunction_FastCallDict() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:322 | |
41 0x00000000001c43a3 do_call_core() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4645 | |
42 0x00000000001c43a3 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3191 | |
43 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
44 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
45 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
46 0x00000000001c2d01 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3110 | |
47 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
48 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
49 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
50 0x00000000001c2d01 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3110 | |
51 0x0000000000138767 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
52 0x0000000000138767 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:408 | |
53 0x00000000001c2ae5 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
54 0x00000000001c2ae5 _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3124 | |
55 0x0000000000118db2 PyEval_EvalFrameEx() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:547 | |
56 0x0000000000138b63 _PyFunction_FastCallKeywords() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Objects/call.c:433 | |
57 0x000000000017f335 call_function() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:4616 | |
58 0x00000000001c3aee _PyEval_EvalFrameDefault() /home/conda/feedstock_root/build_artifacts/python_1596159872474/work/Python/ceval.c:3139 | |
================================= | |
Traceback (most recent call last): | |
File "hello_ucx.py", line 47, in <module> | |
mp.spawn(worker, args=(world_size,), nprocs=world_size, join=True) | |
File "/private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 200, in spawn | |
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn') | |
File "/private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 158, in start_processes | |
while not context.join(): | |
File "/private/home/tbirch/.conda/envs/torch160-ucx/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 108, in join | |
(error_index, name) | |
Exception: process 0 terminated with signal SIGSEGV |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment