Skip to content

Instantly share code, notes, and snippets.

@froody
Created July 27, 2020 21:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save froody/ecc8bc849b8a95f89218fd5b6dc2ecd6 to your computer and use it in GitHub Desktop.
Save froody/ecc8bc849b8a95f89218fd5b6dc2ecd6 to your computer and use it in GitHub Desktop.
(gdb) thread apply all bt
Thread 18 (Thread 0x7f4b2a5d5700 (LWP 11702)):
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f4b2a5d4e00, expected=0, futex_word=0x56462ad3c93c <_PyRuntime+1340>) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7f4b2a5d4e00, mutex=0x56462ad3c940 <_PyRuntime+1344>, cond=0x56462ad3c910 <_PyRuntime+1296>) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x56462ad3c910 <_PyRuntime+1296>, mutex=0x56462ad3c940 <_PyRuntime+1344>, abstime=0x7f4b2a5d4e00) at pthread_cond_wait.c:667
#3 0x000056462abd965e in PyCOND_TIMEDWAIT (cond=0x56462ad3c910 <_PyRuntime+1296>, mut=0x56462ad3c940 <_PyRuntime+1344>, us=5000) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/condvar.h:90
#4 take_gil () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval_gil.h:208
#5 0x000056462ab03be4 in PyEval_RestoreThread () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval.c:271
#6 0x00007f4bedcabb16 in torch::autograd::python::PythonEngine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) ()
from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#7 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x5646951bdce0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#8 0x00007f4c096c26db in start_thread (arg=0x7f4b2a5d5700) at pthread_create.c:463
#9 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 17 (Thread 0x7f4b2bc21700 (LWP 11701)):
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f4b2bc20e00, expected=0, futex_word=0x56462ad3c93c <_PyRuntime+1340>) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7f4b2bc20e00, mutex=0x56462ad3c940 <_PyRuntime+1344>, cond=0x56462ad3c910 <_PyRuntime+1296>) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x56462ad3c910 <_PyRuntime+1296>, mutex=0x56462ad3c940 <_PyRuntime+1344>, abstime=0x7f4b2bc20e00) at pthread_cond_wait.c:667
#3 0x000056462abd965e in PyCOND_TIMEDWAIT (cond=0x56462ad3c910 <_PyRuntime+1296>, mut=0x56462ad3c940 <_PyRuntime+1344>, us=5000) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/condvar.h:90
#4 take_gil () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval_gil.h:208
#5 0x000056462ab03be4 in PyEval_RestoreThread () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval.c:271
#6 0x00007f4bedcabb16 in torch::autograd::python::PythonEngine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) ()
from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#7 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x564694fd6be0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#8 0x00007f4c096c26db in start_thread (arg=0x7f4b2bc21700) at pthread_create.c:463
#9 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 16 (Thread 0x7f4b2dfff700 (LWP 11692)):
#0 0x00007f4c096cc384 in __libc_read (fd=49, buf=buf@entry=0x7f4b2dffee70, nbytes=nbytes@entry=16) at ../sysdeps/unix/sysv/linux/read.c:27
#1 0x00007f4b69df1d5f in read (__nbytes=16, __buf=0x7f4b2dffee70, __fd=<optimized out>) at /usr/include/x86_64-linux-gnu/bits/unistd.h:44
#2 __ibv_get_async_event (context=0x564694fd8300, event=0x7f4b2dffeed0) at src/device.c:770
#3 0x00007f4bb84a5932 in wrap_ibv_get_async_event (context=context@entry=0x564694fd8300, event=event@entry=0x7f4b2dffeed0) at misc/ibvwrap.cc:220
#4 0x00007f4bb845f563 in ncclIbAsyncThreadMain (args=0x564694fd8300) at transport/net_ib.cc:69
#5 0x00007f4c096c26db in start_thread (arg=0x7f4b2dfff700) at pthread_create.c:463
#6 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 15 (Thread 0x7f4b757fa700 (LWP 11690)):
---Type <return> to continue, or q <return> to quit---
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f4b60000b88) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e201bd8, cond=0x7f4b60000b60) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x7f4b60000b60, mutex=0x56462e201bd8) at pthread_cond_wait.c:655
#3 0x00007f4ba5ddbb8d in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007f4ba5d81155 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5 0x00007f4ba5ddadc8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6 0x00007f4c096c26db in start_thread (arg=0x7f4b757fa700) at pthread_create.c:463
#7 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 14 (Thread 0x7f4b75ffb700 (LWP 11689)):
#0 0x00007f4c093debf9 in __GI___poll (fds=0x7f4b64000bd0, nfds=11, timeout=100) at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x00007f4ba5dd8733 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#2 0x00007f4ba5e674dd in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007f4ba5ddadc8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007f4c096c26db in start_thread (arg=0x7f4b75ffb700) at pthread_create.c:463
#5 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 13 (Thread 0x7f4b77fff700 (LWP 11680)):
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f4b77ffeb60, expected=0, futex_word=0x56462e271190) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7f4b77ffeb60, mutex=0x56462e271110, cond=0x56462e271168) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x56462e271168, mutex=0x56462e271110, abstime=0x7f4b77ffeb60) at pthread_cond_wait.c:667
#3 0x00007f4bee248ba7 in torch::distributed::rpc::ProcessGroupAgent::pollTimedOutRPCs() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#4 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e268130) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#5 0x00007f4c096c26db in start_thread (arg=0x7f4b77fff700) at pthread_create.c:463
#6 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 12 (Thread 0x7f4b7cff9700 (LWP 11678)):
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f4b7cff8b50, expected=0, futex_word=0x7f4b6c001358) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7f4b7cff8b50, mutex=0x7f4b6c001308, cond=0x7f4b6c001330) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x7f4b6c001330, mutex=0x7f4b6c001308, abstime=0x7f4b7cff8b50) at pthread_cond_wait.c:667
#3 0x00007f4beaad2ed3 in gloo::transport::tcp::UnboundBuffer::waitRecv(int*, std::chrono::duration<long, std::ratio<1l, 1000l> >) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#4 0x00007f4bee2b2c69 in c10d::ProcessGroupGloo::RecvWork::wait(std::chrono::duration<long, std::ratio<1l, 1000l> >) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#5 0x00007f4bee24b662 in torch::distributed::rpc::ProcessGroupAgent::listenLoopInternal() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#6 0x00007f4bee24bd25 in torch::distributed::rpc::ProcessGroupAgent::listenLoop() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#7 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e271430) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#8 0x00007f4c096c26db in start_thread (arg=0x7f4b7cff9700) at pthread_create.c:463
#9 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
---Type <return> to continue, or q <return> to quit---
Thread 11 (Thread 0x7f4b7d7fa700 (LWP 11676)):
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f4b7d7f9dc0, expected=0, futex_word=0x56462e270d80) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7f4b7d7f9dc0, mutex=0x56462e270d88, cond=0x56462e270d58) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x56462e270d58, mutex=0x56462e270d88, abstime=0x7f4b7d7f9dc0) at pthread_cond_wait.c:667
#3 0x00007f4be977fb79 in torch::distributed::rpc::RpcAgent::retryExpiredRpcs() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#4 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e270450) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#5 0x00007f4c096c26db in start_thread (arg=0x7f4b7d7fa700) at pthread_create.c:463
#6 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 10 (Thread 0x7f4b7dffb700 (LWP 11672)):
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e271048) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270ff8, cond=0x56462e271020) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e271020, mutex=0x56462e270ff8) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bb441b97f in c10::ThreadPool::main_loop(unsigned long) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libc10.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e270160) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b7dffb700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 9 (Thread 0x7f4b7e7fc700 (LWP 11670)):
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e271048) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270ff8, cond=0x56462e271020) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e271020, mutex=0x56462e270ff8) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bb441b97f in c10::ThreadPool::main_loop(unsigned long) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libc10.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e26ff00) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b7e7fc700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 8 (Thread 0x7f4b7effd700 (LWP 11669)):
---Type <return> to continue, or q <return> to quit---
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e271048) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270ff8, cond=0x56462e271020) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e271020, mutex=0x56462e270ff8) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bb441b97f in c10::ThreadPool::main_loop(unsigned long) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libc10.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e26fca0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b7effd700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 7 (Thread 0x7f4b7f7fe700 (LWP 11668)):
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e271048) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270ff8, cond=0x56462e271020) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e271020, mutex=0x56462e270ff8) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bb441b97f in c10::ThreadPool::main_loop(unsigned long) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libc10.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e2713f0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b7f7fe700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 6 (Thread 0x7f4b7ffff700 (LWP 11665)):
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e270978) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270928, cond=0x56462e270950) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e270950, mutex=0x56462e270928) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bee2b8af1 in c10d::ProcessGroupGloo::runLoop(int) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e2673e0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b7ffff700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
---Type <return> to continue, or q <return> to quit---
Thread 5 (Thread 0x7f4b9cf1a700 (LWP 11664)):
#0 0x00007f4c096c89f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x56462e27097c) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x56462e270928, cond=0x56462e270950) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x56462e270950, mutex=0x56462e270928) at pthread_cond_wait.c:655
#3 0x00007f4bf07cf48b in __gthread_cond_wait (__mutex=<optimized out>, __cond=<optimized out>)
at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/build/build-cc-gcc-final/x86_64-conda_cos6-linux-gnu/libstdc++-v3/include/x86_64-conda_cos6-linux-gnu/bits/gthr-default.h:878
#4 std::condition_variable::wait (this=<optimized out>, __lock=...) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/condition_variable.cc:53
#5 0x00007f4bee2b8af1 in c10d::ProcessGroupGloo::runLoop(int) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#6 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e25eb70) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#7 0x00007f4c096c26db in start_thread (arg=0x7f4b9cf1a700) at pthread_create.c:463
#8 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 4 (Thread 0x7f4b9d71b700 (LWP 11663)):
#0 0x00007f4c093ebbb7 in epoll_wait (epfd=31, events=0x7f4b9d71aba0, maxevents=64, timeout=10) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00007f4beaabd265 in gloo::transport::tcp::Loop::run() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#2 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e25d270) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#3 0x00007f4c096c26db in start_thread (arg=0x7f4b9d71b700) at pthread_create.c:463
#4 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 3 (Thread 0x7f4b9df1c700 (LWP 11661)):
#0 0x00007f4c093debf9 in __GI___poll (fds=0x7f4b94000b60, nfds=4, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x00007f4bee29c2a8 in c10d::TCPStoreDaemon::run() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#2 0x00007f4bf07d3163 in std::execute_native_thread_routine (__p=0x56462e25e6f0) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
#3 0x00007f4c096c26db in start_thread (arg=0x7f4b9df1c700) at pthread_create.c:463
#4 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7f4b9f971700 (LWP 11657)):
#0 0x00007f4c093ed237 in accept4 (fd=12, addr=..., addr_len=0x7f4b9f970df8, flags=524288) at ../sysdeps/unix/sysv/linux/accept4.c:32
#1 0x00007f4ba5dd96fa in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#2 0x00007f4ba5dcb99d in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007f4ba5ddadc8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007f4c096c26db in start_thread (arg=0x7f4b9f971700) at pthread_create.c:463
#5 0x00007f4c093eb88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 1 (Thread 0x7f4c09aca740 (LWP 11650)):
---Type <return> to continue, or q <return> to quit---
#0 0x00007f4c096c8f85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7ffc2b70eee0, expected=0, futex_word=0x56462ad3c93c <_PyRuntime+1340>) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7ffc2b70eee0, mutex=0x56462ad3c940 <_PyRuntime+1344>, cond=0x56462ad3c910 <_PyRuntime+1296>) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x56462ad3c910 <_PyRuntime+1296>, mutex=0x56462ad3c940 <_PyRuntime+1344>, abstime=0x7ffc2b70eee0) at pthread_cond_wait.c:667
#3 0x000056462abd965e in PyCOND_TIMEDWAIT (cond=0x56462ad3c910 <_PyRuntime+1296>, mut=0x56462ad3c940 <_PyRuntime+1344>, us=5000) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/condvar.h:90
#4 take_gil () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval_gil.h:208
#5 0x000056462abd9692 in PyEval_AcquireThread () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/ceval.c:202
#6 0x00007f4bed9d95ea in pybind11::gil_scoped_acquire::gil_scoped_acquire() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#7 0x00007f4bedcb37af in torch::autograd::PyNode::release_variables() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#8 0x00007f4be9294982 in torch::autograd::deleteNode(torch::autograd::Node*) () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#9 0x00007f4be9291f31 in std::_Sp_counted_ptr_inplace<torch::autograd::ReadyQueue, std::allocator<torch::autograd::ReadyQueue>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#10 0x00007f4bed9d16c6 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from /private/home/tbirch/.conda/envs/torch16/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#11 0x00007f4bf07b5a9d in (anonymous namespace)::run (p=<optimized out>) at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/libsupc++/atexit_thread.cc:75
#12 0x00007f4c0930d041 in __run_exit_handlers (status=1, listp=0x7f4c096b5718 <__exit_funcs>, run_list_atexit=run_list_atexit@entry=true, run_dtors=run_dtors@entry=true) at exit.c:108
#13 0x00007f4c0930d13a in __GI_exit (status=<optimized out>) at exit.c:139
#14 0x000056462ac18308 in Py_Exit (sts=1) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/pylifecycle.c:2292
#15 0x000056462ac183ae in handle_system_exit () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/pythonrun.c:636
#16 0x000056462ac1843c in PyErr_PrintEx () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/pythonrun.c:646
#17 0x000056462ac29672 in PyRun_SimpleStringFlags () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Python/pythonrun.c:457
#18 0x000056462ac29749 in pymain_run_command (cf=0x7ffc2b70f1d0, command=<optimized out>) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Modules/main.c:386
#19 pymain_run_python (pymain=0x7ffc2b70f2e0) at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Modules/main.c:2862
#20 pymain_main () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Modules/main.c:3029
#21 0x000056462ac29aec in _Py_UnixMain () at /home/conda/feedstock_root/build_artifacts/python_1591034797817/work/Modules/main.c:3064
#22 0x00007f4c092ebb97 in __libc_start_main (main=0x56462aae8ab0 <main>, argc=4, argv=0x7ffc2b70f438, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffc2b70f428) at ../csu/libc-start.c:310
#23 0x000056462abce73d in _start () at ../sysdeps/x86_64/elf/start.S:103
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment