Created
December 8, 2020 01:41
-
-
Save rohan-varma/45f407a1fd7015d1a6d4f5d797ad4734 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---- Process 0 ----- | |
#0 0x00007fff70b5269e in clock_gettime () | |
#1 0x00007fbe97a5a7fd in clock_gettime () from /lib64/libc.so.6 | |
#2 0x00007fbe3e3f99ae in ?? () from /lib64/libcuda.so.1 | |
#3 0x00007fbe3e4c12c7 in ?? () from /lib64/libcuda.so.1 | |
#4 0x00007fbe3e3a1cac in ?? () from /lib64/libcuda.so.1 | |
#5 0x00007fbe3e3d9502 in ?? () from /lib64/libcuda.so.1 | |
#6 0x00007fbe3e3165e8 in ?? () from /lib64/libcuda.so.1 | |
#7 0x00007fbe3e316cd4 in ?? () from /lib64/libcuda.so.1 | |
#8 0x00007fbe8308b1e7 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#9 0x00007fbe830852a0 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#10 0x00007fbe83091616 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#11 0x00007fbe83094031 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#12 0x00007fbe8308723e in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#13 0x00007fbe830739ce in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#14 0x00007fbe830a8d24 in cudaEventCreate () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#15 0x00007fbe5bf7be68 in torch::autograd::profiler::(anonymous namespace)::CUDAMethods::record ( | |
this=0x7fbe777a8128 <torch::autograd::profiler::(anonymous namespace)::RegisterCUDAMethods::RegisterCUDAMethods()::methods>, device=0x7fff70b1c288, event=0x7fff70b1c290, | |
cpu_ns=0x7fff70b1c220) at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_cuda.cpp:46 | |
#16 0x00007fbe7b883d71 in torch::autograd::profiler::LegacyEvent::record (this=0x7fff70b1c220, record_cuda=true) at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_legacy.cpp:570 | |
#17 0x00007fbe7b889bc1 in torch::autograd::profiler::LegacyEvent::LegacyEvent (this=0x7fff70b1c220, kind=torch::autograd::profiler::EventKind::Mark, name=..., thread_id=1, | |
record_cuda=true, handle=0, shapes=..., node_id=-1) at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_legacy.h:116 | |
#18 0x00007fbe7b881068 in torch::autograd::profiler::ProfilerThreadLocalState::mark (this=0x55f5a180f3b0, name=..., include_cuda=true) | |
at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_legacy.cpp:189 | |
#19 0x00007fbe7b882f20 in torch::autograd::profiler::<lambda(int)>::operator()(int) const (__closure=0x55f5a180f550) | |
at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_legacy.cpp:519 | |
#20 0x00007fbe7b887194 in std::_Function_handler<void(int), torch::autograd::profiler::enableProfilerLegacy(const torch::autograd::profiler::ProfilerConfig&)::<lambda(int)> >::_M_invoke(const std::_Any_data &, int &&) (__functor=..., __args#0=@0x7fff70b1c3b4: 1) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/std_function.h:316 | |
#21 0x00007fbe82024b1e in std::function<void (int)>::operator()(int) const (this=0x7fff70b1c830, __args#0=1) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/std_function.h:706 | |
#22 0x00007fbe5bf7c219 in torch::autograd::profiler::(anonymous namespace)::CUDAMethods::onEachDevice(std::function<void(int)>) const ( | |
this=0x7fbe777a8128 <torch::autograd::profiler::(anonymous namespace)::RegisterCUDAMethods::RegisterCUDAMethods()::methods>, op=...) | |
at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_cuda.cpp:86 | |
#23 0x00007fbe7b883551 in torch::autograd::profiler::enableProfilerLegacy (new_config=...) at /home/rvarm1/pytorch/torch/csrc/autograd/profiler_legacy.cpp:518 | |
#24 0x00007fbe818743c1 in pybind11::detail::argument_loader<torch::autograd::profiler::ProfilerConfig const&>::call_impl<void, void (*&)(torch::autograd::profiler::ProfilerConfig const&), 0ul, pybind11::detail::void_type>(void (*&)(torch::autograd::profiler::ProfilerConfig const&), std::integer_sequence<unsigned long, 0ul>, pybind11::detail::void_type&&) && ( | |
this=0x7fff70b1c990, f=@0x55f52c6604a8: 0x7fbe7b8830aa <torch::autograd::profiler::enableProfilerLegacy(torch::autograd::profiler::ProfilerConfig const&)>) | |
at /home/rvarm1/pytorch/third_party/pybind11/include/pybind11/cast.h:2010 | |
#25 0x00007fbe81870eaf in pybind11::detail::argument_loader<torch::autograd::profiler::ProfilerConfig const&>::call<void, pybind11::detail::void_type, void (*&)(torch::autograd::profiler::ProfilerConfig const&)>(void (*&)(torch::autograd::profiler::ProfilerConfig const&)) && (this=0x7fff70b1c990, | |
---- Process 1 ----- | |
#0 0x00007fff70b5269e in clock_gettime () | |
#1 0x00007fbe97a5a7fd in clock_gettime () from /lib64/libc.so.6 | |
#2 0x00007fbe3e3f99ae in ?? () from /lib64/libcuda.so.1 | |
#3 0x00007fbe3e4c12c7 in ?? () from /lib64/libcuda.so.1 | |
#4 0x00007fbe3e3a1cac in ?? () from /lib64/libcuda.so.1 | |
#5 0x00007fbe3e3a1e60 in ?? () from /lib64/libcuda.so.1 | |
#6 0x00007fbe3e3cc361 in ?? () from /lib64/libcuda.so.1 | |
#7 0x00007fbe3e541526 in ?? () from /lib64/libcuda.so.1 | |
#8 0x00007fbe3e2dc56b in ?? () from /lib64/libcuda.so.1 | |
#9 0x00007fbe3e2dc7e8 in ?? () from /lib64/libcuda.so.1 | |
#10 0x00007fbe3e2dc82e in ?? () from /lib64/libcuda.so.1 | |
#11 0x00007fbe3e46fcd6 in cuLaunchKernel () from /lib64/libcuda.so.1 | |
#12 0x00007fbe8307d8fd in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#13 0x00007fbe8307d987 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#14 0x00007fbe830ab96b in cudaLaunchKernel () from /usr/local/cuda/lib64/libcudart.so.9.2 | |
#15 0x00007fbe5eab6035 in ncclBarrierEnqueueWait (comm=0x7fbdec000dc0) at enqueue.cc:215 | |
#16 0x00007fbe5eaba233 in ncclGroupEnd () at group.cc:282 | |
#17 0x00007fbe82093ce2 in c10d::(anonymous namespace)::AutoNcclGroup::~AutoNcclGroup (this=0x7fff70b1be1f, __in_chrg=<optimized out>) | |
at /home/rvarm1/pytorch/torch/lib/c10d/ProcessGroupNCCL.cpp:33 | |
#18 0x00007fbe8209f166 in c10d::ProcessGroupNCCL::collective<c10d::ProcessGroupNCCL::allgather(std::vector<std::vector<at::Tensor> >&, std::vector<at::Tensor>&, const c10d::AllgatherOptions&)::<lambda(at::Tensor&, at::Tensor&, ncclComm_t, c10::cuda::CUDAStream&)>, c10d::ProcessGroupNCCL::allgather(std::vector<std::vector<at::Tensor> >&, std::vector<at::Tensor>&, const c10d::AllgatherOptions&)::<lambda(std::vector<c10::cuda::CUDAStream>&)>, c10d::ProcessGroupNCCL::allgather(std::vector<std::vector<at::Tensor> >&, std::vector<at::Tensor>&, const c10d::AllgatherOptions&)::<lambda(std::vector<c10::cuda::CUDAStream>&)> >(std::vector<at::Tensor, std::allocator<at::Tensor> > &, std::vector<at::Tensor, std::allocator<at::Tensor> > &, c10d::ProcessGroupNCCL::<lambda(at::Tensor&, at::Tensor&, ncclComm_t, c10::cuda::CUDAStream&)>, c10d::ProcessGroupNCCL::<lambda(std::vector<c10::cuda::CUDAStream, std::allocator<c10::cuda::CUDAStream> >&)>, c10d::ProcessGroupNCCL::<lambda(std::vector<c10::cuda::CUDAStream, std::allocator<c10::cuda::CUDAStream> >&)>, c10d::OpType, const char *) ( | |
this=0x55f52cb1e9c0, inputs=..., outputs=..., fn=..., pre=..., post=..., opType=c10d::OpType::ALLGATHER, profilingTitle=0x7fbe82400974 "nccl:all_gather") | |
at /home/rvarm1/pytorch/torch/lib/c10d/ProcessGroupNCCL.cpp:1101 | |
#19 0x00007fbe8209c9a5 in c10d::ProcessGroupNCCL::allgather (this=0x55f52cb1e9c0, outputTensors=..., inputTensors=..., opts=...) | |
at /home/rvarm1/pytorch/torch/lib/c10d/ProcessGroupNCCL.cpp:1372 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment