Skip to content

Instantly share code, notes, and snippets.

@trevor-m
Last active June 1, 2023 23:41
Show Gist options
  • Save trevor-m/1e7577ce849c6762ca11cb420e15bc6a to your computer and use it in GitHub Desktop.
Save trevor-m/1e7577ce849c6762ca11cb420e15bc6a to your computer and use it in GitHub Desktop.
PJRT Paxml Segfault Backtrace
Thread 1 (Thread 0x7f53cee40000 (LWP 39384)):
#0 __pthread_kill_implementation (no_tid=0, signo=11, threadid=139997930061824) at ./nptl/pthread_kill.c:44
#1 __pthread_kill_internal (signo=11, threadid=139997930061824) at ./nptl/pthread_kill.c:78
#2 __GI___pthread_kill (threadid=139997930061824, signo=signo@entry=11) at ./nptl/pthread_kill.c:89
#3 0x00007f53cee83476 in __GI_raise (sig=11) at ../sysdeps/posix/raise.c:26
#4 <signal handler called>
#5 0x00007f534bf94891 in iree_hal_resource_release (any_resource=0x200000000) at external/iree_core/runtime/src/iree/hal/resource.h:89
#6 0x00007f534bf945ca in iree_hal_resource_set_release_blocks (set=0x555dd36faf40, preserve_set=false) at external/iree_core/runtime/src/iree/hal/utils/resource_set.c:66
#7 0x00007f534bf9454e in iree_hal_resource_set_free (set=0x555dd36faf40) at external/iree_core/runtime/src/iree/hal/utils/resource_set.c:105
#8 0x00007f534bf92d8e in iree_hal_deferred_command_buffer_destroy (base_command_buffer=0x555ddb005c40) at external/iree_core/runtime/src/iree/hal/utils/deferred_command_buffer.c:208
#9 0x00007f534bf9cb7a in iree_hal_command_buffer_destroy (command_buffer=0x555ddb005c40) at external/iree_core/runtime/src/iree/hal/command_buffer.c:169
#10 0x00007f534b74cedb in iree_vm_ref_release (ref=0x7ffde5e7b548) at external/iree_core/runtime/src/iree/vm/ref.c:207
#11 0x00007f534b74d26d in iree_vm_ref_move (ref=0x7ffde5e78300, out_ref=0x7ffde5e7b548) at external/iree_core/runtime/src/iree/vm/ref.c:251
#12 0x00007f534b6ecd8f in iree_vm_bytecode_issue_import_call (stack=0x7ffde5e7ad28, call=..., cconv_results=..., dst_reg_list=0x555ddaa752a8, out_caller_frame=0x7ffde5e7a178, out_caller_registers=0x7ffde5e7a190) at external/iree_core/runtime/src/iree/vm/bytecode/dispatch.c:493
#13 0x00007f534b6eb602 in iree_vm_bytecode_call_import_variadic (stack=0x7ffde5e7ad28, module_state=0x555ddae7a440, import_ordinal=2147483652, caller_registers=..., segment_size_list=0x555ddaa7528e, src_reg_list=0x555ddaa7529c, dst_reg_list=0x555ddaa752a8, out_caller_frame=0x7ffde5e7a178, out_caller_registers=0x7ffde5e7a190) at external/iree_core/runtime/src/iree/vm/bytecode/dispatch.c:609
#14 0x00007f534b6e549e in iree_vm_bytecode_dispatch (stack=0x7ffde5e7ad28, module=0x555ddb3f1ac0, current_frame=0x7ffde5e7ad98, regs=..., call_results=...) at external/iree_core/runtime/src/iree/vm/bytecode/dispatch.c:1667
#15 0x00007f534b6da5a4 in iree_vm_bytecode_dispatch_begin (stack=0x7ffde5e7ad28, module=0x555ddb3f1ac0, call=..., cconv_arguments=..., cconv_results=...) at external/iree_core/runtime/src/iree/vm/bytecode/dispatch.c:636
#16 0x00007f534b6d500a in iree_vm_bytecode_module_begin_call (self=0x555ddb3f1ac0, stack=0x7ffde5e7ad28, call=...) at external/iree_core/runtime/src/iree/vm/bytecode/module.c:779
#17 0x00007f534b7449fd in iree_vm_begin_invoke (state=0x7ffde5e7a740, context=0x555dda75e9f0, function=..., flags=0, policy=0x0, inputs=0x555dd9d0e400, host_allocator=...) at external/iree_core/runtime/src/iree/vm/invocation.c:504
#18 0x00007f534b74402c in iree_vm_invoke (context=0x555dda75e9f0, function=..., flags=0, policy=0x0, inputs=0x555dd9d0e400, outputs=0x555dd9c1f340, host_allocator=...) at external/iree_core/runtime/src/iree/vm/invocation.c:302
#19 0x00007f534b6a2fbc in iree::pjrt::LoadedExecutableInstance::BatchExecute (this=0x555dda85ddf0, args=0x7ffde5e7cc60) at iree/integrations/pjrt/common/api_impl.cc:1797
#20 0x00007f534b6a5bb9 in iree::pjrt::LoadedExecutableInstance::BindApi(PJRT_Api*)::$_54::operator()(PJRT_LoadedExecutable_Execute_Args*) const (this=0x7ffde5e7cc60, args=0x7ffde5e7cc60) at iree/integrations/pjrt/common/api_impl.cc:1590
#21 0x00007f534b6a5b85 in iree::pjrt::LoadedExecutableInstance::BindApi(PJRT_Api*)::$_54::__invoke(PJRT_LoadedExecutable_Execute_Args*) (args=0x7ffde5e7cc60) at iree/integrations/pjrt/common/api_impl.cc:1587
#22 0x00007f53c6cf33bb in xla::PjRtCApiLoadedExecutable::Execute(absl::lts_20230125::Span<std::vector<xla::PjRtBuffer*, std::allocator<xla::PjRtBuffer*> > const>, xla::ExecuteOptions const&, std::optional<std::vector<xla::PjRtFuture<absl::lts_20230125::Status>, std::allocator<xla::PjRtFuture<absl::lts_20230125::Status> > > >&) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#23 0x00007f53c92a66dc in xla::ifrt::PjRtLoadedExecutable::Execute(absl::lts_20230125::Span<tsl::RCReference<xla::ifrt::Array> >, xla::ExecuteOptions const&, std::optional<xla::ifrt::DeviceList>) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#24 0x00007f53c6ca320d in absl::lts_20230125::StatusOr<xla::PyExecuteResults> xla::(anonymous namespace)::ExecuteShardedOnLocalDevicesInternal<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, xla::(anonymous namespace)::ShardedBufferAdapter<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >(xla::ExecuteOptions const&, std::shared_ptr<xla::PyClient> const&, xla::ifrt::LoadedExecutable*, absl::lts_20230125::Span<pybind11::capsule const>, absl::lts_20230125::Span<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > const>, std::optional<std::vector<xla::PjRtFuture<absl::lts_20230125::Status>, std::allocator<xla::PjRtFuture<absl::lts_20230125::Status> > > >&) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#25 0x00007f53c6ca5145 in xla::PyLoadedExecutable::ExecuteSharded(std::vector<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, std::allocator<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >, bool) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#26 0x00007f53c6992c94 in pybind11::cpp_function::initialize<xla::ValueOrThrowWrapper<absl::lts_20230125::StatusOr<xla::PyExecuteResults> (std::vector<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, std::allocator<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >, bool), xla::PyLoadedExecutable>, xla::PyExecuteResults, xla::PyLoadedExecutable&, std::vector<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, std::allocator<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >, bool, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, pybind11::arg_v>(xla::ValueOrThrowWrapper<absl::lts_20230125::StatusOr<xla::PyExecuteResults> (std::vector<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, std::allocator<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >, bool), xla::PyLoadedExecutable>&&, xla::PyExecuteResults (*)(xla::PyLoadedExecutable&, std::vector<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > >, std::allocator<std::variant<xla::PyArray, std::vector<xla::PyArray, std::allocator<xla::PyArray> > > > >, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, pybind11::arg_v const&)::{lambda(pybind11::detail::function_call&)#3}::operator()(pybind11::detail::function_call&) const () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#27 0x00007f53c69676e1 in pybind11::cpp_function::dispatcher(_object*, _object*, _object*) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#28 0x0000555dcb03a99e in ?? ()
#29 0x0000555dcb0314ab in _PyObject_MakeTpCall ()
#30 0x0000555dcb048f0b in ?? ()
#31 0x0000555dcb029462 in _PyEval_EvalFrameDefault ()
#32 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#33 0x0000555dcb025af0 in _PyEval_EvalFrameDefault ()
#34 0x0000555dcb030634 in _PyObject_FastCallDictTstate ()
#35 0x0000555dcb045d11 in _PyObject_Call_Prepend ()
#36 0x0000555dcb163610 in ?? ()
#37 0x0000555dcb04987b in PyObject_Call ()
#38 0x0000555dcb025af0 in _PyEval_EvalFrameDefault ()
#39 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#40 0x0000555dcb025af0 in _PyEval_EvalFrameDefault ()
#41 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#42 0x00007f53c6b9603d in jax::PmapFunction::Call(pybind11::handle, _object* const*, unsigned long, _object*) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#43 0x00007f53c6b9683b in JaxPmapFunction_tp_vectorcall () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so
#44 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#45 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#46 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#47 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#48 0x0000555dcb0238cb in _PyEval_EvalFrameDefault ()
#49 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#50 0x0000555dcb0238cb in _PyEval_EvalFrameDefault ()
#51 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#52 0x0000555dcb0238cb in _PyEval_EvalFrameDefault ()
#53 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#54 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#55 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#56 0x0000555dcb0238cb in _PyEval_EvalFrameDefault ()
#57 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#58 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#59 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#60 0x0000555dcb024adf in _PyEval_EvalFrameDefault ()
#61 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#62 0x0000555dcb024adf in _PyEval_EvalFrameDefault ()
#63 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#64 0x0000555dcb024adf in _PyEval_EvalFrameDefault ()
#65 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#66 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#67 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#68 0x0000555dcb023785 in _PyEval_EvalFrameDefault ()
#69 0x0000555dcb03b1ec in _PyFunction_Vectorcall ()
#70 0x0000555dcb024adf in _PyEval_EvalFrameDefault ()
#71 0x0000555dcb01fed6 in ?? ()
#72 0x0000555dcb116366 in PyEval_EvalCode ()
#73 0x0000555dcb143108 in ?? ()
#74 0x0000555dcb13bf5b in ?? ()
#75 0x0000555dcb142e55 in ?? ()
#76 0x0000555dcb142338 in _PyRun_SimpleFileObject ()
#77 0x0000555dcb142033 in _PyRun_AnyFileObject ()
#78 0x0000555dcb1332de in Py_RunMain ()
#79 0x0000555dcb10932d in Py_BytesMain ()
#80 0x00007f53cee6ad90 in __libc_start_call_main (main=main@entry=0x555dcb1092f0, argc=argc@entry=6, argv=argv@entry=0x7ffde5e80b18) at ../sysdeps/nptl/libc_start_call_main.h:58
#81 0x00007f53cee6ae40 in __libc_start_main_impl (main=0x555dcb1092f0, argc=6, argv=0x7ffde5e80b18, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffde5e80b08) at ../csu/libc-start.c:392
#82 0x0000555dcb109225 in _start ()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment