Skip to content

Instantly share code, notes, and snippets.

@heiner
Last active March 19, 2019 16:39
Show Gist options
  • Save heiner/e4b3c6ef92ed3c6b15005798f7b8da91 to your computer and use it in GitHub Desktop.
libtorch Tensor + std::future bug example
$ OMP_NUM_THREADS=1 gdb python3
GNU gdb (Ubuntu 8.1-0ubuntu3) 8.1.0.20180409-git
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from python3...done.
(gdb) run run.py 2
Starting program: /private/home/hnr/.conda/envs/extdev/bin/python3 run.py 2
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7fffa9c32700 (LWP 17667)]
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
consume(): Got future value. All good.
consume(): Future is ready. Should get it quickly now.
^C
Thread 1 "python3" received signal SIGINT, Interrupt.
0x00007ffff7bc39f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x555556c16028)
at ../sysdeps/unix/sysv/linux/futex-internal.h:88
88 ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory.
(gdb) info threads
Id Target Id Frame
* 1 Thread 0x7ffff7fc3740 (LWP 17643) "python3" 0x00007ffff7bc39f3 in futex_wait_cancelable (private=<optimized out>,
expected=0, futex_word=0x555556c16028) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
2 Thread 0x7fffa9c32700 (LWP 17667) "python3" 0x00007ffff7bc3449 in futex_wait (private=<optimized out>,
expected=32767, futex_word=0x555556c1476c) at ../sysdeps/unix/sysv/linux/futex-internal.h:61
(gdb) thread 2
[Switching to thread 2 (Thread 0x7fffa9c32700 (LWP 17667))]
#0 0x00007ffff7bc3449 in futex_wait (private=<optimized out>, expected=32767, futex_word=0x555556c1476c)
at ../sysdeps/unix/sysv/linux/futex-internal.h:61
61 ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory.
(gdb) bt
#0 0x00007ffff7bc3449 in futex_wait (private=<optimized out>, expected=32767, futex_word=0x555556c1476c)
at ../sysdeps/unix/sysv/linux/futex-internal.h:61
#1 futex_wait_simple (private=<optimized out>, expected=32767, futex_word=0x555556c1476c)
at ../sysdeps/nptl/futex-internal.h:135
#2 __pthread_cond_destroy (cond=0x555556c14748) at pthread_cond_destroy.c:54
#3 0x00007fffc6938bbc in std::__future_base::_State_baseV2::~_State_baseV2() ()
from /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/lib/libcaffe2.so
#4 0x00007fffe869ec75 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() ()
from /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#5 0x00007fffa9c41918 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=<optimized out>,
__in_chrg=<optimized out>) at /usr/include/c++/7/bits/shared_ptr_base.h:684
#6 std::__shared_ptr<std::__future_base::_State_baseV2, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (
this=<optimized out>, __in_chrg=<optimized out>) at /usr/include/c++/7/bits/shared_ptr_base.h:1123
#7 std::__shared_ptr<std::__future_base::_State_baseV2, (__gnu_cxx::_Lock_policy)2>::reset (this=<synthetic pointer>)
at /usr/include/c++/7/bits/shared_ptr_base.h:1235
#8 std::__basic_future<std::vector<at::Tensor, std::allocator<at::Tensor> > >::_Reset::~_Reset (
this=<synthetic pointer>, __in_chrg=<optimized out>) at /usr/include/c++/7/future:753
#9 std::future<std::vector<at::Tensor, std::allocator<at::Tensor> > >::get (this=<synthetic pointer>)
at /usr/include/c++/7/future:795
#10 Runner::consume (this=0x555556c16000) at bug.cc:38
#11 0x00007fffa9c4fc77 in pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}::operator()(Runner*) const (c=<optimized out>, __closure=<optimized out>)
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:74
#12 pybind11::detail::argument_loader<Runner*>::call_impl<void, pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&, 0ul, pybind11::gil_scoped_release>(pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&, std::integer_sequence<unsigned long, 0ul>, pybind11::gil_scoped_release&&) (
f=..., this=<optimized out>)
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1931
#13 pybind11::detail::argument_loader<Runner*>::call<void, pybind11::gil_scoped_release, pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&>(pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&) && (f=..., this=0x7fffa9c315f0)
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1913
#14 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}, void, Runner*, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scop---Type <return> to continue, or q <return> to quit---q
Quit
(gdb)
/*
* CXX=c++ python3 setup.py build develop
* Then python run.py
*/
#include <chrono>
#include <deque>
#include <future>
#include <memory>
#include <mutex>
#include <torch/extension.h>
// Minimal repro for a libtorch + std::future interaction bug: a consumer
// thread blocks on futures whose values a producer thread fulfills with
// torch::Tensor views. The hang reported by this gist shows up inside
// future.get() (see the backtrace earlier in this file:
// __pthread_cond_destroy inside ~_State_baseV2 in libcaffe2).
class Runner {
public:
// Consumer side: each iteration creates a promise/future pair, queues the
// promise for produce(), wakes the producer, then blocks (up to 2s) for
// the tensor value.
// @param loops number of request/receive round-trips to perform.
void consume(int loops = 10) {
for (int i = 0; i < loops; ++i) {
std::promise<torch::Tensor> promise;
std::future<torch::Tensor> future = promise.get_future();
{
// Hold the lock only for the queue mutation; notify after unlocking.
std::unique_lock<std::mutex> lock(mu_);
promises_.push_back(std::move(promise));
}
can_produce_.notify_one();
// NOTE(review): on timeout AT_ASSERTM throws and the queued promise is
// left behind for a later produce() call — acceptable for a repro.
std::future_status status = future.wait_for(std::chrono::seconds(2));
AT_ASSERTM(status == std::future_status::ready,
"Future timeout reached.");
std::cout << "consume(): Future is ready. Should get it quickly now."
<< std::endl;
// The demonstrated hang occurs inside this get(): destroying the shared
// state's condition variable blocks in futex_wait (see backtrace #2/#3).
torch::Tensor tensors = future.get();
std::cout << "consume(): Got future value. All good." << std::endl;
}
}
// Producer side: waits until at least one promise is queued, then fulfills
// every queued promise b with row b of `tensor` and drops those promises.
// assumes tensor.size(0) >= number of queued promises — TODO confirm
void produce(torch::Tensor tensor) {
std::unique_lock<std::mutex> lock(mu_);
while (promises_.empty()) {
can_produce_.wait(lock);
}
const int batch_size = promises_.size();
for (int b = 0; b < batch_size; ++b) {
// select(0, b) hands each waiting consumer a view of one row.
promises_[b].set_value(tensor.select(0, b));
}
promises_.erase(promises_.begin(), promises_.begin() + batch_size);
}
private:
std::condition_variable can_produce_;  // signals produce() that work is queued
std::mutex mu_;  // guards promises_
std::deque<std::promise<torch::Tensor>> promises_;  // pending consumer requests
};
// Python bindings for the repro. The GIL is released around both methods so
// consume() can block in C++ while produce() is called from another Python
// thread (run.py relies on exactly that).
PYBIND11_MODULE(tensorbug, m) {
py::class_<Runner>(m, "Runner")
.def(py::init<>())
// Expose the C++ default (loops = 10) to Python; previously py::arg
// without a value made `loops` a required argument despite the C++
// signature's default. Existing callers passing loops still work.
.def("consume", &Runner::consume,
py::call_guard<py::gil_scoped_release>(), py::arg("loops") = 10)
// NOTE(review): keyword stays "tensors" for backward compatibility even
// though the C++ parameter is the singular `tensor`.
.def("produce", &Runner::produce,
py::call_guard<py::gil_scoped_release>(), py::arg("tensors"));
}
# Driver for the tensorbug repro: a background thread runs the blocking
# consumer while the main thread feeds it tensors via produce().
import sys
import threading
import torch
import tensorbug

batch_size = 1
loops = 10

runner = tensorbug.Runner()

# consume() blocks in C++ (GIL released), so it must live on its own thread
# for produce() below to make progress.
consume_thread = threading.Thread(target=runner.consume, args=(loops,))
consume_thread.start()

# Each produce() call fulfills the pending promise(s) with rows of a
# (batch_size, 1) zero tensor.
for _ in range(loops):
    runner.produce(torch.zeros(batch_size, 1))

consume_thread.join()
# Build with
# CXX=c++ python3 setup.py build develop
import setuptools
import sys
from torch.utils import cpp_extension

# Platform-specific toolchain flags: macOS needs libc++ plus a minimum
# deployment target for the C++17 features used by bug.cc.
compile_flags = []
link_flags = []
if sys.platform == 'darwin':
    compile_flags.extend(['-stdlib=libc++', '-mmacosx-version-min=10.12'])
    link_flags.extend(['-stdlib=libc++'])

# Single extension module built from the repro source.
tensorbug = cpp_extension.CppExtension(
    name='tensorbug',
    sources=['bug.cc'],
    language='c++',
    extra_compile_args=['-std=c++17'] + compile_flags,
    extra_link_args=link_flags,
)

setuptools.setup(
    name='tensorbug',
    ext_modules=[tensorbug],
    cmdclass={'build_ext': cpp_extension.BuildExtension},
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment