Skip to content

Instantly share code, notes, and snippets.

@sourcedelica
Last active September 2, 2017 05:06
Show Gist options
  • Save sourcedelica/7d81b085608a69556b902a90ea2340ad to your computer and use it in GitHub Desktop.
Save sourcedelica/7d81b085608a69556b902a90ea2340ad to your computer and use it in GitHub Desktop.
Reproduce hang during ~actor_system
GDB backtrace of all threads after hang
(gdb) thread apply all bt
Thread 9 (Thread 0x7ffff3748700 (LWP 30345)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007ffff7b0891c in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x000000000051b743 in caf::detail::private_thread::await_self_destroyed (this=this@entry=0x7d1790) at /home/eric/.conan/data/caf/0.15.2/sourcedelica/testing/build/b1c1bba64e5b990636f4bcca97c3f5313c24bc3c/actor-framework/libcaf_core/src/private_thread.cpp:106
#3 0x000000000051b7d1 in caf::detail::private_thread::exec (this_ptr=0x7d1790) at /home/eric/.conan/data/caf/0.15.2/sourcedelica/testing/build/b1c1bba64e5b990636f4bcca97c3f5313c24bc3c/actor-framework/libcaf_core/src/private_thread.cpp:90
#4 0x00007ffff7b0dc80 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5 0x00007ffff76296ba in start_thread (arg=0x7ffff3748700) at pthread_create.c:333
#6 0x00007ffff735f82d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
Thread 1 (Thread 0x7ffff7fe9740 (LWP 30337)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007ffff7b0891c in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x00000000004bb3cb in caf::actor_system::await_detached_threads (this=this@entry=0x7fffffffd260) at /home/eric/.conan/data/caf/0.15.2/sourcedelica/testing/build/b1c1bba64e5b990636f4bcca97c3f5313c24bc3c/actor-framework/libcaf_core/src/actor_system.cpp:370
#3 0x00000000004bb5c7 in caf::actor_system::~actor_system (this=0x7fffffffd260, __in_chrg=<optimized out>) at /home/eric/.conan/data/caf/0.15.2/sourcedelica/testing/build/b1c1bba64e5b990636f4bcca97c3f5313c24bc3c/actor-framework/libcaf_core/src/actor_system.cpp:278
#4 0x0000000000408354 in main ()
# Builds the `smt` reproduction binary from the two sources in this gist,
# linking statically against a CAF install located at CAF_ROOT.
cmake_minimum_required(VERSION 3.5)

# A top-level CMakeLists.txt is expected to call project(); without it CMake
# falls back to an implicit project and warns.
project(smt CXX)

# Request C++14 through CMake's own mechanism rather than appending a raw
# -std flag; extensions are disabled so the flag stays -std=c++14.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

set(SOURCES ServerMonitor.cpp ServerMonitorTest.cpp)

# Override with -DCAF_ROOT=/path/to/caf/install when CAF lives elsewhere.
set(CAF_ROOT "/usr/local/caf" CACHE PATH "CAF install directory")
set(CAF_INC ${CAF_ROOT}/include)
set(CAF_LIB ${CAF_ROOT}/lib)

# Static archives: caf_io is listed before caf_core, as in the original.
set(CAF_LIBS
    ${CAF_LIB}/libcaf_io_static.a
    ${CAF_LIB}/libcaf_core_static.a)

# Prefer -pthread for both compiling and linking.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

add_executable(smt ${SOURCES})

# Scope include paths and link dependencies to the target instead of the
# whole directory.
target_include_directories(smt PRIVATE ${CAF_INC})
target_link_libraries(smt PRIVATE ${CAF_LIBS} Threads::Threads)
1. Download zip of this gist and unzip on Linux
2. cmake -DCAF_ROOT=/path/to/caf/install . # Reproduced in CAF 0.15.1 through 0.15.3
3. make
4. gdb smt
5. At the gdb prompt, enter the following commands to rerun the program repeatedly until it hangs:
set pagination off
break exit
commands
run
end
run
#include "ServerMonitor.h"
#include "caf/actor_ostream.hpp"
#include "caf/io/middleman.hpp"
#include "caf/to_string.hpp"
// Actor entry point: registers the down-message handler, queues the first
// connection attempt, and returns the handlers built by monitorBehavior().
caf::behavior ServerMonitor::make_behavior() {
  // Route down messages for monitored actors to downHandler().
  set_down_handler([this](const caf::down_msg &msg) { downHandler(msg); });

  // Kick off the first connection attempt.
  tryConnect();

  return monitorBehavior();
}
// Invoked for each down message: logs the lost connection, forwards a
// down_atom together with the source of the down message to `parent`, and
// then queues a new connection attempt.
void ServerMonitor::downHandler(const caf::down_msg &dm) {
  caf::aout(this) << "Lost connection to " << hostname << ":" << port << std::endl;

  // Convert the down message's source into a strong pointer before sending.
  auto lostServer = caf::actor_cast<caf::strong_actor_ptr>(dm.source);
  send(parent, down_atom::value, lostServer);

  tryConnect();
}
// Sends a connect_atom message to this actor; the connect_atom handler in
// monitorBehavior() performs the actual connection attempt.
// The message is sent immediately, without any delay. Original author's note:
// "BUG IN MY CODE: this should do a delayed_send after the first attempt".
void ServerMonitor::tryConnect() {
  this->send(this, connect_atom::value);
}
// Builds the actor's message handlers.
//
// connect_atom: logs the attempt and sends a connect request for
// hostname:port to the middleman actor, passing ConnectTimeout to request().
//   - Response with a non-null actor pointer from a node other than
//     `lastNode`: remembers the node, monitors the actor, and sends up_atom
//     plus the actor pointer to `parent`.
//   - Response with a null actor pointer, or from the same node as
//     `lastNode`: logged as "No server found" and retried.
//   - Error response: logged as "Cannot connect" and retried.
//
// Retries are scheduled with delayed_send after RetryInterval instead of
// being re-sent immediately, so an unreachable server no longer causes a
// tight connect loop.
caf::behavior ServerMonitor::monitorBehavior() {
  return {
    [=](connect_atom) {
      caf::aout(this) << "Attempting connection to " << hostname << ":" << port << std::endl;
      auto mm = system().middleman().actor_handle();
      request(mm, ConnectTimeout, caf::connect_atom::value, hostname, port).then(
        [=](const caf::node_id &node, caf::strong_actor_ptr actorPtr, const std::set<std::string> &) {
          if (!actorPtr || node == lastNode) {
            caf::aout(this) << "No server found at " << hostname << ":" << port << std::endl;
            // Back off before the next attempt.
            delayed_send(this, RetryInterval, connect_atom::value);
          } else {
            caf::aout(this) << "Connected to " << hostname << ":" << port << std::endl;
            lastNode = node;
            monitor(actorPtr);
            send(parent, up_atom::value, actorPtr);
          }
        },
        // The error value itself is not inspected; only the failure is logged.
        [=](const caf::error &) {
          caf::aout(this) << "Cannot connect to " << hostname << ":" << port << std::endl;
          // Back off before the next attempt.
          delayed_send(this, RetryInterval, connect_atom::value);
        }
      );
    }
  };
}
#pragma once
#include "caf/fwd.hpp"
#include "caf/event_based_actor.hpp"
// Event-based actor that tries to connect to a remote CAF node at
// hostname:port through the middleman and reports to a parent actor:
// up_atom (with the remote actor pointer) once connected, down_atom (with the
// source of the down message) when the monitored actor goes down.
class ServerMonitor : public caf::event_based_actor {
public:
  // Atoms used in messages sent to the parent actor. failed_atom is declared
  // here but not referenced by the visible sources.
  using down_atom = caf::atom_constant<caf::atom("down")>;
  using up_atom = caf::atom_constant<caf::atom("up")>;
  using failed_atom = caf::atom_constant<caf::atom("failed")>;

  // cfg:      actor configuration forwarded to the base class.
  // hostname: host to connect to.
  // port:     port to connect to.
  // parent:   actor that receives the up/down notifications.
  ServerMonitor(caf::actor_config &cfg,
                const std::string &hostname,
                const uint16_t port,
                const caf::actor &parent)
      : caf::event_based_actor(cfg), hostname(hostname), port(port), parent(parent) {}

  caf::behavior make_behavior() override;

private:
  // Message this actor sends to itself to trigger a connection attempt.
  using connect_atom = caf::atom_constant<caf::atom("connect")>;

  void downHandler(const caf::down_msg &dm);
  caf::behavior monitorBehavior();
  void tryConnect();

  // Data members; declaration order is kept so initialization order is unchanged.
  const std::string hostname;
  const uint16_t port;
  const caf::actor parent;
  caf::strong_actor_ptr server = nullptr;  // not referenced by the visible sources
  caf::node_id lastNode;                   // node of the most recent successful connection
  const std::chrono::seconds ConnectTimeout{1};  // passed to request() for the connect call
  const std::chrono::seconds RetryInterval{2};
};
#include <thread>
#include <iostream>
#include <future>
#include <cassert>
#include "caf/atom.hpp"
#include "caf/actor_system.hpp"
#include "caf/actor_system_config.hpp"
#include "caf/scoped_actor.hpp"
#include "caf/io/middleman.hpp"
#include "caf/actor_ostream.hpp"
#include "ServerMonitor.h"
namespace {
const std::chrono::seconds ReceiveTimeout{4};
// Waits on `parent` for a message tagged with atom type T.
//
// Returns true when either a `(T, strong_actor_ptr)` or a bare `(T)` message
// is handled, and false when the ReceiveTimeout handler runs instead.
template <typename T>
bool receiveServerMonitor(caf::scoped_actor &parent) {
  // A plain bool is enough: the flag is only written by the handlers passed
  // to receive() and read after receive() has returned. This also removes the
  // use of std::atomic_bool without including <atomic>.
  bool success = false;
  parent->receive(
    [&](T, caf::strong_actor_ptr &) { success = true; },  // payload is not inspected
    [&](T) { success = true; },
    caf::after(ReceiveTimeout) >> [&] {}  // timeout: leave success == false
  );
  return success;
}
// Behavior of the server actor: a single handler that returns any received
// std::string unchanged. The `self` pointer is not used.
caf::behavior serverBehavior(caf::event_based_actor *self) {
  return {
    [](const std::string &text) { return text; }
  };
}
void startServerSystem(uint16_t port, std::promise<caf::expected<uint16_t>> &portPromise,
std::promise<void> &stopPromise) {
std::cout << "Server thread starting, requested port=" << port << std::endl;
caf::actor_system_config config;
config.load<caf::io::middleman>();
caf::actor_system system(config);
auto server = system.spawn(serverBehavior);
auto portExp = system.middleman().publish(server, port, nullptr, true);
portPromise.set_value(portExp);
if (portExp) {
std::cout << "Server actor published on port " << portExp.value() << std::endl;
stopPromise.get_future().wait();
}
caf::anon_send_exit(server, caf::exit_reason::user_shutdown);
system.await_all_actors_done();
std::cout << "Server thread complete" << std::endl;
};
// Resets both promises, launches startServerSystem on `serverThread`, and
// blocks until that thread reports its publish result through `portPromise`.
// Asserts that publishing succeeded and returns the published port.
uint16_t startServerThread(uint16_t port, std::thread &serverThread,
                           std::promise<caf::expected<uint16_t>> &portPromise,
                           std::promise<void> &stopPromise) {
  // Fresh promises for this run.
  portPromise = std::promise<caf::expected<uint16_t>>{};
  stopPromise = std::promise<void>{};

  // The promises are passed by reference, so they must outlive the thread.
  serverThread = std::thread{startServerSystem, port, std::ref(portPromise), std::ref(stopPromise)};

  auto published = portPromise.get_future().get();
  assert(published);
  return *published;
}
}
// Test driver.
//
// Starts a server actor system on its own thread, spawns a ServerMonitor
// pointed at the published port, and expects an up_atom notification. It then
// stops the server, joins its thread, and expects a down_atom notification.
//
// Returns 0 when both notifications were received and 1 otherwise. In builds
// with assertions enabled, a missing notification still aborts via assert.
int main(int, char **) {
  caf::actor_system_config config;
  config.load<caf::io::middleman>();
  caf::actor_system system{config};
  caf::scoped_actor parent{system};

  std::thread serverThread;
  std::promise<caf::expected<uint16_t>> portPromise;
  std::promise<void> stopPromise;

  // Port 0 is passed as the requested port; the published port is returned.
  auto port = startServerThread(0, serverThread, portPromise, stopPromise);
  auto serverMonitor = system.spawn<ServerMonitor>("localhost", port, parent);

  // Send the monitor an exit message when main's scope is left.
  auto guard = caf::detail::make_scope_guard([&] {
    anon_send_exit(serverMonitor, caf::exit_reason::user_shutdown);
  });

  // The receive calls are made outside assert() so they still execute when
  // NDEBUG is defined; previously the whole test body vanished in such builds.
  const bool gotUp = receiveServerMonitor<ServerMonitor::up_atom>(parent);
  assert(gotUp);

  stopPromise.set_value();
  serverThread.join();

  const bool gotDown = receiveServerMonitor<ServerMonitor::down_atom>(parent);
  assert(gotDown);

  return (gotUp && gotDown) ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment