Last active
August 29, 2015 14:07
-
-
Save rukletsov/3f19ecc7389fa51e65c0 to your computer and use it in GitHub Desktop.
Task force shutdown test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This test launches two long-living tasks, one that reacts to SIGTERM
// and one that traps it, using a very small graceful shutdown period.
// Neither task is able to shut down within that period, so both are
// expected to be forcibly killed by the executor and reported as
// TASK_KILLED.
TEST_F(SlaveTest, MesosExecutorForceShutdown)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Make the grace period not bigger than the reap interval.
  // TODO(alex): Use libprocess constant once it's available.
  auto minReapInterval = Milliseconds(100);

  slave::Flags flags = CreateSlaveFlags();
  flags.executor_shutdown_grace_period = minReapInterval;

  // Ensure escalation timeout is less than 100ms (minimal reap interval).
  auto timeout = mesos::internal::slave::adjustCommandExecutorShutdownTimeout(
      flags.executor_shutdown_grace_period);
  EXPECT_GT(minReapInterval, timeout);

  Try<MesosContainerizer*> containerizer = MesosContainerizer::create(
      flags, true);
  ASSERT_SOME(containerizer);

  Try<PID<Slave>> slave = StartSlave(containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This has to be a fatal assertion: the first offer is indexed right
  // below, which would be undefined behaviour on an empty vector.
  ASSERT_FALSE(offers.get().empty());
  const Offer& offer = offers.get()[0];

  // Create one task responsive to SIGTERM and one that is not.
  TaskInfo taskResponsive = createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:64").get(),
      "sleep 1000");

  // The hanging task installs a SIGTERM handler that only echoes, so
  // the shell loop keeps running after SIGTERM is delivered.
  TaskInfo taskHanging = createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:64").get(),
      "( handler() { echo SIGTERM; }; trap \'handler TERM\' SIGTERM; echo $$; "
      "echo $(which sleep); while true; do date; sleep 1; done; exit 0 )");

  // Both tasks must fit into the single offer they are launched on.
  EXPECT_LE(taskResponsive.resources() + taskHanging.resources(),
            offer.resources());

  vector<TaskInfo> tasks;
  tasks.push_back(taskResponsive);
  tasks.push_back(taskHanging);

  // Separate statusUpdate() calls for responsive and hanging tasks.
  Future<TaskStatus> taskResponsiveRunning, taskResponsiveKilled;
  auto updateForTaskResponsive = lambda::bind(
      &isStatusRelatedToTask, lambda::_1, taskResponsive.task_id());
  EXPECT_CALL(sched, statusUpdate(&driver, Truly(updateForTaskResponsive)))
    .WillOnce(FutureArg<1>(&taskResponsiveRunning))
    .WillOnce(FutureArg<1>(&taskResponsiveKilled));

  Future<TaskStatus> taskHangingRunning, taskHangingKilled;
  auto updateForTaskHanging = lambda::bind(
      &isStatusRelatedToTask, lambda::_1, taskHanging.task_id());
  EXPECT_CALL(sched, statusUpdate(&driver, Truly(updateForTaskHanging)))
    .WillOnce(FutureArg<1>(&taskHangingRunning))
    .WillOnce(FutureArg<1>(&taskHangingKilled));

  driver.launchTasks(offer.id(), tasks);

  AWAIT_READY(taskResponsiveRunning);
  EXPECT_EQ(TASK_RUNNING, taskResponsiveRunning.get().state());

  AWAIT_READY(taskHangingRunning);
  EXPECT_EQ(TASK_RUNNING, taskHangingRunning.get().state());

  driver.killTask(taskResponsive.task_id());
  driver.killTask(taskHanging.task_id());

  AWAIT_READY(taskResponsiveKilled);
  EXPECT_EQ(TASK_KILLED, taskResponsiveKilled.get().state());

  AWAIT_READY(taskHangingKilled);
  EXPECT_EQ(TASK_KILLED, taskHangingKilled.get().state());

  // If the task doesn't react to SIGTERM in a certain timeout,
  // CommandExecutor sends a SIGKILL.
  // NOTE: strsignal() behaves differently on Mac OS and Linux (the
  // description of SIGKILL is "Killed: 9" vs. "Killed"), hence only the
  // prefix shared by both platforms is checked: find() returns 0 if and
  // only if the message starts with the expected text.
  // TODO(alex): By now we have no better way to extract the kill
  // reason. Change this once we have level 2 enums for task states.
  EXPECT_TRUE(taskHangingKilled.get().has_message());
  EXPECT_EQ(0u,
            taskHangingKilled.get().message().find(
                "Command terminated with signal Killed"));

  // NOTE: The task may have reacted to SIGTERM, but may not have been
  // yet reaped due to insufficient timeout. In this case we still send
  // a SIGKILL, but the task may terminate with either signal. For this
  // reason we don't check the taskResponsiveKilled.get().message().

  driver.stop();
  driver.join();

  this->Shutdown();

  // The containerizer was handed to StartSlave() as a raw pointer; it
  // is released here, after the cluster has been shut down.
  delete containerizer.get();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment