Skip to content

Instantly share code, notes, and snippets.

@planetA
Created August 22, 2017 15:51
Show Gist options
  • Save planetA/d4f9b9a70983e9c673521625cc7ed2d9 to your computer and use it in GitHub Desktop.
Save planetA/d4f9b9a70983e9c673521625cc7ed2d9 to your computer and use it in GitHub Desktop.
Log from coordinator
[15886] NOTE at dmtcp_coordinator.cpp:757 in initializeComputation; REASON='Resetting computation'
[15886] NOTE at dmtcp_coordinator.cpp:975 in validateRestartingWorkerProcess; REASON='FIRST dmtcp_restart connection. Set numPeers. Generate timestamp'
numPeers = 8
curTimeStamp = 4844149844226136
compId = 4b3242429168a16a-40000-113564b9a89a88
[15886] NOTE at dmtcp_coordinator.cpp:1329 in updateCheckpointInterval; REASON='CheckpointInterval updated (for this computation only)'
oldInterval = 0
theCheckpointInterval = 0
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a16a-41000-113564bbb4abc2
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a16a-40000-113564bbe6f584
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a168-46000-113565e83fb84d
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a168-47000-113565e83fb84a
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a01e-43000-11356622bfe25b
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a01e-42000-11356622bfcf1a
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a01f-45000-113564bdaeb90a
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:900 in onConnect; REASON='worker connected'
hello_remote.from = 4b3242429168a01f-44000-113564bdaeb90b
client->progname() = cp2k.popt
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a168-46000-113565e83fb84d
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a16a-41000-113564bbb4abc2
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a01e-43000-11356622bfe25b
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a01f-44000-113564bdaeb90b
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a01e-42000-11356622bfcf1a
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a168-47000-113565e83fb84a
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a01f-45000-113564bdaeb90a
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:589 in onData; REASON='got DMT_BARRIER_LIST message'
msg.from = 4b3242429168a16a-40000-113564bbe6f584
extraData = infiniband::checkpoint,socket::CKPT_REGISTER_PEER_INFO,socket::CKPT_RETRIEVE_PEER_INFO;DMTCP::RESTART,socket::RESTART_NS_SEND_QUERIES,infiniband::restart,infiniband::restart_nameservice_register_data,infiniband::restart_nameservice_send_queries,infiniband::restart_refill
client->state() = WorkerState::RESTARTING
[15886] NOTE at dmtcp_coordinator.cpp:476 in updateMinimumState; REASON='Releasing next restart barrier'
restartBarriers[nextRestartBarrier] = DMTCP::RESTART
[15886] NOTE at dmtcp_coordinator.cpp:728 in onDisconnect; REASON='client disconnected'
client->identity() = 4b3242429168a168-47000-113565e83fb84a
client->progname() = cp2k.popt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment