Created
April 29, 2017 01:03
-
-
Save planetA/cc0cbf7cecec56049c79d7d8d53f7d5d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[11908] NOTE at dmtcp_coordinator.cpp:855 in onConnect; REASON='worker connected' | |
hello_remote.from = 55b24a7c80853818-30373-22c2200d527e | |
[11908] NOTE at dmtcp_coordinator.cpp:855 in onConnect; REASON='worker connected' | |
hello_remote.from = 55b24a7c80853818-30374-22c2202e8d96 | |
[11908] NOTE at dmtcp_coordinator.cpp:855 in onConnect; REASON='worker connected' | |
hello_remote.from = 55b24a7c80853818-30376-22c2205c4835 | |
[11908] NOTE at dmtcp_coordinator.cpp:643 in onData; REASON='Updating process Information after exec()' | |
progname = simple | |
msg.from = 55b24a7c80853818-221000-22c220b60ada | |
client->identity() = 55b24a7c80853818-30374-22c2202e8d96 | |
[11908] NOTE at dmtcp_coordinator.cpp:855 in onConnect; REASON='worker connected' | |
hello_remote.from = 55b24a7c80853818-30379-22c22079bf81 | |
[11908] NOTE at dmtcp_coordinator.cpp:643 in onData; REASON='Updating process Information after exec()' | |
progname = simple | |
msg.from = 55b24a7c80853818-220000-22c220dd7c92 | |
client->identity() = 55b24a7c80853818-30373-22c2200d527e | |
[11908] NOTE at dmtcp_coordinator.cpp:643 in onData; REASON='Updating process Information after exec()' | |
progname = simple | |
msg.from = 55b24a7c80853818-222000-22c220e595fb | |
client->identity() = 55b24a7c80853818-30376-22c2205c4835 | |
[11908] NOTE at dmtcp_coordinator.cpp:643 in onData; REASON='Updating process Information after exec()' | |
progname = simple | |
msg.from = 55b24a7c80853818-223000-22c223c0d3c2 | |
client->identity() = 55b24a7c80853818-30379-22c22079bf81 | |
[11908] NOTE at dmtcp_coordinator.cpp:693 in onDisconnect; REASON='client disconnected' | |
client->identity() = 55b24a7c80853818-221000-22c220b60ada | |
client->progname() = simple | |
[11908] NOTE at dmtcp_coordinator.cpp:1071 in startCheckpoint; REASON='starting checkpoint, suspending all nodes' | |
s.numPeers = 3 | |
[11908] NOTE at dmtcp_coordinator.cpp:1073 in startCheckpoint; REASON='Incremented computationGeneration' | |
compId.computationGeneration() = 1 | |
[11908] NOTE at dmtcp_coordinator.cpp:693 in onDisconnect; REASON='client disconnected' | |
client->identity() = 55b24a7c80853818-222000-22c220e595fb | |
client->progname() = simple | |
[11908] NOTE at dmtcp_coordinator.cpp:413 in updateMinimumState; REASON='locking all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:419 in updateMinimumState; REASON='draining all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:425 in updateMinimumState; REASON='checkpointing all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:449 in updateMinimumState; REASON='building name service database' | |
[11908] NOTE at dmtcp_coordinator.cpp:465 in updateMinimumState; REASON='entertaining queries now' | |
[11908] NOTE at dmtcp_coordinator.cpp:470 in updateMinimumState; REASON='refilling all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:510 in updateMinimumState; REASON='restarting all nodes' | |
JTIMER(checkpoint) : 0.496187 | |
[11908] NOTE at dmtcp_coordinator.cpp:693 in onDisconnect; REASON='client disconnected' | |
client->identity() = 55b24a7c80853818-223000-22c223c0d3c2 | |
client->progname() = simple | |
[11908] NOTE at dmtcp_coordinator.cpp:1071 in startCheckpoint; REASON='starting checkpoint, suspending all nodes' | |
s.numPeers = 1 | |
[11908] NOTE at dmtcp_coordinator.cpp:1073 in startCheckpoint; REASON='Incremented computationGeneration' | |
compId.computationGeneration() = 2 | |
[11908] NOTE at dmtcp_coordinator.cpp:413 in updateMinimumState; REASON='locking all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:419 in updateMinimumState; REASON='draining all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:425 in updateMinimumState; REASON='checkpointing all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:449 in updateMinimumState; REASON='building name service database' | |
[11908] NOTE at dmtcp_coordinator.cpp:465 in updateMinimumState; REASON='entertaining queries now' | |
[11908] NOTE at dmtcp_coordinator.cpp:470 in updateMinimumState; REASON='refilling all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:510 in updateMinimumState; REASON='restarting all nodes' | |
JTIMER(checkpoint) : 0.459036 | |
[11908] NOTE at dmtcp_coordinator.cpp:1071 in startCheckpoint; REASON='starting checkpoint, suspending all nodes' | |
s.numPeers = 1 | |
[11908] NOTE at dmtcp_coordinator.cpp:1073 in startCheckpoint; REASON='Incremented computationGeneration' | |
compId.computationGeneration() = 3 | |
[11908] NOTE at dmtcp_coordinator.cpp:413 in updateMinimumState; REASON='locking all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:419 in updateMinimumState; REASON='draining all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:425 in updateMinimumState; REASON='checkpointing all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:449 in updateMinimumState; REASON='building name service database' | |
[11908] NOTE at dmtcp_coordinator.cpp:465 in updateMinimumState; REASON='entertaining queries now' | |
[11908] NOTE at dmtcp_coordinator.cpp:470 in updateMinimumState; REASON='refilling all nodes' | |
[11908] NOTE at dmtcp_coordinator.cpp:510 in updateMinimumState; REASON='restarting all nodes' | |
JTIMER(checkpoint) : 0.467209 | |
[11908] NOTE at dmtcp_coordinator.cpp:693 in onDisconnect; REASON='client disconnected' | |
client->identity() = 55b24a7c80853818-220000-22c220dd7c92 | |
client->progname() = simple |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <unistd.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <string.h> | |
#include <dmtcp.h> | |
/*
 * CRASH(format, ...) - report a test failure and terminate the process.
 *
 * Writes "DMTCPTESTEND:DMTCPCRASH:<file>:<line> " followed by the
 * printf-style message built from `format` and the optional arguments to
 * stderr, then calls exit(EXIT_FAILURE).  It never returns.
 *
 * stdout is flushed before the marker is written.  When stdout is
 * block-buffered (for example when it is redirected to a pipe), text
 * printed earlier would otherwise only be written by exit(), i.e. after
 * the end marker has already gone out on stderr.
 *
 * `format` must be a string literal because it is concatenated with the
 * marker prefix.  The macro relies on the GNU statement-expression and
 * `##__VA_ARGS__` extensions.
 */
#define CRASH(format, ...) ({                                       \
    fflush(stdout);                                                 \
    fprintf(stderr, "DMTCPTESTEND:DMTCPCRASH:%s:%d " format,        \
            __FILE__, __LINE__, ##__VA_ARGS__);                     \
    exit(EXIT_FAILURE);                                             \
})
/*
 * OK(format, ...) - report a successful test run and terminate the process.
 *
 * Writes "DMTCPTESTEND:DMTCPOK:<file>:<line> " followed by the
 * printf-style message built from `format` and the optional arguments to
 * stderr, then calls exit(EXIT_SUCCESS).  It never returns.
 *
 * stdout is flushed before the marker is written.  When stdout is
 * block-buffered (for example when it is redirected to a pipe), text
 * printed earlier would otherwise only be written by exit(), i.e. after
 * the end marker has already gone out on stderr.
 *
 * `format` must be a string literal because it is concatenated with the
 * marker prefix.  The macro relies on the GNU statement-expression and
 * `##__VA_ARGS__` extensions.
 */
#define OK(format, ...) ({                                          \
    fflush(stdout);                                                 \
    fprintf(stderr, "DMTCPTESTEND:DMTCPOK:%s:%d " format,           \
            __FILE__, __LINE__, ##__VA_ARGS__);                     \
    exit(EXIT_SUCCESS);                                             \
})
/*
 * Name of the environment variable that carries this process's rank.
 * A build that already defines MPIRANK (e.g. on the compiler command
 * line) keeps its own value; otherwise fall back to the variable that
 * Open MPI's mpirun exports to every launched process.
 */
#ifndef MPIRANK
#define MPIRANK "OMPI_COMM_WORLD_RANK"
#endif

/* By default assume that we are the only process */
int rank = 0;

/*
 * Initialise the global `rank` from the MPIRANK environment variable.
 *
 * argc/argv are accepted for signature compatibility but are not read.
 *
 * If the variable is unset, `rank` is left untouched.  If it is set, it
 * must be a complete non-negative decimal integer that fits in an int;
 * anything else is reported on stderr and terminates the process with
 * EXIT_FAILURE.  A malformed value is rejected rather than being read as
 * 0, because rank 0 is the process that requests checkpoints.
 */
void parse_args(int argc, char ** argv)
{
    (void)argc;
    (void)argv;

    const char *rank_str = getenv(MPIRANK);
    if (rank_str == NULL) {
        return;
    }

    char *end = NULL;
    long parsed = strtol(rank_str, &end, 10);

    /* Explicit validation: unlike assert(), this survives -DNDEBUG. */
    if (end == rank_str || *end != '\0' ||
        parsed < 0 || parsed != (long)(int)parsed) {
        fprintf(stderr, "Invalid %s value: '%s'\n", MPIRANK, rank_str);
        exit(EXIT_FAILURE);
    }

    rank = (int)parsed;
}
int main(int argc, char **argv) | |
{ | |
if (dmtcp_is_enabled() == 0) { | |
CRASH("dmtcpcheckpoint: DMTCP not enabled.\n"); | |
} | |
parse_args(argc, argv); | |
printf("My rank is %d\n", rank); | |
for (int i = 0; i < 3; i++) { | |
printf("Going sleep\n"); | |
/* sleep(1); */ | |
printf("Woke up. Making checkpoint. %d\n", rank); | |
fflush(stdout); | |
if (rank) | |
continue; | |
sleep(1); | |
printf("Entering ckpt. %d\n", rank); | |
if (dmtcp_checkpoint() == DMTCP_NOT_PRESENT) { | |
CRASH("DMTCP not present.\n"); | |
} | |
printf("Woke up. %d\n", rank); | |
} | |
printf("Preparing to leave the program\n"); | |
/* Comment this out */ | |
sleep(1); | |
printf("Leaving the program: %d\n", rank); | |
OK("Normal exit\n"); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ mpirun -n 4 --oversubscribe -tag-output ../dmtcp/bin/dmtcp_launch --coord-host localhost --coord-port 7779 ./simple checkpoint | |
[1,1]<stdout>:My rank is 1 | |
[1,1]<stdout>:Going sleep | |
[1,1]<stdout>:Woke up. Making checkpoint. 1 | |
[1,1]<stdout>:Going sleep | |
[1,1]<stdout>:Woke up. Making checkpoint. 1 | |
[1,1]<stdout>:Going sleep | |
[1,1]<stdout>:Woke up. Making checkpoint. 1 | |
[1,1]<stdout>:Preparing to leave the program | |
[1,0]<stdout>:My rank is 0 | |
[1,0]<stdout>:Going sleep | |
[1,0]<stdout>:Woke up. Making checkpoint. 0 | |
[1,2]<stdout>:My rank is 2 | |
[1,2]<stdout>:Going sleep | |
[1,2]<stdout>:Woke up. Making checkpoint. 2 | |
[1,2]<stdout>:Going sleep | |
[1,2]<stdout>:Woke up. Making checkpoint. 2 | |
[1,2]<stdout>:Going sleep | |
[1,2]<stdout>:Woke up. Making checkpoint. 2 | |
[1,2]<stdout>:Preparing to leave the program | |
[1,3]<stdout>:My rank is 3 | |
[1,3]<stdout>:Going sleep | |
[1,3]<stdout>:Woke up. Making checkpoint. 3 | |
[1,3]<stdout>:Going sleep | |
[1,3]<stdout>:Woke up. Making checkpoint. 3 | |
[1,3]<stdout>:Going sleep | |
[1,3]<stdout>:Woke up. Making checkpoint. 3 | |
[1,3]<stdout>:Preparing to leave the program | |
[1,1]<stdout>:Leaving the program: 1 | |
[1,1]<stderr>:DMTCPTESTEND:DMTCPOK:simple.c:61 Normal exit | |
[1,0]<stdout>:Entering ckpt. 0 | |
[1,2]<stdout>:Leaving the program: 2 | |
[1,2]<stderr>:DMTCPTESTEND:DMTCPOK:simple.c:61 Normal exit | |
[1,3]<stdout>:Leaving the program: 3 | |
[1,0]<stdout>:Woke up. 0 | |
[1,0]<stdout>:Going sleep | |
[1,3]<stderr>:DMTCPTESTEND:DMTCPOK:simple.c:61 Normal exit | |
[1,0]<stdout>:Woke up. Making checkpoint. 0 | |
[1,0]<stdout>:Entering ckpt. 0 | |
[1,0]<stdout>:Woke up. 0 | |
[1,0]<stdout>:Going sleep | |
[1,0]<stdout>:Woke up. Making checkpoint. 0 | |
[1,0]<stdout>:Entering ckpt. 0 | |
[1,0]<stdout>:Woke up. 0 | |
[1,0]<stdout>:Preparing to leave the program | |
[1,0]<stderr>:DMTCPTESTEND:DMTCPOK:simple.c:61 Normal exit | |
[1,0]<stdout>:Leaving the program: 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ruseykin2@gmail.com