Skip to content

Instantly share code, notes, and snippets.

@planetA
Created April 28, 2017 14:16
Show Gist options
  • Save planetA/a4465558bfd0e16c52b686320676b0cd to your computer and use it in GitHub Desktop.
Save planetA/a4465558bfd0e16c52b686320676b0cd to your computer and use it in GitHub Desktop.
Minimal example for race condition
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <dmtcp.h>
/*
 * Report a fatal test failure and terminate the process.
 *
 * Writes "DMTCPTESTEND:DMTCPCRASH:<file>:<line> " followed by the
 * printf-style message built from `format` and the optional arguments to
 * stderr, then calls exit(EXIT_FAILURE). <file> and <line> are those of the
 * place where the macro is used. The expansion is a single void expression.
 */
#define CRASH(format, ...)                                          \
  (fprintf(stderr,                                                  \
           "DMTCPTESTEND:DMTCPCRASH:%s:%d " format,                 \
           __FILE__, __LINE__, ##__VA_ARGS__),                      \
   exit(EXIT_FAILURE))
/*
 * Report a successful test run and terminate the process.
 *
 * Writes "DMTCPTESTEND:DMTCPOK:<file>:<line> " followed by the printf-style
 * message built from `format` and the optional arguments to stderr, then
 * calls exit(EXIT_SUCCESS). <file> and <line> are those of the place where
 * the macro is used. The expansion is a single void expression.
 */
#define OK(format, ...)                                             \
  (fprintf(stderr,                                                  \
           "DMTCPTESTEND:DMTCPOK:%s:%d " format,                    \
           __FILE__, __LINE__, ##__VA_ARGS__),                      \
   exit(EXIT_SUCCESS))
/*
 * Rank of this process. It defaults to 0, i.e. we assume we are the only
 * process, unless parse_args() finds a rank value in the environment.
 */
int rank = 0;
/*
 * Initialise the global `rank` from the environment.
 *
 * Looks up the environment variable named by MPIRANK. If the variable is
 * not set, `rank` keeps its default value. If it is set, it must hold a
 * complete, non-negative base-10 integer that fits in an int; anything else
 * is reported through CRASH(), which terminates the process.
 *
 * NOTE(review): MPIRANK is not defined anywhere in this file; presumably the
 * build supplies it as a string macro (e.g. -DMPIRANK='"..."') -- confirm.
 *
 * argc, argv: accepted so the call site can pass the command line through,
 *             but currently not consulted.
 */
void parse_args(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  const char *rank_str = getenv(MPIRANK);
  if (rank_str == NULL) {
    return; /* No rank in the environment: keep the default. */
  }

  /* strtol, unlike atoi, lets us detect garbage and out-of-range input. */
  char *end = NULL;
  errno = 0;
  long parsed = strtol(rank_str, &end, 10);

  if (end == rank_str || *end != '\0') {
    CRASH("Rank value '%s' is not an integer.\n", rank_str);
  }
  if (errno == ERANGE || parsed < 0 || parsed > INT_MAX) {
    CRASH("Rank value '%s' is out of range.\n", rank_str);
  }

  rank = (int)parsed;
}
/*
 * Test driver.
 *
 * 1. Fails via CRASH() unless dmtcp_is_enabled() reports a non-zero value.
 * 2. Lets parse_args() set the global `rank`, then prints it.
 * 3. Runs up to CHECKPOINT_ROUNDS rounds; each round sleeps one second and
 *    announces a checkpoint. Rank 0 leaves the loop right after the first
 *    announcement without calling dmtcp_checkpoint(); every other rank calls
 *    dmtcp_checkpoint() and fails via CRASH() if it returns DMTCP_NOT_PRESENT.
 * 4. Sleeps FINAL_NAPS times for NAP_SECONDS each, then reports success via
 *    OK(), which terminates the process.
 */
int main(int argc, char **argv)
{
  enum { CHECKPOINT_ROUNDS = 3, FINAL_NAPS = 5, NAP_SECONDS = 2 };

  if (!dmtcp_is_enabled()) {
    CRASH("dmtcpcheckpoint: DMTCP not enabled.\n");
  }

  parse_args(argc, argv);
  printf("My rank is %d\n", rank);

  int round = 0;
  while (round < CHECKPOINT_ROUNDS) {
    printf("Going sleep\n");
    sleep(1);
    printf("Woke up. Making checkpoint.\n");
    /* Push the messages out before a checkpoint may be taken. */
    fflush(stdout);

    /* Rank 0 never requests a checkpoint; it stops iterating here. */
    if (rank == 0) {
      break;
    }

    if (dmtcp_checkpoint() == DMTCP_NOT_PRESENT) {
      CRASH("DMTCP not present.\n");
    }

    round++;
  }

  /* NOTE(review): presumably this keeps the process alive while other
   * ranks are still checkpointing -- confirm. */
  for (int nap = 0; nap < FINAL_NAPS; nap++) {
    sleep(NAP_SECONDS);
  }

  OK("Normal exit\n");
}
@rohgarg
Copy link

rohgarg commented Apr 28, 2017

@planetA: Here are the modifications I made to test this locally.

diff --git a/simple.c b/simple.c
index 591f43f..5ccc86f 100644
--- a/simple.c
+++ b/simple.c
@@ -4,6 +4,7 @@
 #include <assert.h>
 #include <string.h>
 
+#include <mpi.h>
 #include <dmtcp.h>
 
 #define CRASH(format, ...) ({                                           \
@@ -21,23 +22,15 @@
 /* By default assume that we are the only process */
 int rank = 0;
 
-void parse_args(int argc, char ** argv)
-{
-  char *rank_str = getenv(MPIRANK);
-  if (rank_str != NULL) {
-      rank = atoi(rank_str);
-      assert(rank >= 0);
-  }
-}
-
 int main(int argc, char **argv)
 {
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
   if (dmtcp_is_enabled() == 0) {
     CRASH("dmtcpcheckpoint:  DMTCP not enabled.\n");
   }
 
-  parse_args(argc, argv);
-
   printf("My rank is %d\n", rank);
   for (int i = 0; i < 3; i++) {
     printf("Going sleep\n");
@@ -45,16 +38,14 @@ int main(int argc, char **argv)
     printf("Woke up. Making checkpoint.\n");
     fflush(stdout);
     if (!rank)
-      break;
+      continue;
     if (dmtcp_checkpoint() == DMTCP_NOT_PRESENT) {
       CRASH("DMTCP not present.\n");
     }
   }
 
-  sleep(2);
-  sleep(2);
-  sleep(2);
-  sleep(2);
-  sleep(2);
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  MPI_Finalize();
   OK("Normal exit\n");
 }

Your example works after these changes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment