Last active
April 13, 2017 21:55
-
-
Save hppritcha/a14773ebfa29e20da04a4163cef61779 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 2a654e59ff06657d8991eb15d727b3488d3cb4c6 Mon Sep 17 00:00:00 2001 | |
From: Howard Pritchard <howardp@lanl.gov> | |
Date: Tue, 11 Apr 2017 14:38:40 -0500 | |
Subject: [PATCH] fix usage of PMI2_Info_GetNodeAttr/PutNodeAttr | |
When using the CH4/OFI device, there are two separate | |
shared memory regions set up, both relying on the | |
PMI2_Info_GetNodeAttr/PutNodeAttr calls on the same key | |
somehow working. At least on Cray, they did not. The | |
non-local-rank zero processes were seeing the old | |
value of "sharedFilename" that had been set in | |
the CH4/OFI device setup when using PMI2. The subsequent | |
creation/attach of the main shared memory segment in | |
MPIDI_POSIX_mpi_init_hook would fail. | |
This commit fixes this problem. | |
Signed-off-by: Howard Pritchard <howardp@lanl.gov> | |
diff --git a/src/mpid/common/shm/mpidu_shm_alloc.c b/src/mpid/common/shm/mpidu_shm_alloc.c | |
index f3e83118..22538d9f 100644 | |
--- a/src/mpid/common/shm/mpidu_shm_alloc.c | |
+++ b/src/mpid/common/shm/mpidu_shm_alloc.c | |
@@ -147,6 +147,10 @@ int MPIDU_shm_seg_commit(MPIDU_shm_seg_t *memory, MPIDU_shm_barrier_t **barrier, | |
char *serialized_hnd = NULL; | |
void *current_addr; | |
void *start_addr ATTRIBUTE((unused)); | |
+#ifdef USE_PMI2_API | |
+ char attrname[512]; | |
+ static int call_count = 0; | |
+#endif | |
size_t size_left; | |
MPIR_CHKPMEM_DECL(1); | |
MPIR_CHKLMEM_DECL(2); | |
@@ -196,6 +200,8 @@ int MPIDU_shm_seg_commit(MPIDU_shm_seg_t *memory, MPIDU_shm_barrier_t **barrier, | |
memory->segment_len = segment_len; | |
#ifdef USE_PMI2_API | |
+ MPL_snprintf(attrname, sizeof(attrname), "sharedFileName.%d", call_count); | |
+ call_count++; | |
/* if there is only one process on this processor, don't use shared memory */ | |
if (num_local == 1) | |
{ | |
@@ -246,7 +252,7 @@ int MPIDU_shm_seg_commit(MPIDU_shm_seg_t *memory, MPIDU_shm_barrier_t **barrier, | |
* serializing operation with our peers on the local node this | |
* ensures that these initializations have occurred before any peer | |
* attempts to use the resources. */ | |
- mpi_errno = PMI2_Info_PutNodeAttr("sharedFilename", serialized_hnd); | |
+ mpi_errno = PMI2_Info_PutNodeAttr(attrname, serialized_hnd); | |
if (mpi_errno) MPIR_ERR_POP(mpi_errno); | |
} else { | |
int found = FALSE; | |
@@ -255,7 +261,7 @@ int MPIDU_shm_seg_commit(MPIDU_shm_seg_t *memory, MPIDU_shm_barrier_t **barrier, | |
MPIR_CHKLMEM_MALLOC(val, char *, PMI2_MAX_VALLEN, mpi_errno, "val"); | |
/* get name of shared file */ | |
- mpi_errno = PMI2_Info_GetNodeAttr("sharedFilename", val, PMI2_MAX_VALLEN, &found, TRUE); | |
+ mpi_errno = PMI2_Info_GetNodeAttr(attrname, val, PMI2_MAX_VALLEN, &found, TRUE); | |
if (mpi_errno) MPIR_ERR_POP(mpi_errno); | |
MPIR_ERR_CHKINTERNAL(!found, mpi_errno, "nodeattr not found"); | |
-- | |
1.7.12.4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment