Cray PMI patch for MPICH
From eb7f2fa4d316300c9894b538db4ec7eb49965670 Mon Sep 17 00:00:00 2001
From: Howard Pritchard <howardp@lanl.gov>
Date: Sun, 3 Jan 2016 11:37:03 -0600
Subject: [PATCH] netmod/ofi: changes to be able to use cray pmi

Cray PMI has some quirks: it supports the
PMI2 interfaces, but it does not define
PMI2_SUCCESS and does not provide a pmi2.h
include file.

Add workarounds to the ofi netmod so that it
works when launched via Cray aprun or native
SLURM srun.

Signed-off-by: Howard Pritchard <howardp@lanl.gov>
Conflicts:
src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
---
src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h | 8 +++++++-
src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c | 21 ++++++++++++++++++---
2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index c780bc06..51e2c18d 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -12,7 +12,13 @@
#include "mpid_nem_impl.h"
#include "mpihandlemem.h"
+#ifdef USE_PMI2_API
+#include "pmi2.h"
+#define PMIX_SUCCESS PMI2_SUCCESS
+#else
#include "pmi.h"
+#define PMIX_SUCCESS PMI_SUCCESS
+#endif
#include <rdma/fabric.h>
#include <rdma/fi_errno.h>
#include <rdma/fi_endpoint.h>
@@ -190,7 +196,7 @@ fn_fail: \
do \
{ \
pmi_errno = FUNC; \
- MPIR_ERR_##CHKANDJUMP4(pmi_errno!=PMI_SUCCESS, \
+ MPIR_ERR_##CHKANDJUMP4(pmi_errno!=PMIX_SUCCESS, \
mpi_errno, \
MPI_ERR_OTHER, \
"**ofi_"#STR, \
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index c778af89..ec980e8f 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -42,7 +42,7 @@ cvars:
#define FCNAME DECL_FUNC(MPID_nem_ofi_init)
int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
- int ret, fi_version, i, len, pmi_errno;
+ int ret, fi_version, i, len, pmi_errno, vallen;
int mpi_errno = MPI_SUCCESS;
info_t *hints, *prov_tagged, *prov_use;
cq_attr_t cq_attr;
@@ -222,11 +222,17 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
/* Publish the business card */
/* to the KVS */
/* -------------------------------- */
+#ifndef USE_PMI2_API
PMI_RC(PMI_KVS_Get_my_name(kvsname, OFI_KVSAPPSTRLEN), pmi);
+#endif
sprintf(key, "OFI-%d", pg_rank);
- PMI_RC(PMI_KVS_Put(kvsname, key, my_bc), pmi);
- PMI_RC(PMI_KVS_Commit(kvsname), pmi);
+#ifdef USE_PMI2_API
+ PMI_RC(PMI2_KVS_Put(key, my_bc), pmi);
+#else
+ PMI_RC(PMI_KVS_Put(kvsname, key, my_bc), pmi);
+ PMI_RC(PMI_KVS_Commit(kvsname), pmi);
+#endif
/* -------------------------------- */
/* Set the MPI maximum tag value */
@@ -238,7 +244,11 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
/* their business card */
/* --------------------------------- */
gl_data.rts_cts_in_flight = 0;
+#ifdef USE_PMI2_API
+ PMI2_KVS_Fence();
+#else
PMI_Barrier();
+#endif
/* --------------------------------- */
/* Retrieve every rank's address */
@@ -250,7 +260,12 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
for (i = 0; i < pg_p->size; ++i) {
sprintf(key, "OFI-%d", i);
+#ifdef USE_PMI2_API
+ PMI_RC(PMI2_KVS_Get(pg_p->id, PMI2_ID_NULL, key, bc, OFI_KVSAPPSTRLEN,
+ &vallen), pmi);
+#else
PMI_RC(PMI_KVS_Get(kvsname, key, bc, OFI_KVSAPPSTRLEN), pmi);
+#endif
ret = MPL_str_get_binary_arg(bc, "OFI",
(char *) &addrs[i * gl_data.bound_addrlen],
gl_data.bound_addrlen, &len);
--
1.7.12.4