manual gatherv + derived types
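A manual gather of a block-decomposed 2-D grid onto rank 0, built from MPI_Isend/MPI_Irecv and nested hvector derived datatypes rather than MPI_Gatherv. The driver script test-gather.sh (at the bottom) runs the program under Open MPI 2.1.0, MPICH 3.2, and Intel MPI 2017 Update 2; every configuration passes except 256x256 on 3 ranks, which hangs under MPICH and Intel MPI.

mpi-gather.cpp: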
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <cassert>
#include <cmath>
#include <algorithm>
#include <limits>
#include <vector>
#include <mpi.h>
// Trial-division prime factorization: returns the prime factors of num
// in nondecreasing order.
static std::vector<int> get_factors(int num)
{
    std::vector<int> res;
    while(num > 1)
    {
        if((num % 2) == 0)
        {
            num /= 2;
            res.push_back(2);
        }
        else
        {
            bool divisor = false;
            for(int i = 3; i <= std::sqrt(num); i += 2)
                if((num % i) == 0)
                {
                    num /= i;
                    res.push_back(i);
                    divisor = true;
                    break;
                }
            if(!divisor)
            {
                // num is prime; it is its own last factor.
                res.push_back(num);
                num = 1;
            }
        }
    }
    return res;
}
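// Example (illustrative): get_factors(12) returns {2, 2, 3}.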
// Sum of the elements of *set selected by the bits of num.
static double nth_subset_sum(unsigned long long num, const std::vector<double> *set)
{
    double res = 0.0;
    for(size_t i = 0; i < set->size(); ++i)
        if(num & (1ULL << i))
            res += (*set)[i];
    return res;
}
// Product of the elements of *set selected by the bits of num.
static int nth_subset_prod(unsigned long long num, const std::vector<int> *set)
{
    int res = 1;
    for(size_t i = 0; i < set->size(); ++i)
        if(num & (1ULL << i))
            res *= (*set)[i];
    return res;
}
// Find the nonempty, proper subset of *ln_factors whose sum is closest to
// split; the subset is returned as a bitmask over the factor indices. If
// there is only one factor, the loop body never runs and argbest stays
// -1ULL, which selects every factor downstream.
static unsigned long long best_subset(const double split, const std::vector<double> *ln_factors)
{
    double best = std::numeric_limits<double>::max();
    unsigned long long argbest = -1ULL;
    for(unsigned long long i = 1; i < (1ULL << ln_factors->size()) - 1; ++i)
    {
        const double prod = std::abs(nth_subset_sum(i, ln_factors) - split);
        if(prod < best)
        {
            best = prod;
            argbest = i;
        }
    }
    return argbest;
}
// Decompose num into `groups` factors whose logarithms are as balanced as
// possible, i.e. a nearly square process grid. Returns the factors in
// ascending order.
std::vector<int> best_decomp(int num, int groups)
{
    const double split = std::log((double)num)/groups;
    std::vector<int> res(groups);
    int todiv = num;
    for(int i = 0; i < groups-1; ++i)
    {
        std::vector<int> factors = get_factors(todiv);
        std::vector<double> ln_factors(factors.size());
        for(size_t j = 0; j < ln_factors.size(); ++j)
            ln_factors[j] = std::log(factors[j]);
        const unsigned long long set_idx = best_subset(split, &ln_factors);
        res[i] = nth_subset_prod(set_idx, &factors);
        todiv /= res[i];
    }
    res[groups-1] = todiv;
    std::sort(res.begin(), res.end());
    return res;
}
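// Example (illustrative): best_decomp(12, 2) -> {3, 4}. split = ln(12)/2
// ~= 1.242; among the subsets of the factors {2, 2, 3}, {2, 2} (log sum
// ~= 1.386) and {3} (~= 1.099) are equally close to split, the first one
// found wins, and sorting yields {3, 4}.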
// Split nitems into nchunks nearly equal contiguous ranges; the first
// `remainder` chunks get one extra item. Writes the half-open range
// [*start, *end) for chunk chunkno.
void divvy(int *start, int *end, const int nitems, int chunkno, int nchunks)
{
    const int items_per_chunk = nitems/nchunks;
    const int remainder = nitems - nchunks*items_per_chunk;
    *start = chunkno*items_per_chunk + std::min(chunkno, remainder);
    *end = (chunkno+1)*items_per_chunk + std::min(chunkno+1, remainder);
}
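// Example (illustrative): divvy with nitems = 10, nchunks = 3 yields the
// ranges [0, 4), [4, 7), [7, 10) for chunks 0, 1, 2.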
// A 2-D grid of 4 variables with a halo of width halow on each side.
struct grid2d
{
    double *q;          // field data, indexed q[i + j*ystride + v*varstride]
    int ystride;        // x-extent including halos
    int varstride;      // elements per variable: ystride*(N[1] + 2*halow)
    int N[2];           // interior extents of this rank's block
    int MPI_offset[2];  // global index of this block's first interior cell
    int MPI_decomp[2];  // process-grid dimensions
};
// Map a rank to its (x, y) position in the process grid and compute its
// half-open index ranges along each axis.
void get_rank_decomp(int start[2], int end[2], int position[2], const int decomp[2], int rank, const int n[2])
{
    position[0] = rank % decomp[0];
    position[1] = rank / decomp[0];
    divvy(start+0, end+0, n[0], position[0], decomp[0]);
    divvy(start+1, end+1, n[1], position[1], decomp[1]);
}
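// Example (illustrative): with decomp = {2, 2}, rank 3 sits at position
// (1, 1) and owns the upper-right quadrant of the n[0] x n[1] domain.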
static const int halow = 2; // halo width on each side of a block
// Allocate and zero a rank's block (including halos) of the global grid.
void grid2d_init(grid2d *h, int rank, int nranks, const int global_N[2])
{
    std::vector<int> decomp(best_decomp(nranks, 2));
    h->MPI_decomp[0] = decomp[0];
    h->MPI_decomp[1] = decomp[1];
    if(rank == 0)
    {
        printf("Decomp: %d %d\n", h->MPI_decomp[0], h->MPI_decomp[1]);
    }
    int end[2];
    int MPI_position[2];
    get_rank_decomp(h->MPI_offset, end, MPI_position, h->MPI_decomp, rank, global_N);
    h->N[0] = end[0]-h->MPI_offset[0];
    h->N[1] = end[1]-h->MPI_offset[1];
    h->ystride = h->N[0] + halow*2;
    h->varstride = h->ystride * (h->N[1] + halow*2);
    h->q = (double*)malloc(sizeof(double)*h->varstride*4); // 4 variables
    assert(h->q);
    memset(h->q, 0, sizeof(double)*h->varstride*4);
    printf("rank % 4d: (% 3d, % 3d) [% 5d, % 5d]x[% 5d, % 5d]\n", rank, MPI_position[0], MPI_position[1], h->MPI_offset[0], end[0], h->MPI_offset[1], end[1]);
}
// Gather the interior of every sending rank's block into dest on rank 0.
// Each transfer is described by a derived datatype: a row of doubles,
// nested in an hvector of the block's rows, nested in an hvector over the
// 4 variables. xfer_mask selects which ranks participate (bit r = rank r).
void mpi_gather(grid2d *dest, const grid2d *src, uint64_t xfer_mask)
{
    int rank, nranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
    assert(nranks <= 64); // one mask bit per rank
    const uint64_t rank_mask = (1ULL << nranks) - 1ULL;
    const uint64_t effective_mask = rank_mask & xfer_mask;
    printf("R%d GATHER START: xfer_mask %llu, rank_mask: %llu, effective_mask: %llu\n", rank,
           (unsigned long long)xfer_mask, (unsigned long long)rank_mask, (unsigned long long)effective_mask);
    const int nrecvs = rank == 0 ? __builtin_popcountll(effective_mask) : 0;
    const int sending = ((1ULL << rank) & effective_mask) != 0ULL;
    const int num_types = nrecvs + sending;
    const int num_comms = num_types;
    int used_types = 0;
    int used_comms = 0;
    MPI_Datatype grid_types[num_types];
    MPI_Request reqs[num_comms];
    if(sending)
    {
        // Row of N[0] doubles -> N[1] rows ystride apart -> 4 variables
        // varstride apart.
        MPI_Datatype tmp[2];
        MPI_Type_create_hvector(src->N[0], 1, sizeof(double)*1, MPI_DOUBLE, tmp+0);
        MPI_Type_create_hvector(src->N[1], 1, sizeof(double)*src->ystride, tmp[0], tmp+1);
        MPI_Type_create_hvector(4, 1, sizeof(double)*src->varstride, tmp[1], grid_types+used_types);
        MPI_Type_commit(grid_types+used_types);
        MPI_Type_free(tmp+0); // intermediates can be freed once nested
        MPI_Type_free(tmp+1);
        // Send from the first interior cell, skipping the halo.
        MPI_Isend(src->q+halow+halow*src->ystride, 1, grid_types[used_types], 0, rank, MPI_COMM_WORLD, reqs+used_comms);
        ++used_types;
        ++used_comms;
    }
    if(rank == 0)
    {
        for(int r = 0; r < nranks; ++r)
        {
            if(((1ULL << r) & effective_mask) == 0ULL)
            {
                continue;
            }
            // Recompute rank r's block within the destination grid.
            int pos[2], start[2], end[2];
            get_rank_decomp(start, end, pos, src->MPI_decomp, r, dest->N);
            int n[2];
            n[0] = end[0]-start[0];
            n[1] = end[1]-start[1];
            // Same nesting as the send type, but with dest's strides.
            MPI_Datatype tmp[2];
            MPI_Type_create_hvector(n[0], 1, sizeof(double)*1, MPI_DOUBLE, tmp+0);
            MPI_Type_create_hvector(n[1], 1, sizeof(double)*dest->ystride, tmp[0], tmp+1);
            MPI_Type_create_hvector(4, 1, sizeof(double)*dest->varstride, tmp[1], grid_types+used_types);
            MPI_Type_commit(grid_types+used_types);
            MPI_Type_free(tmp+0);
            MPI_Type_free(tmp+1);
            MPI_Irecv(dest->q + halow + start[0] + dest->ystride*(halow+start[1]), 1, grid_types[used_types], r, r, MPI_COMM_WORLD, reqs+used_comms);
            ++used_types;
            ++used_comms;
        }
    }
    if(sending || rank == 0)
    {
        MPI_Status stats[used_comms];
        //MPI_Waitall(used_comms, reqs, stats);
        // Drain the requests with MPI_Waitsome, logging progress as we go.
        int out_comms = 0;
        int out_indices[used_comms];
        int req_finished[used_comms];
        const char *status_str[2] = {"not ", ""};
        memset(req_finished, 0, sizeof(int)*used_comms);
        do
        {
            printf("R%d %d items:\n", rank, used_comms);
            for(int i = 0; i < out_comms; ++i)
            {
                req_finished[out_indices[i]] = 1;
            }
            for(int i = 0; i < used_comms; ++i)
            {
                printf("R%d: req: %d %sfinished\n", rank, i, status_str[req_finished[i]]);
            }
            MPI_Waitsome(used_comms, reqs, &out_comms, out_indices, stats);
        }
        while(out_comms != MPI_UNDEFINED);
        printf("R%d all finished\n", rank);
        for(int r = 0; r < used_types; ++r)
        {
            MPI_Type_free(grid_types + r);
        }
    }
    printf("R%d GATHER END: xfer_mask %llu, rank_mask: %llu, effective_mask: %llu\n", rank,
           (unsigned long long)xfer_mask, (unsigned long long)rank_mask, (unsigned long long)effective_mask);
}
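// Aside (not used by the test): the three nested hvectors above describe
// the same memory pattern as a single 3-D subarray over (variable, y, x).
// A minimal sketch under that assumption; make_block_type is a name
// introduced here for illustration, not part of the original gist.
static MPI_Datatype make_block_type(const grid2d *g, const int start[2], const int n[2])
{
    const int sizes[3]    = {4, g->N[1] + 2*halow, g->ystride};  // full array extents
    const int subsizes[3] = {4, n[1], n[0]};                     // block to transfer
    const int starts[3]   = {0, halow + start[1], halow + start[0]};
    MPI_Datatype t;
    MPI_Type_create_subarray(3, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &t);
    MPI_Type_commit(&t);
    return t; // send/recv g->q itself with count 1 and this type
}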
// Verify that rank 0's gathered grid holds the global linear pattern in
// its interior and zeros in the halo.
int check_grid(grid2d *g)
{
    for(int v = 0; v < 4; ++v)
    {
        for(int j = 0; j < g->N[1]+2*halow; ++j)
        {
            for(int i = 0; i < g->N[0]+2*halow; ++i)
            {
                double expected;
                if(i >= halow && j >= halow && i < g->N[0]+halow && j < g->N[1]+halow)
                {
                    expected = i-halow + (j-halow)*g->N[0] + v*g->N[0]*g->N[1];
                }
                else
                {
                    expected = 0;
                }
                const double found = g->q[i+j*g->ystride+v*g->varstride];
                if(found != expected)
                {
                    printf("Gather failed !!! %d, %d, %d expected %lg, got %lg\n", i, j, v, expected, found);
                    return EXIT_FAILURE;
                }
            }
        }
    }
    printf("Gather okay\n");
    return EXIT_SUCCESS;
}
// Build the distributed grid, fill each rank's interior with the global
// linear pattern, gather everything to rank 0, and check the result.
int gather_test(int global_N[2])
{
    int rank;
    int nranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
    MPI_Bcast(global_N, 2, MPI_INT, 0, MPI_COMM_WORLD);
    grid2d global_grid;
    if(rank == 0)
    {
        // Rank 0 also holds the full grid as a 1-rank "decomposition".
        grid2d_init(&global_grid, 0, 1, global_N);
    }
    grid2d local_grid;
    grid2d_init(&local_grid, rank, nranks, global_N);
    for(int v = 0; v < 4; ++v)
    {
        for(int j = 0; j < local_grid.N[1]; ++j)
        {
            for(int i = 0; i < local_grid.N[0]; ++i)
            {
                local_grid.q[i+halow + (j+halow)*local_grid.ystride + v*local_grid.varstride] =
                    (i+local_grid.MPI_offset[0]) + (j+local_grid.MPI_offset[1])*global_N[0] + v*global_N[0]*global_N[1];
            }
        }
    }
    mpi_gather(&global_grid, &local_grid, -1ULL); // all ranks participate
    int retval = 0;
    if(rank == 0)
    {
        retval = check_grid(&global_grid);
    }
    MPI_Bcast(&retval, 1, MPI_INT, 0, MPI_COMM_WORLD);
    free(local_grid.q);
    if(rank == 0)
    {
        free(global_grid.q);
    }
    return retval;
}
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int global_N[2];
    if(rank == 0)
    {
        // Usage: mpi-gather <nx> <ny>; the sizes are broadcast to the
        // other ranks inside gather_test.
        assert(argc == 3);
        global_N[0] = atoi(argv[1]);
        global_N[1] = atoi(argv[2]);
    }
    int ret = gather_test(global_N);
    MPI_Finalize();
    return ret;
}
$ sh test-gather.sh
mpi-gather.cpp(245): warning #3656: variable "out_indices" may be used before its value is set
req_finished[out_indices[i]] = 1;
^
remark #11074: Inlining inhibited by limit max-size
remark #11074: Inlining inhibited by limit max-total-size
remark #11076: To get full report use -qopt-report=4 -qopt-report-phase ipo
openmpi --version:
mpirun (Open MPI) 2.1.0
Report bugs to http://www.open-mpi.org/community/help/
mpich --version:
HYDRA build details:
Version: 3.2
Release Date: Wed Nov 11 22:06:48 CST 2015
CC: icc
CXX: icpc
F77: ifort
F90: ifort
Configure options: '--disable-option-checking' '--prefix=/home/jsewall/local/mpich' '--cache-file=/dev/null' '--srcdir=.' 'CC=icc' 'CFLAGS= -O2' 'LDFLAGS=' 'LIBS=-lpthread ' 'CPPFLAGS= -I/home/jsewall/src/mpich-3.2/src/mpl/include -I/home/jsewall/src/mpich-3.2/src/mpl/include -I/home/jsewall/src/mpich-3.2/src/openpa/src -I/home/jsewall/src/mpich-3.2/src/openpa/src -D_REENTRANT -I/home/jsewall/src/mpich-3.2/src/mpi/romio/include'
Process Manager: pmi
Launchers available: ssh rsh fork slurm ll lsf sge manual persist
Topology libraries available: hwloc
Resource management kernels available: user slurm ll lsf sge pbs cobalt
Checkpointing libraries available:
Demux engines available: poll select
impi --version:
Intel(R) MPI Library for Linux* OS, Version 2017 Update 2 Build 20170125 (id: 16752)
Copyright (C) 2003-2017, Intel Corporation. All rights reserved.
openmpi -np 1 ./mpi-gather 64 64 => pass
mpich -np 1 ./mpi-gather 64 64 => pass
impi -np 1 ./mpi-gather 64 64 => pass
openmpi -np 2 ./mpi-gather 64 64 => pass
mpich -np 2 ./mpi-gather 64 64 => pass
impi -np 2 ./mpi-gather 64 64 => pass
openmpi -np 3 ./mpi-gather 64 64 => pass
mpich -np 3 ./mpi-gather 64 64 => pass
impi -np 3 ./mpi-gather 64 64 => pass
openmpi -np 1 ./mpi-gather 64 128 => pass
mpich -np 1 ./mpi-gather 64 128 => pass
impi -np 1 ./mpi-gather 64 128 => pass
openmpi -np 2 ./mpi-gather 64 128 => pass
mpich -np 2 ./mpi-gather 64 128 => pass
impi -np 2 ./mpi-gather 64 128 => pass
openmpi -np 3 ./mpi-gather 64 128 => pass
mpich -np 3 ./mpi-gather 64 128 => pass
impi -np 3 ./mpi-gather 64 128 => pass
openmpi -np 1 ./mpi-gather 64 256 => pass
mpich -np 1 ./mpi-gather 64 256 => pass
impi -np 1 ./mpi-gather 64 256 => pass
openmpi -np 2 ./mpi-gather 64 256 => pass
mpich -np 2 ./mpi-gather 64 256 => pass
impi -np 2 ./mpi-gather 64 256 => pass
openmpi -np 3 ./mpi-gather 64 256 => pass
mpich -np 3 ./mpi-gather 64 256 => pass
impi -np 3 ./mpi-gather 64 256 => pass
openmpi -np 1 ./mpi-gather 128 64 => pass
mpich -np 1 ./mpi-gather 128 64 => pass
impi -np 1 ./mpi-gather 128 64 => pass
openmpi -np 2 ./mpi-gather 128 64 => pass
mpich -np 2 ./mpi-gather 128 64 => pass
impi -np 2 ./mpi-gather 128 64 => pass
openmpi -np 3 ./mpi-gather 128 64 => pass
mpich -np 3 ./mpi-gather 128 64 => pass
impi -np 3 ./mpi-gather 128 64 => pass
openmpi -np 1 ./mpi-gather 128 128 => pass
mpich -np 1 ./mpi-gather 128 128 => pass
impi -np 1 ./mpi-gather 128 128 => pass
openmpi -np 2 ./mpi-gather 128 128 => pass
mpich -np 2 ./mpi-gather 128 128 => pass
impi -np 2 ./mpi-gather 128 128 => pass
openmpi -np 3 ./mpi-gather 128 128 => pass
mpich -np 3 ./mpi-gather 128 128 => pass
impi -np 3 ./mpi-gather 128 128 => pass
openmpi -np 1 ./mpi-gather 128 256 => pass
mpich -np 1 ./mpi-gather 128 256 => pass
impi -np 1 ./mpi-gather 128 256 => pass
openmpi -np 2 ./mpi-gather 128 256 => pass
mpich -np 2 ./mpi-gather 128 256 => pass
impi -np 2 ./mpi-gather 128 256 => pass
openmpi -np 3 ./mpi-gather 128 256 => pass
mpich -np 3 ./mpi-gather 128 256 => pass
impi -np 3 ./mpi-gather 128 256 => pass
openmpi -np 1 ./mpi-gather 256 64 => pass
mpich -np 1 ./mpi-gather 256 64 => pass
impi -np 1 ./mpi-gather 256 64 => pass
openmpi -np 2 ./mpi-gather 256 64 => pass
mpich -np 2 ./mpi-gather 256 64 => pass
impi -np 2 ./mpi-gather 256 64 => pass
openmpi -np 3 ./mpi-gather 256 64 => pass
mpich -np 3 ./mpi-gather 256 64 => pass
impi -np 3 ./mpi-gather 256 64 => pass
openmpi -np 1 ./mpi-gather 256 128 => pass
mpich -np 1 ./mpi-gather 256 128 => pass
impi -np 1 ./mpi-gather 256 128 => pass
openmpi -np 2 ./mpi-gather 256 128 => pass
mpich -np 2 ./mpi-gather 256 128 => pass
impi -np 2 ./mpi-gather 256 128 => pass
openmpi -np 3 ./mpi-gather 256 128 => pass
mpich -np 3 ./mpi-gather 256 128 => pass
impi -np 3 ./mpi-gather 256 128 => pass
openmpi -np 1 ./mpi-gather 256 256 => pass
mpich -np 1 ./mpi-gather 256 256 => pass
impi -np 1 ./mpi-gather 256 256 => pass
openmpi -np 2 ./mpi-gather 256 256 => pass
mpich -np 2 ./mpi-gather 256 256 => pass
impi -np 2 ./mpi-gather 256 256 => pass
openmpi -np 3 ./mpi-gather 256 256 => pass
Expected to lock up. Issue user interrupt.
mpich -np 3 ./mpi-gather 256 256 => ^Cbroken
Expected to lock up. Issue user interrupt.
impi -np 3 ./mpi-gather 256 256 => ^Cbroken
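The driver script, test-gather.sh: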
#!/bin/bash
# Build with the Intel MPI compiler wrapper, then run the gather test over
# every (size x size x procs x MPI implementation) combination.
mpiicpc mpi-gather.cpp -o mpi-gather -Wall -Wextra
declare -A mpilibs
OPENMPI=/home/jsewall/local/openmpi/bin/mpirun
MPICH=/home/jsewall/local/mpich/bin/mpirun
IMPI=/opt/intel/compilers_and_libraries_2017.2.174/linux/mpi/intel64/bin/mpirun
mpilibs=( ["openmpi"]="$OPENMPI" ["mpich"]="$MPICH" ["impi"]="$IMPI" )
SIZES="64 128 256"
PROCS="1 2 3"
MPIEXECS="openmpi mpich impi"
for e in $MPIEXECS
do
    echo "$e --version:"
    ${mpilibs[$e]} --version
done
# Record Ctrl-C so a hung run can be interrupted and reported as "broken".
inthandle()
{
    INTERRUPTED=1
}
trap inthandle INT
for sx in $SIZES
do
    for sy in $SIZES
    do
        for p in $PROCS
        do
            for e in $MPIEXECS
            do
                INTERRUPTED=0
                if [[ ( ${sx} -ge 256 ) && ( ${sy} -ge 256 ) && ( $p -ge 3 ) && ( $e != "openmpi" ) ]]
                then
                    echo "Expected to lock up. Issue user interrupt."
                fi
                echo -n "$e -np $p ./mpi-gather $sx $sy => "
                ${mpilibs[$e]} -np $p ./mpi-gather $sx $sy > /dev/null
                EXITSTATUS=$?
                if [ ${INTERRUPTED} -ne 0 ]
                then
                    echo "broken"
                else
                    if [ ${EXITSTATUS} -eq 0 ]
                    then
                        echo "pass"
                    else
                        echo "fail"
                    fi
                fi
            done
            echo ""
        done
    done
done