public
Last active

  • Download Gist
MPI_Transpose.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
#include<stdio.h>
#include<stdlib.h>
#include<array_alloc.h>
#include<math.h>
#include<mpi.h>
 
 
struct global_index
{
int x;
int y;
};
 
struct local_index
{
int procX;
int procY;
int arrayX;
int arrayY;
};
 
struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
{
struct global_index result;
result.x = arrayX + (procX * cols);
result.y = arrayY + (procY * rows);
return result;
}
 
struct local_index global_to_local(int x, int y, int cols, int rows)
{
struct local_index result;
result.arrayX = x % cols;
result.procX = (int) x / (int) cols;
result.arrayY = y % rows;
result.procY = (int) y / (int) rows;
return result;
}
 
/* Factorisation function taken from Lab Sheet 9 - written (I assume) by Ian Bush */
/* (Reformatted to fit with my coding style) */
void factorise(int n, int *x, int*y)
{
/* Factorise n into two numbers which are as close to equal as possible */
*x = sqrt( n ) + 0.5;
*y = n / *x;
 
/* Loop will definitely terminate when x == 1 */
while ( *x * *y != n )
{
(*x)--;
*y = n / *x;
}
}
 
int main(int argc, char ** argv)
{
/* ----------- VARIABLE DECLARATIONS ----------- */
 
/* Size of matrix - the matrix will be n x n */
int n = 10;
 
/* Number of processes and rank of this process*/
int nprocs, rank;
/* Size and periodicity of cartesian grid */
int dim_size[2];
int periods[2];
/* Number of processes in x and y directions */
int npx, npy;
/* Offset for this process in x and y directions */
int x_offset, y_offset;
/* The standard (normal) number of rows and cols
per core - not valid for the last in a row or col */
int std_rows_in_core, std_cols_in_core;
/* Coordinates in cartesian grid, and num of rows and cols
this process has */
int coords[2];
int rows_in_core;
int cols_in_core;
/* Array to store the chunk of the matrix that we have */
double **array;
double **new_array;
/* Loop variables */
int i, j;
/* Cartesian communicator */
MPI_Comm cart_comm;
int coords2[2];
int rank2;
 
int global_x1, global_x2;
int global_y1, global_y2;
 
struct global_index gi;
struct local_index li;
MPI_Status status;
 
MPI_Request send_request;
MPI_Request recv_request;
MPI_Request all_requests[1000];
int request_counter;
int tag;
/* ----------- MPI Setup ----------- */
 
/* Initialise MPI */
MPI_Init(&argc, &argv);
 
/* Get the rank for this process, and the number of processes */
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Work out how big the grid should be, given the number of processes */
factorise( nprocs, &npx, &npy );
/* Create the cartesian communicator */
dim_size[0] = npy;
dim_size[1] = npx;
periods[0] = 1;
periods[1] = 1;
MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);
/* Get our co-ordinates within that communicator */
MPI_Cart_coords(cart_comm, rank, 2, coords);
rows_in_core = ceil(n / (float) npx);
cols_in_core = ceil(n / (float) npy);
std_rows_in_core = rows_in_core;
std_cols_in_core = cols_in_core;
if (coords[0] == (npy - 1))
{
/* We're at the far end of a row */
cols_in_core = n - (cols_in_core * (npy - 1));
}
if (coords[1] == (npx - 1))
{
/* We're at the bottom of a col */
rows_in_core = n - (rows_in_core * (npx - 1));
}
printf("X: %d, Y: %d, RpC: %d, CpC: %d\n", coords[0], coords[1], rows_in_core, cols_in_core);
/* ----------- INITIALISE MATRIX CHUNKS ----------- */
/* Allocate an array to store this chunk of the matrix.
If the allocation fails, print an error */
array = alloc_2d_double(rows_in_core, cols_in_core);
new_array = alloc_2d_double(rows_in_core, cols_in_core);
if (array == NULL)
{
printf("Problem with array allocation.\nExiting\n");
return 1;
}
for (i = 0; i < rows_in_core; i++)
{
for (j = 0; j < cols_in_core; j++)
{
new_array[i][j] = 9999.99;
}
}
/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
top left-hand corner of the whole matrix */
x_offset = coords[0] * std_cols_in_core;
y_offset = coords[1] * std_rows_in_core;
for (j = 0; j < rows_in_core; j++)
{
/* printf("Process (%d, %d): ", coords[0], coords[1]); */
for (i = 0; i < cols_in_core; i++)
{
/* array[j][i] = (float) (j * cols_in_core) + (i + 1); */
/* array[j][i] = (float) ((j * cols_in_core) + x_offset) + (y_offset * rows_in_core) + i; */
array[i][j] = (float) ( (j + y_offset) * n) + ( (i + x_offset) + 1);
/*(printf("%f ", array[j][i]);*/
}
/* printf("\n"); */
}
MPI_Barrier(MPI_COMM_WORLD);
/* ----------- DO TRANSPOSE ----------- */
request_counter = 0;
for (i = 0; i < rows_in_core; i++)
{
for (j = 0; j < cols_in_core; j++)
{
/* For each item in this chunk of the array:
Send it to the opposite index
Receive from the opposite index */
/* Calculate the opposite index */
gi = local_to_global(coords[0], coords[1], j, i, std_cols_in_core, std_rows_in_core);
/* The two global indices for swapping */
global_x1 = gi.x;
global_y1 = gi.y;
global_x2 = global_y1;
global_y2 = global_x1;
 
/* We know which rank one of them is (as we're running as it!)
but we need to calculate the rank of the other which involves:
1. Getting the local index of the swapped global index
2. Converting the X, Y process co-ordinates to a rank */
li = global_to_local(global_x2, global_y2, std_cols_in_core, std_rows_in_core);
coords2[0] = li.procX;
coords2[1] = li.procY;
MPI_Cart_rank(cart_comm, coords2, &rank2);
tag = (global_x2 + 1) * (global_y2 + 1);
printf("Swapping\n");
printf("\tFrom global (%d, %d) - Processor (%d, %d) with local (%d, %d)\n", global_x1, global_y1, coords[0], coords[1], j, i);
printf("\tTo global (%d, %d)- Processor (%d, %d) with local (%d, %d)\n", global_x2, global_y2, coords2[0], coords[1], li.arrayX, li.arrayY);
/* printf("Send: from rank %d (proc (%d, %d)), local (%d, %d), to rank %d (proc (%d, %d)), tag %d\n", rank, coords[0], coords[1], j, i, rank2, coords2[0], coords2[1], tag); */
MPI_Isend(&array[i][j], 1, MPI_DOUBLE, rank2, (global_y2 * n) + global_x2, MPI_COMM_WORLD, &send_request);
/*printf("Receive: on rank %d (proc (%d, %d)), local (%d, %d), from rank %d (proc (%d, %d)), tag %d\n", rank, coords[0], coords[1], j, i, rank2, coords2[0], coords2[1], tag); */
MPI_Irecv(&new_array[i][j], 1, MPI_DOUBLE, rank2, (global_x2 * n) + global_y2, MPI_COMM_WORLD, &recv_request);
all_requests[request_counter] = send_request;
request_counter++;
all_requests[request_counter] = recv_request;
request_counter++;
}
}
MPI_Barrier(cart_comm);
printf("Waiting...\n");
MPI_Waitall(request_counter, all_requests, MPI_STATUSES_IGNORE);
MPI_Barrier(MPI_COMM_WORLD);
/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */
for (j = 0; j < rows_in_core; j++)
{
printf("Test Process (%d, %d): ", coords[0], coords[1]);
for (i = 0; i < cols_in_core; i++)
{
printf("%f ", new_array[i][j]);
}
printf("\n");
}
/* Close down the MPI environment */
MPI_Finalize();
return EXIT_SUCCESS;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.