Skip to content

@robintw /MPI_Transpose.c
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
#include<stdio.h>
#include<stdlib.h>
#include<array_alloc.h>
#include<math.h>
#include<mpi.h>
struct global_index
{
int x;
int y;
};
struct local_index
{
int procX;
int procY;
int arrayX;
int arrayY;
};
struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
{
struct global_index result;
result.x = arrayX + (procX * cols);
result.y = arrayY + (procY * rows);
return result;
}
struct local_index global_to_local(int x, int y, int cols, int rows)
{
struct local_index result;
result.arrayX = x % cols;
result.procX = (int) x / (int) cols;
result.arrayY = y % rows;
result.procY = (int) y / (int) rows;
return result;
}
/* Factorisation function taken from Lab Sheet 9 - written (I assume) by Ian Bush */
/* (Reformatted to fit with my coding style) */
void factorise(int n, int *x, int*y)
{
/* Factorise n into two numbers which are as close to equal as possible */
*x = sqrt( n ) + 0.5;
*y = n / *x;
/* Loop will definitely terminate when x == 1 */
while ( *x * *y != n )
{
(*x)--;
*y = n / *x;
}
}
int main(int argc, char ** argv)
{
/* ----------- VARIABLE DECLARATIONS ----------- */
/* Size of matrix - the matrix will be n x n */
int n = 10;
/* Number of processes and rank of this process*/
int nprocs, rank;
/* Size and periodicity of cartesian grid */
int dim_size[2];
int periods[2];
/* Number of processes in x and y directions */
int npx, npy;
/* Offset for this process in x and y directions */
int x_offset, y_offset;
/* The standard (normal) number of rows and cols
per core - not valid for the last in a row or col */
int std_rows_in_core, std_cols_in_core;
/* Coordinates in cartesian grid, and num of rows and cols
this process has */
int coords[2];
int rows_in_core;
int cols_in_core;
/* Array to store the chunk of the matrix that we have */
double **array;
double **new_array;
/* Loop variables */
int i, j;
/* Cartesian communicator */
MPI_Comm cart_comm;
int coords2[2];
int rank2;
int global_x1, global_x2;
int global_y1, global_y2;
struct global_index gi;
struct local_index li;
MPI_Status status;
MPI_Request send_request;
MPI_Request recv_request;
MPI_Request all_requests[1000];
int request_counter;
int tag;
/* ----------- MPI Setup ----------- */
/* Initialise MPI */
MPI_Init(&argc, &argv);
/* Get the rank for this process, and the number of processes */
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Work out how big the grid should be, given the number of processes */
factorise( nprocs, &npx, &npy );
/* Create the cartesian communicator */
dim_size[0] = npy;
dim_size[1] = npx;
periods[0] = 1;
periods[1] = 1;
MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);
/* Get our co-ordinates within that communicator */
MPI_Cart_coords(cart_comm, rank, 2, coords);
rows_in_core = ceil(n / (float) npx);
cols_in_core = ceil(n / (float) npy);
std_rows_in_core = rows_in_core;
std_cols_in_core = cols_in_core;
if (coords[0] == (npy - 1))
{
/* We're at the far end of a row */
cols_in_core = n - (cols_in_core * (npy - 1));
}
if (coords[1] == (npx - 1))
{
/* We're at the bottom of a col */
rows_in_core = n - (rows_in_core * (npx - 1));
}
printf("X: %d, Y: %d, RpC: %d, CpC: %d\n", coords[0], coords[1], rows_in_core, cols_in_core);
/* ----------- INITIALISE MATRIX CHUNKS ----------- */
/* Allocate an array to store this chunk of the matrix.
If the allocation fails, print an error */
array = alloc_2d_double(rows_in_core, cols_in_core);
new_array = alloc_2d_double(rows_in_core, cols_in_core);
if (array == NULL)
{
printf("Problem with array allocation.\nExiting\n");
return 1;
}
for (i = 0; i < rows_in_core; i++)
{
for (j = 0; j < cols_in_core; j++)
{
new_array[i][j] = 9999.99;
}
}
/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
top left-hand corner of the whole matrix */
x_offset = coords[0] * std_cols_in_core;
y_offset = coords[1] * std_rows_in_core;
for (j = 0; j < rows_in_core; j++)
{
/* printf("Process (%d, %d): ", coords[0], coords[1]); */
for (i = 0; i < cols_in_core; i++)
{
/* array[j][i] = (float) (j * cols_in_core) + (i + 1); */
/* array[j][i] = (float) ((j * cols_in_core) + x_offset) + (y_offset * rows_in_core) + i; */
array[i][j] = (float) ( (j + y_offset) * n) + ( (i + x_offset) + 1);
/*(printf("%f ", array[j][i]);*/
}
/* printf("\n"); */
}
MPI_Barrier(MPI_COMM_WORLD);
/* ----------- DO TRANSPOSE ----------- */
request_counter = 0;
for (i = 0; i < rows_in_core; i++)
{
for (j = 0; j < cols_in_core; j++)
{
/* For each item in this chunk of the array:
Send it to the opposite index
Receive from the opposite index */
/* Calculate the opposite index */
gi = local_to_global(coords[0], coords[1], j, i, std_cols_in_core, std_rows_in_core);
/* The two global indices for swapping */
global_x1 = gi.x;
global_y1 = gi.y;
global_x2 = global_y1;
global_y2 = global_x1;
/* We know which rank one of them is (as we're running as it!)
but we need to calculate the rank of the other which involves:
1. Getting the local index of the swapped global index
2. Converting the X, Y process co-ordinates to a rank */
li = global_to_local(global_x2, global_y2, std_cols_in_core, std_rows_in_core);
coords2[0] = li.procX;
coords2[1] = li.procY;
MPI_Cart_rank(cart_comm, coords2, &rank2);
tag = (global_x2 + 1) * (global_y2 + 1);
printf("Swapping\n");
printf("\tFrom global (%d, %d) - Processor (%d, %d) with local (%d, %d)\n", global_x1, global_y1, coords[0], coords[1], j, i);
printf("\tTo global (%d, %d)- Processor (%d, %d) with local (%d, %d)\n", global_x2, global_y2, coords2[0], coords[1], li.arrayX, li.arrayY);
/* printf("Send: from rank %d (proc (%d, %d)), local (%d, %d), to rank %d (proc (%d, %d)), tag %d\n", rank, coords[0], coords[1], j, i, rank2, coords2[0], coords2[1], tag); */
MPI_Isend(&array[i][j], 1, MPI_DOUBLE, rank2, (global_y2 * n) + global_x2, MPI_COMM_WORLD, &send_request);
/*printf("Receive: on rank %d (proc (%d, %d)), local (%d, %d), from rank %d (proc (%d, %d)), tag %d\n", rank, coords[0], coords[1], j, i, rank2, coords2[0], coords2[1], tag); */
MPI_Irecv(&new_array[i][j], 1, MPI_DOUBLE, rank2, (global_x2 * n) + global_y2, MPI_COMM_WORLD, &recv_request);
all_requests[request_counter] = send_request;
request_counter++;
all_requests[request_counter] = recv_request;
request_counter++;
}
}
MPI_Barrier(cart_comm);
printf("Waiting...\n");
MPI_Waitall(request_counter, all_requests, MPI_STATUSES_IGNORE);
MPI_Barrier(MPI_COMM_WORLD);
/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */
for (j = 0; j < rows_in_core; j++)
{
printf("Test Process (%d, %d): ", coords[0], coords[1]);
for (i = 0; i < cols_in_core; i++)
{
printf("%f ", new_array[i][j]);
}
printf("\n");
}
/* Close down the MPI environment */
MPI_Finalize();
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.