robintw/MPI_Transpose.c

## MPI_Transpose.c
#include<stdio.h>
#include<stdlib.h>
#include<array_alloc.h>
#include<math.h>
#include<mpi.h>


/* A pair of (x, y) indices, as produced by local_to_global(): each
   component is a per-process array index plus that process's grid
   coordinate multiplied by the chunk size in that direction. */
struct global_index
{
  int x; /* arrayX + (procX * cols) */
  int y; /* arrayY + (procY * rows) */
};

/* The result of global_to_local(): a process grid coordinate plus an
   index inside that process's chunk, for each of the two directions. */
struct local_index
{
	int procX;  /* x / cols */
	int procY;  /* y / rows */
	int arrayX; /* x % cols */
	int arrayY; /* y % rows */
};

struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
{
	struct global_index result;

	result.x = arrayX + (procX * cols);
	result.y = arrayY + (procY * rows);

	return result;
}

struct local_index global_to_local(int x, int y, int cols, int rows)
{
	struct local_index result;

	result.arrayX = x % cols;
	result.procX = (int) x / (int) cols;

	result.arrayY = y % rows;
	result.procY = (int) y / (int) rows;

	return result;
}

/* Factorisation function based on the one from Lab Sheet 9 - written (I assume) by Ian Bush */
/* (Reformatted to fit with my coding style, and reworked to use integer
   arithmetic only and to cope with n < 1) */

/* Factorise n into two numbers which are as close to equal as possible.

   n - the number to factorise
   x - receives the smaller factor (the largest divisor of n that does
       not exceed the square root of n)
   y - receives the matching larger factor, so that *x * *y == n

   For n < 1 there is no sensible factorisation; *x is set to 1 and *y
   to n so that *x * *y == n still holds and nothing divides by zero. */
void factorise(int n, int *x, int*y)
{
    int d;

    if ( n < 1 )
    {
        *x = 1;
        *y = n;
        return;
    }

    /* Remember the largest divisor d with d * d <= n. The test is written
       as d <= n / d so that it cannot overflow for large n. */
    *x = 1;
    for ( d = 1; d <= n / d; d++ )
    {
        if ( n % d == 0 )
        {
            *x = d;
        }
    }

    *y = n / *x;
}

/* Distributed transpose of an n x n matrix.

   The processes are arranged in a 2D cartesian grid and each one owns a
   rectangular chunk of the matrix. Every element is sent, one message per
   element, to the process that owns the transposed position, and the
   matching element is received into new_array. Progress and the resulting
   chunks are printed to stdout.

   Returns EXIT_SUCCESS, or aborts the MPI job if allocation fails. */
int main(int argc, char ** argv)
{
	/* ----------- VARIABLE DECLARATIONS ----------- */

	/* Size of matrix - the matrix will be n x n */
	int n = 10;

	/* Number of processes and rank of this process (within cart_comm) */
	int nprocs, rank;

	/* Size and periodicity of cartesian grid */
	int dim_size[2];
	int periods[2];

	/* Number of processes in x and y directions */
	int npx, npy;

	/* Offset for this process in x and y directions */
	int x_offset, y_offset;

	/* The standard (normal) number of rows and cols
	per core - not valid for the last in a row or col */
	int std_rows_in_core, std_cols_in_core;

	/* Coordinates in cartesian grid, and num of rows and cols
	this process has */
	int coords[2];
	int rows_in_core;
	int cols_in_core;

	/* Arrays to store the chunk of the matrix that we have (array) and
	the chunk of the transposed matrix that we receive (new_array) */
	double **array;
	double **new_array;

	/* Loop variables */
	int i, j;

	/* Cartesian communicator */
	MPI_Comm cart_comm;

	/* Grid coordinates and rank of the process we swap an element with */
	int coords2[2];
	int rank2;

	int global_x1, global_x2;
	int global_y1, global_y2;

	struct global_index gi;
	struct local_index li;

	/* One send and one receive request per element of our chunk */
	MPI_Request *all_requests;
	int max_requests;
	int request_counter;

	/* ----------- MPI Setup ----------- */

	/* Initialise MPI */
	MPI_Init(&argc, &argv);

	/* Get the number of processes */
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

	/* Work out how big the grid should be, given the number of processes */
	factorise( nprocs, &npx, &npy );

	/* Create the cartesian communicator */
	dim_size[0] = npy;
	dim_size[1] = npx;
	periods[0] = 1;
	periods[1] = 1;

	MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);

	/* Reordering is enabled above, so our rank in cart_comm may differ
	from our rank in MPI_COMM_WORLD. Everything that follows uses ranks
	and coordinates of cart_comm only. */
	MPI_Comm_rank(cart_comm, &rank);

	/* Get our co-ordinates within that communicator */
	MPI_Cart_coords(cart_comm, rank, 2, coords);

	/* Standard chunk size: n divided by the grid size, rounded up */
	std_rows_in_core = (n + npx - 1) / npx;
	std_cols_in_core = (n + npy - 1) / npy;

	/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
	top left-hand corner of the whole matrix */
	x_offset = coords[0] * std_cols_in_core;
	y_offset = coords[1] * std_rows_in_core;

	/* We hold a standard-sized chunk unless that would run past the edge
	of the matrix, in which case we hold whatever is left (possibly
	nothing at all when there are more processes than rows or cols) */
	cols_in_core = n - x_offset;
	if (cols_in_core > std_cols_in_core)
	{
		cols_in_core = std_cols_in_core;
	}
	if (cols_in_core < 0)
	{
		cols_in_core = 0;
	}

	rows_in_core = n - y_offset;
	if (rows_in_core > std_rows_in_core)
	{
		rows_in_core = std_rows_in_core;
	}
	if (rows_in_core < 0)
	{
		rows_in_core = 0;
	}

	printf("X: %d, Y: %d, RpC: %d, CpC: %d\n", coords[0], coords[1], rows_in_core, cols_in_core);

	/* ----------- INITIALISE MATRIX CHUNKS ----------- */

	/* Allocate arrays to store this chunk of the matrix, plus the request
	list. If an allocation fails, print an error and abort the whole job
	so that the other processes are not left waiting for us.
	NOTE(review): assumes alloc_2d_double(rows, cols) returns an array
	indexed as [row][col] - confirm against array_alloc.h */
	max_requests = 2 * rows_in_core * cols_in_core;

	array = alloc_2d_double(rows_in_core, cols_in_core);
	new_array = alloc_2d_double(rows_in_core, cols_in_core);
	all_requests = malloc((size_t) (max_requests > 0 ? max_requests : 1) * sizeof *all_requests);

	/* A NULL chunk only counts as a failure when the chunk is non-empty */
	if (all_requests == NULL
		|| (max_requests > 0 && (array == NULL || new_array == NULL)))
	{
		printf("Problem with array allocation.\nExiting\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
		return EXIT_FAILURE;
	}

	/* Fill our chunk so that each element holds its 1-based position in
	the whole matrix, counted row by row; fill the receiving chunk with
	a marker value so that elements never received stand out */
	for (i = 0; i < rows_in_core; i++)
	{
		for (j = 0; j < cols_in_core; j++)
		{
			array[i][j] = (double) ( (i + y_offset) * n) + ( (j + x_offset) + 1);
			new_array[i][j] = 9999.99;
		}
	}

	MPI_Barrier(MPI_COMM_WORLD);

	/* ----------- DO TRANSPOSE ----------- */
	request_counter = 0;

	for (i = 0; i < rows_in_core; i++)
	{
		for (j = 0; j < cols_in_core; j++)
		{
			/* For each item in this chunk of the array:

			Send it to the opposite index
			Receive from the opposite index */

			/* Calculate the opposite index */
			gi = local_to_global(coords[0], coords[1], j, i, std_cols_in_core, std_rows_in_core);

			/* The two global indices for swapping */
			global_x1 = gi.x;
			global_y1 = gi.y;

			global_x2 = global_y1;
			global_y2 = global_x1;

			/* We know which rank one of them is (as we're running as it!)
			but we need to calculate the rank of the other which involves:
			1. Getting the local index of the swapped global index
			2. Converting the X, Y process co-ordinates to a rank */
			li = global_to_local(global_x2, global_y2, std_cols_in_core, std_rows_in_core);

			coords2[0] = li.procX;
			coords2[1] = li.procY;
			MPI_Cart_rank(cart_comm, coords2, &rank2);

			printf("Swapping\n");
			printf("\tFrom global (%d, %d) - Processor (%d, %d) with local (%d, %d)\n", global_x1, global_y1, coords[0], coords[1], j, i);
			printf("\tTo global (%d, %d)- Processor (%d, %d) with local (%d, %d)\n", global_x2, global_y2, coords2[0], coords2[1], li.arrayX, li.arrayY);

			/* The tag identifies the element by its position in the
			transposed matrix, so the tag we send with is the tag the
			partner process is receiving with, and vice versa. rank2 is
			a rank in cart_comm, so cart_comm is the communicator used. */
			MPI_Isend(&array[i][j], 1, MPI_DOUBLE, rank2, (global_y2 * n) + global_x2, cart_comm, &all_requests[request_counter]);
			request_counter++;
			MPI_Irecv(&new_array[i][j], 1, MPI_DOUBLE, rank2, (global_x2 * n) + global_y2, cart_comm, &all_requests[request_counter]);
			request_counter++;
		}
	}
	MPI_Barrier(cart_comm);
	printf("Waiting...\n");
	MPI_Waitall(request_counter, all_requests, MPI_STATUSES_IGNORE);
	MPI_Barrier(MPI_COMM_WORLD);

	/* ----------- PRINT THE TRANSPOSED CHUNK ----------- */

	for (i = 0; i < rows_in_core; i++)
	{
		printf("Test Process (%d, %d): ", coords[0], coords[1]);
		for (j = 0; j < cols_in_core; j++)
		{
			printf("%f ", new_array[i][j]);
		}
		printf("\n");
	}

	/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */

	/* NOTE(review): array and new_array are not released here because the
	matching free routine from array_alloc.h is not visible in this file */
	free(all_requests);

	/* Close down the MPI environment */
	MPI_Finalize();

	return EXIT_SUCCESS;
}
	#include<stdio.h>
	#include<stdlib.h>
	#include<array_alloc.h>
	#include<math.h>
	#include<mpi.h>


	/* A pair of (x, y) indices, as produced by local_to_global(): each
	   component is a per-process array index plus that process's grid
	   coordinate multiplied by the chunk size in that direction. */
	struct global_index
	{
	/* arrayX + (procX * cols) */
	int x;
	/* arrayY + (procY * rows) */
	int y;
	};

	/* The result of global_to_local(): a process grid coordinate plus an
	   index inside that process's chunk, for each of the two directions. */
	struct local_index
	{
	/* x / cols */
	int procX;
	/* y / rows */
	int procY;
	/* x % cols */
	int arrayX;
	/* y % rows */
	int arrayY;
	};

	struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
	{
	struct global_index result;

	result.x = arrayX + (procX * cols);
	result.y = arrayY + (procY * rows);

	return result;
	}

	struct local_index global_to_local(int x, int y, int cols, int rows)
	{
	struct local_index result;

	result.arrayX = x % cols;
	result.procX = (int) x / (int) cols;

	result.arrayY = y % rows;
	result.procY = (int) y / (int) rows;

	return result;
	}

	/* Factorisation function based on the one from Lab Sheet 9 - written (I assume) by Ian Bush */
	/* (Reformatted to fit with my coding style, and reworked to use integer
	   arithmetic only and to cope with n < 1) */

	/* Factorise n into two numbers which are as close to equal as possible.

	   n - the number to factorise
	   x - receives the smaller factor (the largest divisor of n that does
	       not exceed the square root of n)
	   y - receives the matching larger factor, so that *x * *y == n

	   For n < 1 there is no sensible factorisation; *x is set to 1 and *y
	   to n so that *x * *y == n still holds and nothing divides by zero. */
	void factorise(int n, int *x, int *y)
	{
		int d;

		if ( n < 1 )
		{
			*x = 1;
			*y = n;
			return;
		}

		/* Remember the largest divisor d with d * d <= n. The test is written
		   as d <= n / d so that it cannot overflow for large n. */
		*x = 1;
		for ( d = 1; d <= n / d; d++ )
		{
			if ( n % d == 0 )
			{
				*x = d;
			}
		}

		*y = n / *x;
	}

	/* Distributed transpose of an n x n matrix.

	   The processes are arranged in a 2D cartesian grid and each one owns a
	   rectangular chunk of the matrix. Every element is sent, one message per
	   element, to the process that owns the transposed position, and the
	   matching element is received into new_array. Progress and the resulting
	   chunks are printed to stdout.

	   Returns EXIT_SUCCESS, or aborts the MPI job if allocation fails. */
	int main(int argc, char ** argv)
	{
		/* ----------- VARIABLE DECLARATIONS ----------- */

		/* Size of matrix - the matrix will be n x n */
		int n = 10;

		/* Number of processes and rank of this process (within cart_comm) */
		int nprocs, rank;

		/* Size and periodicity of cartesian grid */
		int dim_size[2];
		int periods[2];

		/* Number of processes in x and y directions */
		int npx, npy;

		/* Offset for this process in x and y directions */
		int x_offset, y_offset;

		/* The standard (normal) number of rows and cols
		per core - not valid for the last in a row or col */
		int std_rows_in_core, std_cols_in_core;

		/* Coordinates in cartesian grid, and num of rows and cols
		this process has */
		int coords[2];
		int rows_in_core;
		int cols_in_core;

		/* Arrays to store the chunk of the matrix that we have (array) and
		the chunk of the transposed matrix that we receive (new_array) */
		double **array;
		double **new_array;

		/* Loop variables */
		int i, j;

		/* Cartesian communicator */
		MPI_Comm cart_comm;

		/* Grid coordinates and rank of the process we swap an element with */
		int coords2[2];
		int rank2;

		int global_x1, global_x2;
		int global_y1, global_y2;

		struct global_index gi;
		struct local_index li;

		/* One send and one receive request per element of our chunk */
		MPI_Request *all_requests;
		int max_requests;
		int request_counter;

		/* ----------- MPI Setup ----------- */

		/* Initialise MPI */
		MPI_Init(&argc, &argv);

		/* Get the number of processes */
		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

		/* Work out how big the grid should be, given the number of processes */
		factorise( nprocs, &npx, &npy );

		/* Create the cartesian communicator */
		dim_size[0] = npy;
		dim_size[1] = npx;
		periods[0] = 1;
		periods[1] = 1;

		MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);

		/* Reordering is enabled above, so our rank in cart_comm may differ
		from our rank in MPI_COMM_WORLD. Everything that follows uses ranks
		and coordinates of cart_comm only. */
		MPI_Comm_rank(cart_comm, &rank);

		/* Get our co-ordinates within that communicator */
		MPI_Cart_coords(cart_comm, rank, 2, coords);

		/* Standard chunk size: n divided by the grid size, rounded up */
		std_rows_in_core = (n + npx - 1) / npx;
		std_cols_in_core = (n + npy - 1) / npy;

		/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
		top left-hand corner of the whole matrix */
		x_offset = coords[0] * std_cols_in_core;
		y_offset = coords[1] * std_rows_in_core;

		/* We hold a standard-sized chunk unless that would run past the edge
		of the matrix, in which case we hold whatever is left (possibly
		nothing at all when there are more processes than rows or cols) */
		cols_in_core = n - x_offset;
		if (cols_in_core > std_cols_in_core)
		{
			cols_in_core = std_cols_in_core;
		}
		if (cols_in_core < 0)
		{
			cols_in_core = 0;
		}

		rows_in_core = n - y_offset;
		if (rows_in_core > std_rows_in_core)
		{
			rows_in_core = std_rows_in_core;
		}
		if (rows_in_core < 0)
		{
			rows_in_core = 0;
		}

		printf("X: %d, Y: %d, RpC: %d, CpC: %d\n", coords[0], coords[1], rows_in_core, cols_in_core);

		/* ----------- INITIALISE MATRIX CHUNKS ----------- */

		/* Allocate arrays to store this chunk of the matrix, plus the request
		list. If an allocation fails, print an error and abort the whole job
		so that the other processes are not left waiting for us.
		NOTE(review): assumes alloc_2d_double(rows, cols) returns an array
		indexed as [row][col] - confirm against array_alloc.h */
		max_requests = 2 * rows_in_core * cols_in_core;

		array = alloc_2d_double(rows_in_core, cols_in_core);
		new_array = alloc_2d_double(rows_in_core, cols_in_core);
		all_requests = malloc((size_t) (max_requests > 0 ? max_requests : 1) * sizeof *all_requests);

		/* A NULL chunk only counts as a failure when the chunk is non-empty */
		if (all_requests == NULL
			|| (max_requests > 0 && (array == NULL || new_array == NULL)))
		{
			printf("Problem with array allocation.\nExiting\n");
			MPI_Abort(MPI_COMM_WORLD, 1);
			return EXIT_FAILURE;
		}

		/* Fill our chunk so that each element holds its 1-based position in
		the whole matrix, counted row by row; fill the receiving chunk with
		a marker value so that elements never received stand out */
		for (i = 0; i < rows_in_core; i++)
		{
			for (j = 0; j < cols_in_core; j++)
			{
				array[i][j] = (double) ( (i + y_offset) * n) + ( (j + x_offset) + 1);
				new_array[i][j] = 9999.99;
			}
		}

		MPI_Barrier(MPI_COMM_WORLD);

		/* ----------- DO TRANSPOSE ----------- */
		request_counter = 0;

		for (i = 0; i < rows_in_core; i++)
		{
			for (j = 0; j < cols_in_core; j++)
			{
				/* For each item in this chunk of the array:

				Send it to the opposite index
				Receive from the opposite index */

				/* Calculate the opposite index */
				gi = local_to_global(coords[0], coords[1], j, i, std_cols_in_core, std_rows_in_core);

				/* The two global indices for swapping */
				global_x1 = gi.x;
				global_y1 = gi.y;

				global_x2 = global_y1;
				global_y2 = global_x1;

				/* We know which rank one of them is (as we're running as it!)
				but we need to calculate the rank of the other which involves:
				1. Getting the local index of the swapped global index
				2. Converting the X, Y process co-ordinates to a rank */
				li = global_to_local(global_x2, global_y2, std_cols_in_core, std_rows_in_core);

				coords2[0] = li.procX;
				coords2[1] = li.procY;
				MPI_Cart_rank(cart_comm, coords2, &rank2);

				printf("Swapping\n");
				printf("\tFrom global (%d, %d) - Processor (%d, %d) with local (%d, %d)\n", global_x1, global_y1, coords[0], coords[1], j, i);
				printf("\tTo global (%d, %d)- Processor (%d, %d) with local (%d, %d)\n", global_x2, global_y2, coords2[0], coords2[1], li.arrayX, li.arrayY);

				/* The tag identifies the element by its position in the
				transposed matrix, so the tag we send with is the tag the
				partner process is receiving with, and vice versa. rank2 is
				a rank in cart_comm, so cart_comm is the communicator used. */
				MPI_Isend(&array[i][j], 1, MPI_DOUBLE, rank2, (global_y2 * n) + global_x2, cart_comm, &all_requests[request_counter]);
				request_counter++;
				MPI_Irecv(&new_array[i][j], 1, MPI_DOUBLE, rank2, (global_x2 * n) + global_y2, cart_comm, &all_requests[request_counter]);
				request_counter++;
			}
		}
		MPI_Barrier(cart_comm);
		printf("Waiting...\n");
		MPI_Waitall(request_counter, all_requests, MPI_STATUSES_IGNORE);
		MPI_Barrier(MPI_COMM_WORLD);

		/* ----------- PRINT THE TRANSPOSED CHUNK ----------- */

		for (i = 0; i < rows_in_core; i++)
		{
			printf("Test Process (%d, %d): ", coords[0], coords[1]);
			for (j = 0; j < cols_in_core; j++)
			{
				printf("%f ", new_array[i][j]);
			}
			printf("\n");
		}

		/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */

		/* NOTE(review): array and new_array are not released here because the
		matching free routine from array_alloc.h is not visible in this file */
		free(all_requests);

		/* Close down the MPI environment */
		MPI_Finalize();

		return EXIT_SUCCESS;
	}