@benrules2
Created December 9, 2016 03:09
Parallel Conway's Game of Life
/************************************
Conway's Game of Life
MPI domain decomposition in both
the i and j directions
Originally written for 4 processors;
adapted for N processors by beagan
*************************************/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#define NI 200
#define NJ 200
#define NSTEPS 500
int main(int argc, char *argv[]){
int i, j, n, im, ip, jm, jp, nsum, isum, isum1, nprocs ,myid;
int ig, jg, i1g, i2g, j1g, j2g, ninom, njnom, ninj,
i1, i2, j1, j2, ni, nj, i2m, j2m;
int niproc, njproc, isumloc;
int proc_north, proc_south, proc_east, proc_west,
proc_ne, proc_nw, proc_se, proc_sw;
int **old, **new, *old1d, *new1d;
MPI_Status status;
MPI_Datatype column_type;
MPI_Comm comm_cart;
MPI_Request request[16];
float x;
/* Variables for the Cartesian MPI topology */
int dims[2]={0,0};
int period[2]={1,1};
int ndims=2;
int coords[2]={0,0};
int maxdims=ndims; /* length of the coords array */
/* initialize MPI */
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Dims_create(nprocs, ndims, dims);
MPI_Cart_create(MPI_COMM_WORLD, ndims,dims, period, 0, &comm_cart);
MPI_Cart_coords(comm_cart, myid, maxdims, coords);
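/* Descriptive note: MPI_Dims_create fills dims[] with a balanced
factorization of nprocs, MPI_Cart_create builds a 2-D process grid
that is periodic in both directions (period = {1,1}), matching the
wrap-around boundary of the Life grid, and MPI_Cart_coords returns
this rank's (row, column) position in that grid. */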
/* number of processors in each direction of the process grid, and
nominal number of i and j points per proc. (without ghost cells,
assume numbers divide evenly) */
niproc = dims[0];
njproc = dims[1];
ninom = NI/niproc;
njnom = NJ/njproc;
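/* Worked example (not computed at runtime): with nprocs = 4 on the
200 x 200 grid, MPI_Dims_create will typically pick dims = {2,2},
so each rank owns a nominal 100 x 100 block (ninom = njnom = 100)
plus one layer of ghost cells on every side. */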
/* For transferring data, define processor number
above (north), below (south), to the right (east),
and to the left (west) of the current processor.
Note that when there are only 2 rows of processors,
north and south will be equal due to periodicity
(same for 2 columns for east and west). */
if(nprocs > 1){
/* Get ranks of the neighboring processors in all compass directions
of the Cartesian topology. North is the block at the next-lower i
coordinate (coords[0]-1), south at coords[0]+1, west at coords[1]-1,
east at coords[1]+1; periodicity wraps the edges. */
int coords_north[2]={coords[0]-1,coords[1]};
int coords_south[2]={coords[0]+1,coords[1]};
int coords_east[2]={coords[0],coords[1]+1};
int coords_west[2]={coords[0],coords[1]-1};
int coords_nw[2]={coords[0]-1,coords[1]-1};
int coords_ne[2]={coords[0]-1,coords[1]+1};
int coords_sw[2]={coords[0]+1,coords[1]-1};
int coords_se[2]={coords[0]+1,coords[1]+1};
MPI_Cart_rank(comm_cart, coords_north, &proc_north);
MPI_Cart_rank(comm_cart, coords_south, &proc_south);
MPI_Cart_rank(comm_cart, coords_east, &proc_east);
MPI_Cart_rank(comm_cart, coords_west, &proc_west);
MPI_Cart_rank(comm_cart, coords_nw, &proc_nw);
MPI_Cart_rank(comm_cart, coords_ne, &proc_ne);
MPI_Cart_rank(comm_cart, coords_sw, &proc_sw);
MPI_Cart_rank(comm_cart, coords_se, &proc_se);
printf("myid=%d, N E S W = %d %d %d %d\n", myid, proc_north, proc_east, proc_south, proc_west);
}
/* local starting and ending index, including 2 ghost cells
(one at bottom, one at top) */
i1 = 0;
i2 = ninom + 1;
i2m = i2 - 1;
j1 = 0;
j2 = njnom + 1;
j2m = j2 - 1;
/* global starting and ending indices (without ghost cells),
derived from this processor's Cartesian coordinates */
i1g = coords[0]*ninom + 1;
j1g = coords[1]*njnom + 1;
i2g = i1g + ninom - 1;
j2g = j1g + njnom - 1;
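/* Illustration of the mapping: the rank at coords (p,q) owns global
rows p*ninom+1 .. (p+1)*ninom and global columns q*njnom+1 ..
(q+1)*njnom; the surrounding ghost layer holds copies of the
neighbors' edge cells. */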
/* allocate arrays; want elements to be contiguous,
so allocate 1-D arrays, then set pointer to each row
(old and new) to allow use of array notation for convenience */
ni = i2-i1+1;
nj = j2-j1+1;
ninj = ni*nj; /* number of points on current processor */
old1d = malloc(ninj*sizeof(int));
new1d = malloc(ninj*sizeof(int));
old = malloc(ni*sizeof(int*)); /* allocate pointers to rows */
new = malloc(ni*sizeof(int*));
for(i=0; i<ni; i++){ /* set row pointers to appropriate */
old[i] = &old1d[i*nj]; /* locations in 1-D arrays */
new[i] = &new1d[i*nj];
}
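/* Memory layout note: old1d and new1d are single contiguous blocks of
ni*nj ints, and old[i]/new[i] are row pointers into them, so
old[i][j] refers to the same storage as old1d[i*nj + j]. Contiguity
is what lets whole rows and strided columns be sent directly with MPI. */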
/* Initialize elements of old to 0 or 1.
We're doing some sleight of hand here to make sure we
initialize to the same values as in the serial case.
The rand() function is called for every i and j, even
if they are not on the current processor, to get the same
random distribution as the serial case, but they are
only used if i and j reside on the current processor. */
for(ig=1; ig<=NI; ig++){
for(jg=1; jg<=NJ; jg++){
x = rand()/((float)RAND_MAX + 1);
/* if this ig and jg are on the current processor */
if( ig >= i1g && ig <= i2g && jg >= j1g && jg <= j2g ){
/* local i and j indices, accounting for lower ghost cell */
i = ig - i1g + 1;
j = jg - j1g + 1;
if(x<0.5){
old[i][j] = 0;
}else{
old[i][j] = 1;
}
}
}
}
/* Create derived type for single column of array.
There are ninom "blocks," each containing 1 element,
with a stride of nj between the blocks */
MPI_Type_vector(ninom , 1, nj, MPI_INT, &column_type);
MPI_Type_commit(&column_type);
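/* Elements picked out by column_type when applied at &old[i][j]:
old[i][j], old[i+1][j], ..., old[i+ninom-1][j], i.e. one value from
each of ninom consecutive rows, spaced nj ints apart in memory. */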
/* iterate */
for(n=0; n<NSTEPS; n++){
/* copy or transfer data to ghost cells */
if(nprocs == 1){
for(i=1; i<i2; i++){ /* left and right columns */
old[i][0] = old[i][j2m];
old[i][j2] = old[i][1];
}
for(j=1; j<j2; j++){ /* top and bottom rows */
old[0][j] = old[i2m][j];
old[i2][j] = old[1][j];
}
old[0][0] = old[i2m][j2m]; /* corners */
old[0][j2] = old[i2m][1];
old[i2][0] = old[1][j2m];
old[i2][j2] = old[1][1];
}else{
/* send and receive left and right columns using
our derived type "column_type" */
MPI_Isend(&old[1][j2m], 1, column_type,
proc_east, 0, comm_cart, &request[0]);
MPI_Irecv(&old[1][j2], 1, column_type,
proc_east, 1, comm_cart, &request[1]);
MPI_Isend(&old[1][1], 1, column_type,
proc_west, 1, comm_cart, &request[2]);
MPI_Irecv(&old[1][0], 1, column_type,
proc_west, 0, comm_cart, &request[3]);
/* send and receive top and bottom rows */
MPI_Isend(&old[i2m][1], njnom, MPI_INT,
proc_south, 0, comm_cart, &request[4]);
MPI_Irecv(&old[i2][1], njnom, MPI_INT,
proc_south, 1, comm_cart, &request[5]);
MPI_Isend(&old[1][1], njnom, MPI_INT,
proc_north, 1, comm_cart, &request[6]);
MPI_Irecv(&old[0][1], njnom, MPI_INT,
proc_north, 0, comm_cart, &request[7]);
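/* Tag pairing: each Isend's tag matches the tag of the corresponding
Irecv posted on the neighboring rank. For example, the column sent
east with tag 0 is received by the eastern neighbor (for whom this
rank is "west") into its ghost column 0 with tag 0. */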
/* send and receive corners */
MPI_Isend(&old[1][1], 1, MPI_INT,
proc_nw, 10, comm_cart, &request[8]);
MPI_Irecv( &old[0][0], 1, MPI_INT,
proc_nw, 12, comm_cart, &request[9]);
MPI_Isend(&old[1][j2m], 1, MPI_INT,
proc_ne, 11, comm_cart, &request[10]);
MPI_Irecv( &old[0][j2], 1, MPI_INT,
proc_ne, 13, comm_cart, &request[11]);
MPI_Isend(&old[i2m][j2m], 1, MPI_INT,
proc_se, 12, comm_cart, &request[12]);
MPI_Irecv( &old[i2][j2], 1, MPI_INT,
proc_se, 10, comm_cart, &request[13]);
MPI_Isend(&old[i2m][1], 1, MPI_INT,
proc_sw, 13, comm_cart, &request[14]);
MPI_Irecv( &old[i2][0], 1, MPI_INT,
proc_sw, 11, comm_cart, &request[15]);
/* Make sure all non-blocking messages have arrived */
for(i=0; i<16; i++){
MPI_Wait(&request[i],&status);
}
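/* All 16 requests must complete before the update below so that every
ghost cell holds its neighbor's current values; a single MPI_Waitall
call would accomplish the same thing. */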
}
/* update states of cells */
for(i=1; i<i2; i++){
for(j=1; j<j2; j++){
/* Periodic boundary conditions are
maintained through ghost cells. */
im = i-1;
ip = i+1;
jm = j-1;
jp = j+1;
nsum = old[im][jp] + old[i][jp] + old[ip][jp]
+ old[im][j ] + old[ip][j ]
+ old[im][jm] + old[i][jm] + old[ip][jm];
switch(nsum){
case 3:
new[i][j] = 1;
break;
case 2:
new[i][j] = old[i][j];
break;
default:
new[i][j] = 0;
}
}
}
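/* The switch above encodes Conway's rules: a cell with exactly 3 live
neighbors becomes (or stays) alive, a cell with exactly 2 keeps its
current state, and any other neighbor count leaves it dead. */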
/* copy new state into old state */
for(i=1; i<i2; i++){
for(j=1; j<j2; j++){
old[i][j] = new[i][j];
}
}
}
/* Iterations are done; sum the number of live cells */
isum = 0;
for(i=1; i<i2; i++){
for(j=1; j<j2; j++){
isum = isum + new[i][j];
}
}
/* Print final number of live cells. For multiple processors,
must reduce partial sums */
if(nprocs > 1){
isumloc = isum;
MPI_Reduce(&isumloc, &isum, 1, MPI_INT, MPI_SUM, 0, comm_cart);
printf("myid=%d partial sum=%d\n", myid, isumloc);
}
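/* MPI_Reduce places the summed total in isum only on rank 0 (the root);
on the other ranks isum still holds the local partial sum, which is
why only rank 0 prints the global count below. */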
if(myid == 0) printf("Number of live cells = %d \n", isum);
MPI_Type_free(&column_type);
MPI_Finalize();
return 0;
}