Skip to content

Instantly share code, notes, and snippets.

@georgebisbas
Created May 6, 2025 08:43
Show Gist options
  • Save georgebisbas/58a8ceb9c123fff1fe5292ed0603162d to your computer and use it in GitHub Desktop.
Save georgebisbas/58a8ceb9c123fff1fe5292ed0603162d to your computer and use it in GitHub Desktop.
demo_laplace.c
#define _POSIX_C_SOURCE 200809L
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "mpi.h"
struct dataobj
{
void *restrict data;
int * size;
unsigned long nbytes;
unsigned long * npsize;
unsigned long * dsize;
int * hsize;
int * hofs;
int * oofs;
void * dmap;
} ;
struct neighborhood
{
int ll, lc, lr;
int cl, cc, cr;
int rl, rc, rr;
} ;
struct profiler
{
double section0;
} ;
static void sendrecv0(struct dataobj *restrict u_vec, const int x_size, const int y_size, int ogtime, int ogx, int ogy, int ostime, int osx, int osy, int fromrank, int torank, MPI_Comm comm);
static void haloupdate0(struct dataobj *restrict u_vec, MPI_Comm comm, struct neighborhood * nb, int otime);
static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);
static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);
int Kernel(struct dataobj *restrict u_vec, const float dt, const float h_x, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, MPI_Comm comm, struct neighborhood * nb, struct profiler * timers)
{
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;
for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))
{
START(section0)
haloupdate0(u_vec,comm,nb,t0);
for (int x = x_m; x <= x_M; x += 1)
{
for (int y = y_m; y <= y_M; y += 1)
{
u[t1][x + 2][y + 2] = dt*(u[t0][x + 1][y + 2]/powf(h_x, 2) - 2.0F*u[t0][x + 2][y + 2]/powf(h_x, 2) + u[t0][x + 3][y + 2]/powf(h_x, 2) + u[t0][x + 2][y + 1]/powf(h_y, 2) - 2.0F*u[t0][x + 2][y + 2]/powf(h_y, 2) + u[t0][x + 2][y + 3]/powf(h_y, 2) + u[t0][x + 2][y + 2]/dt);
}
}
STOP(section0,timers)
}
return 0;
}
static void sendrecv0(struct dataobj *restrict u_vec, const int x_size, const int y_size, int ogtime, int ogx, int ogy, int ostime, int osx, int osy, int fromrank, int torank, MPI_Comm comm)
{
MPI_Request rrecv;
MPI_Request rsend;
float *restrict bufg_vec __attribute__ ((aligned (64)));
posix_memalign((void**)(&bufg_vec),64,x_size*y_size*sizeof(float));
float *restrict bufs_vec __attribute__ ((aligned (64)));
posix_memalign((void**)(&bufs_vec),64,x_size*y_size*sizeof(float));
MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&(rrecv));
if (torank != MPI_PROC_NULL)
{
gather0(bufg_vec,x_size,y_size,u_vec,ogtime,ogx,ogy);
}
MPI_Isend(bufg_vec,x_size*y_size,MPI_FLOAT,torank,13,comm,&(rsend));
MPI_Wait(&(rsend),MPI_STATUS_IGNORE);
MPI_Wait(&(rrecv),MPI_STATUS_IGNORE);
if (fromrank != MPI_PROC_NULL)
{
scatter0(bufs_vec,x_size,y_size,u_vec,ostime,osx,osy);
}
free(bufg_vec);
free(bufs_vec);
}
static void haloupdate0(struct dataobj *restrict u_vec, MPI_Comm comm, struct neighborhood * nb, int otime)
{
sendrecv0(u_vec,u_vec->hsize[3],u_vec->npsize[2],otime,u_vec->oofs[2],u_vec->hofs[4],otime,u_vec->hofs[3],u_vec->hofs[4],nb->rc,nb->lc,comm);
sendrecv0(u_vec,u_vec->hsize[2],u_vec->npsize[2],otime,u_vec->oofs[3],u_vec->hofs[4],otime,u_vec->hofs[2],u_vec->hofs[4],nb->lc,nb->rc,comm);
sendrecv0(u_vec,u_vec->npsize[1],u_vec->hsize[5],otime,u_vec->hofs[2],u_vec->oofs[4],otime,u_vec->hofs[2],u_vec->hofs[5],nb->cr,nb->cl,comm);
sendrecv0(u_vec,u_vec->npsize[1],u_vec->hsize[4],otime,u_vec->hofs[2],u_vec->oofs[5],otime,u_vec->hofs[2],u_vec->hofs[4],nb->cl,nb->cr,comm);
}
static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)
{
float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec;
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;
const int x_m = 0;
const int y_m = 0;
const int x_M = bx_size - 1;
const int y_M = by_size - 1;
for (int x = x_m; x <= x_M; x += 1)
{
for (int y = y_m; y <= y_M; y += 1)
{
buf[0][x][y] = u[otime][x + ox][y + oy];
}
}
}
static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)
{
float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec;
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;
const int x_m = 0;
const int y_m = 0;
const int x_M = bx_size - 1;
const int y_M = by_size - 1;
for (int x = x_m; x <= x_M; x += 1)
{
for (int y = y_m; y <= y_M; y += 1)
{
u[otime][x + ox][y + oy] = buf[0][x][y];
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment