Created
May 6, 2025 08:43
-
-
Save georgebisbas/58a8ceb9c123fff1fe5292ed0603162d to your computer and use it in GitHub Desktop.
demo_laplace.c
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _POSIX_C_SOURCE 200809L | |
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL); | |
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000; | |
#include "stdlib.h" | |
#include "math.h" | |
#include "sys/time.h" | |
#include "mpi.h" | |
struct dataobj | |
{ | |
void *restrict data; | |
int * size; | |
unsigned long nbytes; | |
unsigned long * npsize; | |
unsigned long * dsize; | |
int * hsize; | |
int * hofs; | |
int * oofs; | |
void * dmap; | |
} ; | |
struct neighborhood | |
{ | |
int ll, lc, lr; | |
int cl, cc, cr; | |
int rl, rc, rr; | |
} ; | |
struct profiler | |
{ | |
double section0; | |
} ; | |
static void sendrecv0(struct dataobj *restrict u_vec, const int x_size, const int y_size, int ogtime, int ogx, int ogy, int ostime, int osx, int osy, int fromrank, int torank, MPI_Comm comm); | |
static void haloupdate0(struct dataobj *restrict u_vec, MPI_Comm comm, struct neighborhood * nb, int otime); | |
static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy); | |
static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy); | |
int Kernel(struct dataobj *restrict u_vec, const float dt, const float h_x, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, MPI_Comm comm, struct neighborhood * nb, struct profiler * timers) | |
{ | |
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data; | |
for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2)) | |
{ | |
START(section0) | |
haloupdate0(u_vec,comm,nb,t0); | |
for (int x = x_m; x <= x_M; x += 1) | |
{ | |
for (int y = y_m; y <= y_M; y += 1) | |
{ | |
u[t1][x + 2][y + 2] = dt*(u[t0][x + 1][y + 2]/powf(h_x, 2) - 2.0F*u[t0][x + 2][y + 2]/powf(h_x, 2) + u[t0][x + 3][y + 2]/powf(h_x, 2) + u[t0][x + 2][y + 1]/powf(h_y, 2) - 2.0F*u[t0][x + 2][y + 2]/powf(h_y, 2) + u[t0][x + 2][y + 3]/powf(h_y, 2) + u[t0][x + 2][y + 2]/dt); | |
} | |
} | |
STOP(section0,timers) | |
} | |
return 0; | |
} | |
static void sendrecv0(struct dataobj *restrict u_vec, const int x_size, const int y_size, int ogtime, int ogx, int ogy, int ostime, int osx, int osy, int fromrank, int torank, MPI_Comm comm) | |
{ | |
MPI_Request rrecv; | |
MPI_Request rsend; | |
float *restrict bufg_vec __attribute__ ((aligned (64))); | |
posix_memalign((void**)(&bufg_vec),64,x_size*y_size*sizeof(float)); | |
float *restrict bufs_vec __attribute__ ((aligned (64))); | |
posix_memalign((void**)(&bufs_vec),64,x_size*y_size*sizeof(float)); | |
MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&(rrecv)); | |
if (torank != MPI_PROC_NULL) | |
{ | |
gather0(bufg_vec,x_size,y_size,u_vec,ogtime,ogx,ogy); | |
} | |
MPI_Isend(bufg_vec,x_size*y_size,MPI_FLOAT,torank,13,comm,&(rsend)); | |
MPI_Wait(&(rsend),MPI_STATUS_IGNORE); | |
MPI_Wait(&(rrecv),MPI_STATUS_IGNORE); | |
if (fromrank != MPI_PROC_NULL) | |
{ | |
scatter0(bufs_vec,x_size,y_size,u_vec,ostime,osx,osy); | |
} | |
free(bufg_vec); | |
free(bufs_vec); | |
} | |
static void haloupdate0(struct dataobj *restrict u_vec, MPI_Comm comm, struct neighborhood * nb, int otime) | |
{ | |
sendrecv0(u_vec,u_vec->hsize[3],u_vec->npsize[2],otime,u_vec->oofs[2],u_vec->hofs[4],otime,u_vec->hofs[3],u_vec->hofs[4],nb->rc,nb->lc,comm); | |
sendrecv0(u_vec,u_vec->hsize[2],u_vec->npsize[2],otime,u_vec->oofs[3],u_vec->hofs[4],otime,u_vec->hofs[2],u_vec->hofs[4],nb->lc,nb->rc,comm); | |
sendrecv0(u_vec,u_vec->npsize[1],u_vec->hsize[5],otime,u_vec->hofs[2],u_vec->oofs[4],otime,u_vec->hofs[2],u_vec->hofs[5],nb->cr,nb->cl,comm); | |
sendrecv0(u_vec,u_vec->npsize[1],u_vec->hsize[4],otime,u_vec->hofs[2],u_vec->oofs[5],otime,u_vec->hofs[2],u_vec->hofs[4],nb->cl,nb->cr,comm); | |
} | |
static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy) | |
{ | |
float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec; | |
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data; | |
const int x_m = 0; | |
const int y_m = 0; | |
const int x_M = bx_size - 1; | |
const int y_M = by_size - 1; | |
for (int x = x_m; x <= x_M; x += 1) | |
{ | |
for (int y = y_m; y <= y_M; y += 1) | |
{ | |
buf[0][x][y] = u[otime][x + ox][y + oy]; | |
} | |
} | |
} | |
static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy) | |
{ | |
float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec; | |
float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data; | |
const int x_m = 0; | |
const int y_m = 0; | |
const int x_M = bx_size - 1; | |
const int y_M = by_size - 1; | |
for (int x = x_m; x <= x_M; x += 1) | |
{ | |
for (int y = y_m; y <= y_M; y += 1) | |
{ | |
u[otime][x + ox][y + oy] = buf[0][x][y]; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment