Skip to content

Instantly share code, notes, and snippets.

@dappelha
Created December 13, 2023 18:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dappelha/2101e53893c0abaf6273015870a1ec81 to your computer and use it in GitHub Desktop.
Save dappelha/2101e53893c0abaf6273015870a1ec81 to your computer and use it in GitHub Desktop.
3D nested loops on GPUs
for (int l = 0; l < ld; l++)
{
for (int k = 0; k < kd; k++)
{
for (int j = 0; j < jd; j++)
{
if (l > 0 && l < (ld - 1) && k > 0 && k < (kd - 1) && j > 0 && j < (jd - 1))
{
jp = j + 1;
jm = j - 1;
kp = k + 1;
km = k - 1;
lp = l + 1;
lm = l - 1;
temp = (q[IDX(jm, k, l)] + q[IDX(jp,k,l)]);
temp += (q[IDX(j,km,l)] + q[IDX(j,kp,l)]);
temp += (q[IDX(j,k,lm)] + q[IDX(j,k,lp)]);
s[IDX(j,k,l)] = temp / 6.0;
}
}
}
}
CUDA version can combine blocks/threads arbitrarily and at multiple levels.
int l = threadIdx.z+(blockIdx.z-1)*blockDim.z;
int k = threadIdx.y+(blockIdx.y-1)*blockDim.y;
int j = threadIdx.x+(blockIdx.x-1)*blockDim.x;
if( j>0 && j<(jd-1) && k>0 && k<(kd-1) && l>0 && l<(ld-1) ) {
// get indices
jp = j+1;
jm = j-1;
kp = k+1;
km = k-1;
lp = l+1;
lm = l-1;
// Compute the stencil which reaches to neighbor s values.
temp = q[IDX(j,k,l)];
// j direction
temp += (q[IDX(jm, k, l)] + q[IDX(jp,k,l)]);
// k direction
temp += (q[IDX(j,km,l)] + q[IDX(j,kp,l)]);
// l direction
temp += (q[IDX(j,k,lm)] + q[IDX(j,k,lp)]);
// write average out to s
s[IDX(j,k,l)] = temp / 6.0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment