Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
Created December 22, 2022 11:41
Show Gist options
  • Save kaushikcfd/44bc70d3431fcdc7b391505ee4026cfd to your computer and use it in GitHub Desktop.
Save kaushikcfd/44bc70d3431fcdc7b391505ee4026cfd to your computer and use it in GitHub Desktop.
/* Shorthands for the work-item index inside its work-group (lid) and the
 * work-group index (gid) along dimension `dim`, with the built-in's result
 * cast to int. */
#define lid(dim) ((int) get_local_id(dim))
#define gid(dim) ((int) get_group_id(dim))
/* Request the cl_khr_fp64 extension explicitly whenever the compiler reports
 * an OpenCL C version that is not at least 120 (an undefined version macro
 * evaluates to 0 in #if and therefore also takes this path). */
#if !(__OPENCL_C_VERSION__ >= 120)
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/*
 * loopy_kernel
 *
 * As read from the index expressions below, for every element el < Nel and
 * every i in [0, 35) this computes
 *
 *   out[35*el + i] = sum_{j in [0,35)} sum_{r in [0,3)}
 *                      D_ary[1225*r + 35*i + j]
 *                      * ( sum_{x in [0,3)} J_ary[3*Nel*x + Nel*r + el]
 *                                           * u_ary[35*Nel*x + 35*el + j] )
 *
 * Work distribution (work-group size is fixed to 9 x 10 x 1):
 *   - lid(1) (0..9) picks one of the 10 elements handled by a work-group,
 *     el = 10*gid(0) + lid(1); work-items with el >= Nel do no element work
 *     but still take part in the D_fetch loads and in every barrier.
 *   - lid(0) (0..8) is the position inside a tile of width 9; the 35-long
 *     i and j ranges are covered by 4 tiles, the last one holding only 8
 *     valid entries.  The guards of the form `34 - 9*tile - lane >= 0` mask
 *     the out-of-range entry of that last tile.
 *
 * Parameters:
 *   u_ary, J_ary, D_ary  read-only input buffers, indexed as shown above.
 *   Nel                  number of elements.
 *   out                  output buffer, 35 entries per element.
 *
 * Fix relative to the generated original: the loops that zero the
 * accumulators and that store the results used an upper bound that fell back
 * to an expression in lid(1) whenever lid(0) < lid(1).  For lid(1) in {8, 9}
 * and lid(0) <= 7 that bound was 2 instead of 3, so accumulator slot 3 was
 * used uninitialised and out[35*el + 27 + lid(0)] was never written.  Both
 * loops now visit all 4 tiles and rely solely on the lid(0) guard.
 */
__kernel void __attribute__ ((reqd_work_group_size(9, 10, 1))) loopy_kernel(__global double const *__restrict__ u_ary, __global double const *__restrict__ J_ary, __global double const *__restrict__ D_ary, int const Nel, __global double *__restrict__ out)
{
  __local double D_fetch[3 * 9 * 9];
  double J_0[3 * 3];
  double acc_r_j_tile_j_inner[4];
  double acc_x;
  __local double subst_0[10 * 9 * 3];

  /* Element handled by this work-item and whether it exists. */
  int const el = 10 * gid(0) + lid(1);
  bool const el_in_range = el < Nel;

  if (el_in_range)
  {
    /* Private copy of this element's 3x3 block of J. */
    for (int J_prftch_r = 0; J_prftch_r <= 2; ++J_prftch_r)
      for (int J_prftch_x = 0; J_prftch_x <= 2; ++J_prftch_x)
        J_0[3 * J_prftch_x + J_prftch_r] = J_ary[Nel * 3 * J_prftch_x + Nel * J_prftch_r + el];

    /* Zero one accumulator per i-tile this work-item owns.  The bound must
     * be 3 for every work-item; the guard alone masks i = 35 (tile 3,
     * lid(0) == 8). */
    for (int i_tile_0 = 0; i_tile_0 <= 3; ++i_tile_0)
      if (34 + -9 * i_tile_0 + -1 * lid(0) >= 0)
        acc_r_j_tile_j_inner[i_tile_0] = 0.0;
  }

  for (int j_tile = 0; j_tile <= 3; ++j_tile)
  {
    barrier(CLK_LOCAL_MEM_FENCE) /* for subst_0 (prcmpt_x_redn rev-depends on insn_r_j_tile_j_inner_update) */;

    /* Precompute, for j = 9*j_tile + lid(0), the three values
     * sum_x J_0[3*x + r] * u[x, el, j] and publish them in local memory
     * for the other work-items of the same element. */
    if (el_in_range && 34 + -9 * j_tile + -1 * lid(0) >= 0)
      for (int rprcmpt_subst = 0; rprcmpt_subst <= 2; ++rprcmpt_subst)
      {
        acc_x = 0.0;
        for (int x = 0; x <= 2; ++x)
          acc_x = acc_x + J_0[3 * x + rprcmpt_subst] * u_ary[35 * Nel * x + 35 * el + 9 * j_tile + lid(0)];
        subst_0[27 * lid(1) + 3 * lid(0) + rprcmpt_subst] = acc_x;
      }

    for (int i_tile = 0; i_tile <= 3; ++i_tile)
    {
      barrier(CLK_LOCAL_MEM_FENCE) /* for D_fetch (D rev-depends on insn_r_j_tile_j_inner_update) */;

      /* Load the (i_tile, j_tile) 9x9 tile of D for all three r.  Here
       * lid(1) is the row inside the tile, so only lid(1) <= 8 loads; this
       * is deliberately not restricted to in-range elements because the
       * tile is shared by the whole work-group. */
      if (34 + -9 * i_tile + -1 * lid(1) >= 0 && 8 + -1 * lid(1) >= 0
          && 34 + -9 * j_tile + -1 * lid(0) >= 0)
        for (int rprftchD = 0; rprftchD <= 2; ++rprftchD)
          D_fetch[81 * rprftchD + 9 * lid(1) + lid(0)] = D_ary[1225 * rprftchD + 35 * (9 * i_tile + lid(1)) + 9 * j_tile + lid(0)];

      barrier(CLK_LOCAL_MEM_FENCE) /* for D_fetch (insn_r_j_tile_j_inner_update depends on D) */;

      /* Accumulate this tile's contribution to out[el, 9*i_tile + lid(0)].
       * j_inner always runs to 8; the inner guard drops j = 35 in the last
       * j-tile, which is all the original conditional bound achieved. */
      if (el_in_range && 34 + -1 * lid(0) + -9 * i_tile >= 0)
        for (int j_inner = 0; j_inner <= 8; ++j_inner)
          if (34 + -1 * j_inner + -9 * j_tile >= 0)
            for (int r = 0; r <= 2; ++r)
              acc_r_j_tile_j_inner[i_tile] = acc_r_j_tile_j_inner[i_tile] + subst_0[27 * lid(1) + 3 * j_inner + r] * D_fetch[81 * r + 9 * lid(0) + j_inner];
    }
  }

  /* Store every accumulator this work-item owns (same domain as the
   * initialisation loop above). */
  if (el_in_range)
    for (int i_tile_1 = 0; i_tile_1 <= 3; ++i_tile_1)
      if (34 + -9 * i_tile_1 + -1 * lid(0) >= 0)
        out[35 * el + lid(0) + 9 * i_tile_1] = acc_r_j_tile_j_inner[i_tile_1];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment