Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
Created January 5, 2023 11:59
Show Gist options
  • Save kaushikcfd/afe3b4d42956bb29303862845b0ffb10 to your computer and use it in GitHub Desktop.
Save kaushikcfd/afe3b4d42956bb29303862845b0ffb10 to your computer and use it in GitHub Desktop.
/* Shorthands for the work-item's local index and the work-group's index along
 * dimension N, cast to int so that the index arithmetic below (which subtracts
 * these values and compares against 0) is carried out in signed integers. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* The kernel below uses double; for OpenCL C versions older than 1.2 the
 * cl_khr_fp64 extension is explicitly enabled here. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/*
 * Tiled double-precision matrix product, C = A * B.
 *
 * Indexing used by this kernel (all row-major):
 *   A[32 * i + k]   k in 0..31
 *   B[72 * k + j]   j in 0..71
 *   C[72 * i + j]
 *
 * Each work-group is 8 x 23 work-items and produces an 8 x 23 tile of C:
 *   i = 8 * gid(0) + lid(0)   (tile row)
 *   j = 23 * gid(1) + lid(1)  (tile column; only j <= 71 is computed/stored)
 *
 * The k range is processed in three chunks of up to 11 values (k_outer = 0..2;
 * the last chunk has only 10 valid values, k = 22..31).  For every chunk the
 * work-group cooperatively copies an 8 x 11 slice of A and an 11 x 23 slice of
 * B into local memory, synchronises, and then accumulates the partial dot
 * products from the local copies.
 *
 * NOTE(review): rows of A/C are not bounds-checked, so the launch presumably
 * uses a row count that is a multiple of 8, and gid(1) presumably ranges over
 * 0..3 - confirm against the host code.
 */
__kernel void __attribute__ ((reqd_work_group_size(8, 23, 1))) loopy_kernel(__global double const *__restrict__ A, __global double const *__restrict__ B, __global double *__restrict__ C)
{
  __local double A_fetch[8 * 11];   /* A tile:  A_fetch[11 * i_local + k_local] */
  __local double B_fetch[11 * 23];  /* B tile:  B_fetch[23 * k_local + j_local] */

  /* Initialised for every work-item; it is only accumulated into and stored
   * by work-items whose column j is in range. */
  double acc_k_outer_k_inner = 0.0;

  for (int k_outer = 0; k_outer <= 2; ++k_outer)
  {
    /* Do not overwrite the tiles while another work-item may still be reading
     * the previous chunk. */
    barrier(CLK_LOCAL_MEM_FENCE) /* for B_fetch (B_fetch_rule rev-depends on insn_k_outer_k_inner_update) */;

    /* ---- Prefetch B: lid(1) picks k_local (0..10), and
     *      lid(0) + 8 * jprftch_B_outer picks j_local (0..22). ---- */
    if (71 + -23 * gid(1) + -1 * lid(0) >= 0)
    {
      if (31 + -11 * k_outer + -1 * lid(1) >= 0 && 10 + -1 * lid(1) >= 0)
        /* Upper bound: the general expression evaluates to 2 for lid(0) <= 6
         * and 1 for lid(0) == 7, so j_local never exceeds 22.  The per-element
         * guard below rejects columns past j == 71. */
        for (int jprftch_B_outer = 0; jprftch_B_outer <= ((-3 + gid(1) == 0 && 2 + -1 * lid(1) >= 0) ? 0 : 2 + -1 * lid(0) + (6 + 7 * lid(0)) / 8); ++jprftch_B_outer)
          if (71 + -8 * jprftch_B_outer + -23 * gid(1) + -1 * lid(0) >= 0)
            B_fetch[23 * lid(1) + 8 * jprftch_B_outer + lid(0)] = B[72 * (11 * k_outer + lid(1)) + 23 * gid(1) + 8 * jprftch_B_outer + lid(0)];
    }

    /* ---- Prefetch A: lid(1) picks i_local (0..7), and
     *      lid(0) + 8 * kprftch_A_outer picks k_local. ----
     *
     * Upper bound of the loop:
     *   k_outer <= 1 (full chunk, k_local 0..10): 1 for lid(0) <= 2, else 0
     *   k_outer == 2 (short chunk, k_local 0..9): 1 for lid(0) <= 1, else 0
     *
     * The choice between the two bounds must depend only on k_outer (and the
     * row guard).  In this loop lid(1) is a row of the A tile, so it must not
     * be tested against the column limit 71 - 23 * gid(1): doing so left
     * A_fetch[11 * r + 10] unwritten for rows r = 3..7 in the last column
     * group (gid(1) == 3), while the accumulation below still reads it. */
    for (int kprftch_A_outer = 0; kprftch_A_outer <= ((7 + -1 * lid(1) >= 0 && 1 + -1 * k_outer >= 0) ? 1 + -1 * lid(0) + (2 + 7 * lid(0)) / 8 : -1 + -1 * lid(0) + (17 + 7 * lid(0)) / 8); ++kprftch_A_outer)
    {
      /* Only the first 8 values of lid(1) correspond to rows of the A tile. */
      if (7 + -1 * lid(1) >= 0)
        A_fetch[11 * lid(1) + 8 * kprftch_A_outer + lid(0)] = A[32 * (8 * gid(0) + lid(1)) + 11 * k_outer + 8 * kprftch_A_outer + lid(0)];
    }

    /* Make both tiles visible to the whole work-group before they are read. */
    barrier(CLK_LOCAL_MEM_FENCE) /* for A_fetch (insn_k_outer_k_inner_update depends on A_fetch_rule) */;

    /* ---- Accumulate this chunk: lid(0) is i_local, lid(1) is j_local. ---- */
    if (71 + -1 * lid(1) + -23 * gid(1) >= 0)
      for (int k_inner = 0; k_inner <= ((-2 + k_outer == 0 && 71 + -23 * gid(1) + -1 * lid(0) >= 0 && 9 + -1 * lid(1) >= 0) ? 9 : 10); ++k_inner)
        /* Skips k_local == 10 in the last chunk, where k would be 32. */
        if (31 + -1 * k_inner + -11 * k_outer >= 0)
          acc_k_outer_k_inner = acc_k_outer_k_inner + A_fetch[11 * lid(0) + k_inner] * B_fetch[23 * k_inner + lid(1)];
  }

  /* Store the result for in-range columns only. */
  if (71 + -23 * gid(1) + -1 * lid(1) >= 0)
    C[72 * (8 * gid(0) + lid(0)) + 23 * gid(1) + lid(1)] = acc_k_outer_k_inner;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment