Skip to content

Instantly share code, notes, and snippets.

@chick
Created January 6, 2017 20:31
Show Gist options
  • Save chick/5e8280c0af03ffbf332b4bf4cdc70299 to your computer and use it in GitHub Desktop.
Save chick/5e8280c0af03ffbf332b4bf4cdc70299 to your computer and use it in GitHub Desktop.
// <file: kernel_0.cl>
/* The kernel below operates on double-precision data. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Row-major flattening helpers: encodeA_B[_C](i, j[, k]) turns a multi-index
 * into a linear offset for an array whose trailing extents are given in the
 * macro name. Every argument is widened to long before it is scaled.
 */

/* 3-D offset into a 514 x 514 x 514 array (264196 == 514 * 514). */
#define encode514_514_514(i, j, k) \
    ((long) (i) * 264196 + (long) (j) * 514 + (long) (k))

/* 2-D offset into a 514 x 514 array. */
#define encode514_514(i, j) \
    ((long) (i) * 514 + (long) (j))

/* 2-D offset into a 10 x 18 array. */
#define encode10_18(i, j) \
    ((long) (i) * 18 + (long) (j))
/*
 * kernel_0: one sweep of a 7-point weighted stencil from `mesh` into `out`.
 *
 * Both buffers are addressed through encode514_514_514, i.e. as flattened
 * 514 x 514 x 514 arrays. Each work-item derives one (index_1, index_2)
 * pair from its global id in dimensions 0 and 1 and then walks index_0
 * from 1 to 512, writing out[index_0, index_1, index_2] at every step.
 * The three consecutive mesh values along index_0 are carried in local
 * variables and rotated, so each of them is loaded from `mesh` only once.
 *
 * out  - destination buffer, written at the points described above.
 * mesh - source buffer, read at each written point and its six axis
 *        neighbours.
 *
 * NOTE(review): the single-entry offset/stride tables are indexed with
 * get_global_id(d) / 512, so the launch presumably uses a global size of at
 * most 512 in each of the two dimensions - confirm against the host code.
 */
__kernel void kernel_0(__global double* out, __global double* mesh) {
    const size_t packed_global_id_1 = get_global_id(0);
    const size_t packed_global_id_2 = get_global_id(1);

    /* Per-dimension offset/stride tables; each holds exactly one entry. */
    const size_t dim_1_offsets[] = {1};
    const size_t dim_1_strides[] = {1};
    const size_t dim_2_offsets[] = {1};
    const size_t dim_2_strides[] = {1};

    /* Unpack the global ids into mesh coordinates (the +1 offset skips index 0). */
    const size_t index_1 = (packed_global_id_1 % 512) * dim_1_strides[packed_global_id_1 / 512]
                         + dim_1_offsets[packed_global_id_1 / 512];
    const size_t index_2 = (packed_global_id_2 % 512) * dim_2_strides[packed_global_id_2 / 512]
                         + dim_2_offsets[packed_global_id_2 / 512];

    /* Sliding window along index_0: values at index_0 - 1, index_0, index_0 + 1. */
    double register_0 = mesh[encode514_514_514(0, index_1, index_2)];
    double register_1 = mesh[encode514_514_514(1, index_1, index_2)];
    double register_2 = mesh[encode514_514_514(2, index_1, index_2)];

    for (size_t index_0 = 1; index_0 <= 512; index_0++) {
        /* Term order is kept as-is so floating-point rounding is unchanged. */
        out[encode514_514_514(index_0, index_1, index_2)] =
              register_0 * 0.4733297379
            + mesh[encode514_514_514(index_0, index_1 - 1, index_2)] * 0.902356566675
            + mesh[encode514_514_514(index_0, index_1, index_2 - 1)] * 0.611319254973
            + register_1 * 0.330945165922
            + mesh[encode514_514_514(index_0, index_1, index_2 + 1)] * 0.518832399972
            + mesh[encode514_514_514(index_0, index_1 + 1, index_2)] * 0.401558456331
            + register_2 * 0.795811616736;

        /* Rotate the window one step along index_0. */
        register_0 = register_1;
        register_1 = register_2;

        /*
         * Prefetch the value for the next iteration. On the final iteration
         * the look-ahead would address index_0 + 2 == 514, one past the last
         * valid index (513) of the 514-wide encoding, and the value would
         * never be used - so skip the load there.
         */
        if (index_0 < 512) {
            register_2 = mesh[encode514_514_514(index_0 + 2, index_1, index_2)];
        }

        /*
         * Reached by every work-item on every iteration (the trip count is
         * uniform), so it is safe here.
         * NOTE(review): this kernel declares no __local memory, so the
         * barrier may be unnecessary - confirm its intent before removing.
         */
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment