Created
December 30, 2016 22:30
-
-
Save chick/9407ef6e004dcb2c5de09e5e26b95b6a to your computer and use it in GitHub Desktop.
Simple OpenCL kernel for 7pt operator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// <file: control.c> | |
#include <stdio.h> | |
#include <time.h> | |
#ifdef __APPLE__ | |
#include <OpenCL/opencl.h> | |
#else | |
#include <CL/cl.h> | |
#endif | |
int control(cl_command_queue queue, cl_kernel kernel_0, cl_mem out, cl_mem mesh) { | |
int error_code = 0; | |
size_t global_512_512 [2] = {512, 512}; | |
size_t local_16_32 [2] = {16, 32}; | |
error_code |= clSetKernelArg(kernel_0, 0, 8, & out); | |
error_code |= clSetKernelArg(kernel_0, 1, 8, & mesh); | |
error_code |= clEnqueueNDRangeKernel(queue, kernel_0, 2, NULL, global_512_512 , local_16_32 , 0, NULL, NULL); | |
clFinish(queue); | |
if (error_code != 0) printf("error code %d\n", error_code); | |
return error_code; | |
}; | |
))) | |
INFO:ctree.c.nodes:compilation command: gcc -shared -fPIC -std=c99 -O2 -framework OpenCL -o /var/folders/ls/0xl3wjy949b2b9j36yn36v3c0000gn/T/tmpEfglgb/snowflake/5598646763549957670/_5486821843753659896/_7985492147856592190/PencilCompiler/control.so /var/folders/ls/0xl3wjy949b2b9j36yn36v3c0000gn/T/tmpEfglgb/snowflake/5598646763549957670/_5486821843753659896/_7985492147856592190/PencilCompiler/control.c -framework OpenCL | |
INFO:ctree.nodes:Recreating source | |
INFO:ctree.nodes:file for generated OpenCL: /var/folders/ls/0xl3wjy949b2b9j36yn36v3c0000gn/T/tmpEfglgb/snowflake/5598646763549957670/_5486821843753659896/_7985492147856592190/PencilCompiler/kernel_0.cl | |
INFO:ctree.nodes:generated OpenCL code: ((( | |
// <file: kernel_0.cl> | |
#define encode514_514_514(x0, x1, x2) (264196 * (long) (x0) + 514 * (long) (x1) + 1 * (long) (x2)) | |
#define encode514_514(x0, x1) (514 * (long) (x0) + 1 * (long) (x1)) | |
#define encode18_34(x0, x1) (34 * (long) (x0) + 1 * (long) (x1)) | |
__kernel void kernel_0(__global float* out, __global float* mesh) { | |
__local float local_buf_0[612]; | |
__local float local_buf_1[612]; | |
__local float local_buf_2[612]; | |
__local float* plane_0 = local_buf_0; | |
__local float* plane_1 = local_buf_1; | |
__local float* plane_2 = local_buf_2; | |
__local float* temp_plane; | |
size_t tile_id_1 = get_group_id(0); | |
size_t tile_id_2 = get_group_id(1); | |
size_t packed_global_id_1 = get_global_id(0); | |
size_t packed_global_id_2 = get_global_id(1); | |
size_t packed_local_id_1 = get_local_id(0); | |
size_t packed_local_id_2 = get_local_id(1); | |
size_t thread_id = packed_local_id_1 * 32 + packed_local_id_2; | |
size_t group_id_0 = get_group_id(0); | |
size_t group_id_1 = get_group_id(1); | |
size_t index_0; | |
size_t index_1; | |
size_t index_2; | |
size_t local_index_0; | |
size_t local_index_1; | |
size_t local_index_2; | |
size_t dim_0_offsets[] = {1}; | |
size_t dim_0_strides[] = {1}; | |
size_t dim_1_offsets[] = {1}; | |
size_t dim_1_strides[] = {1}; | |
index_1 = (packed_global_id_1 % 512) * dim_1_strides[(packed_global_id_1 / 512)] + dim_1_offsets[(packed_global_id_1 / 512)]; | |
local_index_1 = (packed_local_id_1 % 512) * dim_1_strides[(packed_global_id_1 / 512)] + dim_1_offsets[(packed_global_id_1 / 512)]; | |
size_t dim_2_offsets[] = {1}; | |
size_t dim_2_strides[] = {1}; | |
index_2 = (packed_global_id_2 % 512) * dim_2_strides[(packed_global_id_2 / 512)] + dim_2_offsets[(packed_global_id_2 / 512)]; | |
local_index_2 = (packed_local_id_2 % 512) * dim_2_strides[(packed_global_id_2 / 512)] + dim_2_offsets[(packed_global_id_2 / 512)]; | |
// | |
// Fill the first local memory planes | |
// | |
plane_1[(thread_id + 0)] = mesh[encode514_514_514(0, tile_id_1 * 16 + ((thread_id + 0) / 34), tile_id_2 * 32 + ((thread_id + 0) % 34))]; | |
if (thread_id + 512 < 612) { | |
plane_1[(thread_id + 512)] = mesh[encode514_514_514(0, tile_id_1 * 16 + ((thread_id + 512) / 34), tile_id_2 * 32 + ((thread_id + 512) % 34))]; | |
}; | |
plane_2[(thread_id + 0)] = mesh[encode514_514_514(1, tile_id_1 * 16 + ((thread_id + 0) / 34), tile_id_2 * 32 + ((thread_id + 0) % 34))]; | |
if (thread_id + 512 < 612) { | |
plane_2[(thread_id + 512)] = mesh[encode514_514_514(1, tile_id_1 * 16 + ((thread_id + 512) / 34), tile_id_2 * 32 + ((thread_id + 512) % 34))]; | |
}; | |
barrier(CLK_LOCAL_MEM_FENCE); | |
for (index_0 = 1; index_0 <= 512; index_0 ++) { | |
temp_plane = plane_0; | |
plane_0 = plane_1; | |
plane_1 = plane_2; | |
plane_2 = temp_plane; | |
plane_2[(thread_id + 0)] = mesh[encode514_514_514(index_0 + 1, tile_id_1 * 16 + ((thread_id + 0) / 34), tile_id_2 * 32 + ((thread_id + 0) % 34))]; | |
if (thread_id + 512 < 612) { | |
plane_2[(thread_id + 512)] = mesh[encode514_514_514(index_0 + 1, tile_id_1 * 16 + ((thread_id + 512) / 34), tile_id_2 * 32 + ((thread_id + 512) % 34))]; | |
}; | |
barrier(CLK_LOCAL_MEM_FENCE); | |
out[encode514_514_514(index_0, index_1, index_2)] = plane_0[encode18_34(local_index_1 + 0, local_index_2 + 0)] + plane_1[encode18_34(local_index_1 + -1, local_index_2 + 0)] + plane_1[encode18_34(local_index_1 + 0, local_index_2 + -1)] + plane_1[encode18_34(local_index_1 + 0, local_index_2 + 0)] * -6 + plane_1[encode18_34(local_index_1 + 0, local_index_2 + 1)] + plane_1[encode18_34(local_index_1 + 1, local_index_2 + 0)] + plane_2[encode18_34(local_index_1 + 0, local_index_2 + 0)]; | |
barrier(CLK_LOCAL_MEM_FENCE); | |
}; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment