Created
January 4, 2016 07:45
-
-
Save sukinull/23d6cfb53dd2213997c2 to your computer and use it in GitHub Desktop.
Original OpenCL kernel is here, https://github.com/01org/libxcam/tree/master/cl_kernel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_3a_stats | |
* input: image2d_t as read only | |
* output: XCamGridStat, stats results | |
*/ | |
typedef struct | |
{ | |
unsigned int avg_y; | |
unsigned int avg_r; | |
unsigned int avg_gr; | |
unsigned int avg_gb; | |
unsigned int avg_b; | |
unsigned int valid_wb_count; | |
unsigned int f_value1; | |
unsigned int f_value2; | |
} XCamGridStat; | |
__kernel void kernel_3a_stats (__read_only image2d_t input, __global XCamGridStat *output) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
int w = get_global_size (0); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
int x0 = 16 * x; | |
int y0 = 16 * y; | |
float sum_gr = 0.0f, sum_r = 0.0f, sum_b = 0.0f, sum_gb = 0.0f; | |
float avg_gr = 0.0f, avg_r = 0.0f, avg_b = 0.0f, avg_gb = 0.0f; | |
int i = 0, j = 0; | |
float count = (16.0 / 2) * (16.0 / 2); | |
float4 p[4]; | |
#pragma unroll | |
for (j = 0; j < 16; j += 2) { | |
#pragma unroll | |
for (i = 0; i < 16; i += 2) { | |
p[0] = read_imagef (input, sampler, (int2)(x0 + i, y0 + j)); | |
p[1] = read_imagef (input, sampler, (int2)(x0 + i, y0 + j + 1)); | |
p[2] = read_imagef (input, sampler, (int2)(x0 + i + 1, y0 + j)); | |
p[3] = read_imagef (input, sampler, (int2)(x0 + i + 1, y0 + j + 1)); | |
sum_gr += p[0].x; | |
sum_b += p[1].x; | |
sum_r += p[2].x; | |
sum_gb += p[3].x; | |
} | |
} | |
avg_gr = sum_gr / count; | |
avg_r = sum_r / count; | |
avg_b = sum_b / count; | |
avg_gb = sum_gb / count; | |
output[y * w + x].avg_gr = convert_uint(avg_gr * 256.0); | |
output[y * w + x].avg_r = convert_uint(avg_r * 256.0); | |
output[y * w + x].avg_b = convert_uint(avg_b * 256.0); | |
output[y * w + x].avg_gb = convert_uint(avg_gb * 256.0); | |
output[y * w + x].valid_wb_count = convert_uint(count); | |
output[y * w + x].avg_y = convert_uint(((avg_gr + avg_gb) / 2.0f) * 256.0); | |
output[y * w + x].f_value1 = 0; | |
output[y * w + x].f_value2 = 0; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_bayer_copy | |
* sample code of default kernel arguments | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
//#define ENABLE_IMAGE_2D_INPUT 0 | |
/* | |
* GROUP_PIXEL_X_SIZE = 2 * GROUP_CELL_X_SIZE | |
* GROUP_PIXEL_Y_SIZE = 2 * GROUP_CELL_Y_SIZE | |
*/ | |
#define GROUP_CELL_X_SIZE 64 | |
#define GROUP_CELL_Y_SIZE 4 | |
//float4; 16 | |
#define SLM_X_SIZE (GROUP_CELL_X_SIZE / 4) | |
#define SLM_Y_SIZE GROUP_CELL_Y_SIZE | |
#define STATS_3A_CELL_X_SIZE 8 | |
#define STATS_3A_CELL_Y_SIZE GROUP_CELL_Y_SIZE | |
typedef struct { | |
float level_gr; /* Black level for GR pixels */ | |
float level_r; /* Black level for R pixels */ | |
float level_b; /* Black level for B pixels */ | |
float level_gb; /* Black level for GB pixels */ | |
uint color_bits; | |
} CLBLCConfig; | |
typedef struct | |
{ | |
float r_gain; | |
float gr_gain; | |
float gb_gain; | |
float b_gain; | |
} CLWBConfig; | |
inline int slm_pos (const int x, const int y) | |
{ | |
return mad24 (y, SLM_X_SIZE, x); | |
} | |
inline void gamma_correct(float8 *in_out, __global float *table) | |
{ | |
in_out->s0 = table[clamp(convert_int(in_out->s0 * 255.0f), 0, 255)]; | |
in_out->s1 = table[clamp(convert_int(in_out->s1 * 255.0f), 0, 255)]; | |
in_out->s2 = table[clamp(convert_int(in_out->s2 * 255.0f), 0, 255)]; | |
in_out->s3 = table[clamp(convert_int(in_out->s3 * 255.0f), 0, 255)]; | |
in_out->s4 = table[clamp(convert_int(in_out->s4 * 255.0f), 0, 255)]; | |
in_out->s5 = table[clamp(convert_int(in_out->s5 * 255.0f), 0, 255)]; | |
in_out->s6 = table[clamp(convert_int(in_out->s6 * 255.0f), 0, 255)]; | |
in_out->s7 = table[clamp(convert_int(in_out->s7 * 255.0f), 0, 255)]; | |
} | |
inline float avg_float8 (float8 data) | |
{ | |
return (data.s0 + data.s1 + data.s2 + data.s3 + data.s4 + data.s5 + data.s6 + data.s7) * 0.125f; | |
} | |
inline void stats_3a_calculate ( | |
__local float4 * slm_gr, | |
__local float4 * slm_r, | |
__local float4 * slm_b, | |
__local float4 * slm_gb, | |
__global ushort8 * stats_output, | |
CLWBConfig *wb_config) | |
{ | |
const int group_x_size = get_num_groups (0); | |
const int group_id_x = get_group_id (0); | |
const int group_id_y = get_group_id (1); | |
const int l_id_x = get_local_id (0); | |
const int l_id_y = get_local_id (1); | |
const int l_size_x = get_local_size (0); | |
const int stats_float4_x_count = STATS_3A_CELL_X_SIZE / 4; | |
int count = stats_float4_x_count * STATS_3A_CELL_Y_SIZE / 4; | |
int index = mad24 (l_id_y, l_size_x, l_id_x); | |
int index_x = index % SLM_X_SIZE; | |
int index_y = index / SLM_X_SIZE; | |
if (mad24 (index_y, stats_float4_x_count, index_x % stats_float4_x_count) < count) { | |
int pitch_count = count / stats_float4_x_count * SLM_X_SIZE; | |
int index1 = index + pitch_count; | |
int index2 = index1 + pitch_count; | |
int index3 = index2 + pitch_count; | |
slm_gr[index] = (slm_gr[index] + slm_gr[index1] + slm_gr[index2] + slm_gr[index3]) * 0.25f; | |
slm_r[index] = (slm_r[index] + slm_r[index1] + slm_r[index2] + slm_r[index3]) * 0.25f; | |
slm_b[index] = (slm_b[index] + slm_b[index1] + slm_b[index2] + slm_b[index3]) * 0.25f; | |
slm_gb[index] = (slm_gb[index] + slm_gb[index1] + slm_gb[index2] + slm_gb[index3]) * 0.25f; | |
} | |
barrier (CLK_LOCAL_MEM_FENCE); | |
if (index < SLM_X_SIZE / 2) { | |
float result_gr, result_r, result_b, result_gb, avg_y; | |
float8 tmp; | |
tmp = ((__local float8*)slm_gr)[index]; | |
result_gr = avg_float8 (tmp); | |
tmp = ((__local float8*)slm_r)[index]; | |
result_r = avg_float8 (tmp); | |
tmp = ((__local float8*)slm_b)[index]; | |
result_b = avg_float8 (tmp); | |
tmp = ((__local float8*)slm_gb)[index]; | |
result_gb = avg_float8 (tmp); | |
avg_y = convert_uchar_sat( | |
mad ((result_gr * wb_config->gr_gain + result_gb * wb_config->gb_gain), 74.843f, | |
mad (result_r * wb_config->r_gain, 76.245f, result_b * 29.070f))); | |
int out_index = mad24 (mad24 (group_id_y, group_x_size, group_id_x), | |
(GROUP_CELL_X_SIZE / STATS_3A_CELL_X_SIZE) * (GROUP_CELL_Y_SIZE / STATS_3A_CELL_Y_SIZE), | |
index); | |
//ushort avg_y; avg_r; avg_gr; avg_gb; avg_b; valid_wb_count; f_value1; f_value2; | |
stats_output[out_index] = (ushort8) ( | |
convert_ushort (convert_uchar_sat (avg_y)), | |
convert_ushort (convert_uchar_sat (result_r * 255.0f)), | |
convert_ushort (convert_uchar_sat (result_gr * 255.0f)), | |
convert_ushort (convert_uchar_sat (result_gb * 255.0f)), | |
convert_ushort (convert_uchar_sat (result_b * 255.0f)), | |
STATS_3A_CELL_X_SIZE * STATS_3A_CELL_Y_SIZE, | |
0, | |
0); | |
} | |
} | |
__kernel void kernel_bayer_basic ( | |
#if ENABLE_IMAGE_2D_INPUT | |
__read_only image2d_t input, | |
#else | |
__global const ushort8 *input, | |
#endif | |
uint input_aligned_width, | |
__write_only image2d_t output, | |
uint out_height, | |
CLBLCConfig blc_config, | |
CLWBConfig wb_config, | |
__global float *gamma_table, | |
__global ushort8 *stats_output | |
) | |
{ | |
int g_x = get_global_id (0); | |
int g_y = get_global_id (1); | |
const int l_x = get_local_id (0); | |
const int l_y = get_local_id (1); | |
const int l_x_size = get_local_size (0); | |
const int l_y_size = get_local_size (1); | |
const int group_id_x = get_group_id (0); | |
const int group_id_y = get_group_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
int index = mad24 (l_y, l_x_size, l_x); | |
int x_cell_start = (GROUP_CELL_X_SIZE / 4) * group_id_x; | |
int y_cell_start = GROUP_CELL_Y_SIZE * group_id_y; | |
int x, y; | |
float blc_multiplier = (float)(1 << (16 - blc_config.color_bits)); | |
__local float4 slm_gr[SLM_X_SIZE * SLM_Y_SIZE], slm_r[SLM_X_SIZE * SLM_Y_SIZE], slm_b[SLM_X_SIZE * SLM_Y_SIZE], slm_gb[SLM_X_SIZE * SLM_Y_SIZE]; | |
for (; index < SLM_X_SIZE * SLM_Y_SIZE; index += l_x_size * l_y_size) { | |
float8 line1; | |
float8 line2; | |
x = index % SLM_X_SIZE + x_cell_start; | |
y = index / SLM_X_SIZE + y_cell_start; | |
#if ENABLE_IMAGE_2D_INPUT | |
line1 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2)))) / 65536.0f; | |
line2 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2 + 1)))) / 65536.0f; | |
#else | |
line1 = convert_float8 (input [y * 2 * input_aligned_width + x]) / 65536.0f; | |
line2 = convert_float8 (input [(y * 2 + 1) * input_aligned_width + x]) / 65536.0f; | |
#endif | |
float4 gr = mad (line1.even, blc_multiplier, - blc_config.level_gr); | |
float4 r = mad (line1.odd, blc_multiplier, - blc_config.level_r); | |
float4 b = mad (line2.even, blc_multiplier, - blc_config.level_b); | |
float4 gb = mad (line2.odd, blc_multiplier, - blc_config.level_gb); | |
slm_gr[index] = gr; | |
slm_r[index] = r; | |
slm_b[index] = b; | |
slm_gb[index] = gb; | |
} | |
barrier(CLK_LOCAL_MEM_FENCE); | |
float8 data_gr, data_r, data_b, data_gb; | |
index = mad24 (l_y, l_x_size, l_x); | |
x = mad24 (GROUP_CELL_X_SIZE / 8, group_id_x, index % (SLM_X_SIZE / 2)); | |
y = mad24 (GROUP_CELL_Y_SIZE, group_id_y, index / (SLM_X_SIZE / 2)); | |
data_gr = ((__local float8*)slm_gr)[index]; | |
data_gr = data_gr * wb_config.gr_gain; | |
data_r = ((__local float8*)slm_r)[index]; | |
data_r = data_r * wb_config.r_gain; | |
data_b = ((__local float8*)slm_b)[index]; | |
data_b = data_b * wb_config.b_gain; | |
data_gb = ((__local float8*)slm_gb)[index]; | |
data_gb = data_gb * wb_config.gb_gain; | |
#if ENABLE_GAMMA | |
gamma_correct (&data_gr, gamma_table); | |
gamma_correct (&data_r, gamma_table); | |
gamma_correct (&data_b, gamma_table); | |
gamma_correct (&data_gb, gamma_table); | |
#endif | |
#if 0 | |
if (x % 16 == 0 && y % 16 == 0) { | |
uint8 value = convert_uint8(convert_uchar8_sat(data_gr * 255.0f)); | |
printf ("(x:%d, y:%d) (blc.bit:%d, level:%d) (wb.gr:%f)=> (%d, %d, %d, %d, %d, %d, %d, %d)\n", | |
x * 8, y, | |
blc_config.color_bits, convert_uint(blc_config.level_gr * 255.0f), | |
wb_config.gr_gain, | |
value.s0, value.s1, value.s2, value.s3, value.s4, value.s5, value.s6, value.s7); | |
} | |
#endif | |
write_imageui (output, (int2)(x, y), as_uint4 (convert_ushort8 (data_gr * 65536.0f))); | |
write_imageui (output, (int2)(x, y + out_height), as_uint4 (convert_ushort8 (data_r * 65536.0f))); | |
write_imageui (output, (int2)(x, y + out_height * 2), as_uint4 (convert_ushort8 (data_b * 65536.0f))); | |
write_imageui (output, (int2)(x, y + out_height * 3), as_uint4 (convert_ushort8 (data_gb * 65536.0f))); | |
stats_3a_calculate (slm_gr, slm_r, slm_b, slm_gb, stats_output, &wb_config); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_bayer_pipe | |
* params: | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* blc_config: black level correction configuration | |
* wb_config: whitebalance configuration | |
* gamma_table: RGGB table | |
* stats_output: 3a stats output | |
*/ | |
#define WORKGROUP_CELL_WIDTH 8 | |
#define WORKGROUP_CELL_HEIGHT 8 | |
#define DEMOSAIC_X_CELL_PER_WORKITEM 2 | |
#define PIXEL_PER_CELL 2 | |
#define SLM_CELL_X_OFFSET 4 | |
#define SLM_CELL_Y_OFFSET 1 | |
// 8x8 | |
#define SLM_CELL_X_VALID_SIZE WORKGROUP_CELL_WIDTH | |
#define SLM_CELL_Y_VALID_SIZE WORKGROUP_CELL_HEIGHT | |
// 10x10 | |
#define SLM_CELL_X_SIZE (SLM_CELL_X_VALID_SIZE + SLM_CELL_X_OFFSET * 2) | |
#define SLM_CELL_Y_SIZE (SLM_CELL_Y_VALID_SIZE + SLM_CELL_Y_OFFSET * 2) | |
#define GUASS_DELTA_S_1 1.031739f | |
#define GUASS_DELTA_S_1_5 1.072799f | |
#define GUASS_DELTA_S_2 1.133173f | |
#define GUASS_DELTA_S_2_5 1.215717f | |
typedef struct | |
{ | |
float ee_gain; | |
float ee_threshold; | |
float nr_gain; | |
} CLEeConfig; | |
inline int get_shared_pos_x (int i) | |
{ | |
return i % SLM_CELL_X_SIZE; | |
} | |
inline int get_shared_pos_y (int i) | |
{ | |
return i / SLM_CELL_X_SIZE; | |
} | |
inline int shared_pos (int x, int y) | |
{ | |
return mad24(y, SLM_CELL_X_SIZE, x); | |
} | |
/* BA10=> GRBG */ | |
inline void grbg_slm_load ( | |
__local float *px, __local float *py, __local float *pz, __local float *pw, | |
int index, __read_only image2d_t input, int x_start, int y_start | |
) | |
{ | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
float4 data1, data2, line1, line2; | |
int x0 = (get_shared_pos_x (index) + x_start) / 4; | |
int y0 = get_shared_pos_y (index) + y_start; | |
int2 pos = (int2)(x0, y0); | |
float4 gr, r, b, gb; | |
gr = read_imagef (input, sampler, (int2)(x0, y0)); | |
r = read_imagef (input, sampler, (int2)(x0, y0 + 544)); | |
b = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 2)); | |
gb = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 3)); | |
(*(__local float4 *)(px + index)) = gr; | |
(*(__local float4 *)(py + index)) = r; | |
(*(__local float4 *)(pz + index)) = b; | |
(*(__local float4 *)(pw + index)) = gb; | |
} | |
#define MAX_DELTA_COFF 5.0f | |
#define MIN_DELTA_COFF 1.0f | |
#define DEFAULT_DELTA_COFF 4.0f | |
inline float2 delta_coff (float2 in, __local float *table) | |
{ | |
float2 out; | |
out.x = table[(int)(fabs(in.x * 64.0f))]; | |
out.y = table[(int)(fabs(in.y * 64.0f))]; | |
return out; | |
} | |
inline float2 dot_denoise (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain) | |
{ | |
float2 coff0, coff1, coff2, coff3, coff4, coff5; | |
coff0 = delta_coff (0.0, table) * gain; | |
coff1 = delta_coff (in1 - value, table); | |
coff2 = delta_coff (in2 - value, table); | |
coff3 = delta_coff (in3 - value, table); | |
coff4 = delta_coff (in4 - value, table); | |
//(in1 * coff1 + in2 * coff2 + in3 * coff3 + in4 * coff4 + value * coff0) | |
float2 sum1 = (mad (in1, coff1, | |
mad (in2, coff2, | |
mad (in3, coff3, | |
mad (in4, coff4, value * coff0))))); | |
return sum1 / (coff0 + coff1 + coff2 + coff3 + coff4); | |
} | |
inline float2 dot_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, float2 out, CLEeConfig ee_config, float2 *egain) | |
{ | |
float2 eH = value - in1 * 0.5f - in3 * 0.5f; | |
float2 eV = value - in2 * 0.5f - in4 * 0.5f; | |
float2 ee; | |
eH.x = eH.x > ee_config.ee_threshold ? eH.x : 0.0f; | |
eH.y = eH.y > ee_config.ee_threshold ? eH.y : 0.0f; | |
eV.x = eV.x > ee_config.ee_threshold ? eV.x : 0.0f; | |
eV.y = eV.y > ee_config.ee_threshold ? eV.y : 0.0f; | |
ee.x = fmax(eH.x, eV.x); | |
ee.y = fmax(eH.y, eV.y); | |
egain[0] = mad(ee, ee_config.ee_gain, out) / out; | |
return out * egain[0]; | |
} | |
inline float2 dot_denoise_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain, float2 *egain, CLEeConfig ee_config) | |
{ | |
float2 out = dot_denoise(value, in1, in2, in3, in4, table, gain); | |
return dot_ee(value, in1, in2, in3, in4, out, ee_config, egain); | |
} | |
void demosaic_2_cell ( | |
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, | |
int in_x, int in_y, | |
__write_only image2d_t out, uint out_height, int out_x, int out_y) | |
{ | |
float4 out_data; | |
float2 value; | |
int index; | |
{ | |
float3 R_y[2]; | |
index = shared_pos (in_x - 1, in_y); | |
R_y[0] = *(__local float3*)(y_data_in + index); | |
index = shared_pos (in_x - 1, in_y + 1); | |
R_y[1] = *(__local float3*)(y_data_in + index); | |
out_data.s02 = (R_y[0].s01 + R_y[0].s12) * 0.5f; | |
out_data.s13 = R_y[0].s12; | |
write_imagef (out, (int2)(out_x, out_y), out_data); | |
out_data.s02 = (R_y[0].s01 + R_y[0].s12 + R_y[1].s01 + R_y[1].s12) * 0.25f; | |
out_data.s13 = (R_y[0].s12 + R_y[1].s12) * 0.5f; | |
write_imagef (out, (int2)(out_x, out_y + 1), out_data); | |
} | |
{ | |
float3 B_z[2]; | |
index = shared_pos (in_x, in_y - 1); | |
B_z[0] = *(__local float3*)(z_data_in + index); | |
index = shared_pos (in_x, in_y); | |
B_z[1] = *(__local float3*)(z_data_in + index); | |
out_data.s02 = (B_z[0].s01 + B_z[1].s01) * 0.5f; | |
out_data.s13 = (B_z[0].s01 + B_z[0].s12 + B_z[1].s01 + B_z[1].s12) * 0.25f; | |
write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data); | |
out_data.s02 = B_z[1].s01; | |
out_data.s13 = (B_z[1].s01 + B_z[1].s12) * 0.5f; | |
write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data); | |
} | |
{ | |
float3 Gr_x[2], Gb_w[2]; | |
index = shared_pos (in_x, in_y); | |
Gr_x[0] = *(__local float3*)(x_data_in + index); | |
index = shared_pos (in_x, in_y + 1); | |
Gr_x[1] = *(__local float3*)(x_data_in + index); | |
index = shared_pos (in_x - 1, in_y - 1); | |
Gb_w[0] = *(__local float3*)(w_data_in + index); | |
index = shared_pos (in_x - 1, in_y); | |
Gb_w[1] = *(__local float3*)(w_data_in + index); | |
out_data.s02 = (Gr_x[0].s01 * 4.0f + Gb_w[0].s01 + | |
Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12) * 0.125f; | |
out_data.s13 = (Gr_x[0].s01 + Gr_x[0].s12 + Gb_w[0].s12 + Gb_w[1].s12) * 0.25f; | |
write_imagef (out, (int2)(out_x, out_y + out_height), out_data); | |
out_data.s02 = (Gr_x[0].s01 + Gr_x[1].s01 + Gb_w[1].s01 + Gb_w[1].s12) * 0.25f; | |
out_data.s13 = (Gb_w[1].s12 * 4.0f + Gr_x[0].s01 + | |
Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12) * 0.125f; | |
write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data); | |
} | |
} | |
void demosaic_denoise_2_cell ( | |
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, | |
int in_x, int in_y, | |
__write_only image2d_t out, uint out_height, int out_x, int out_y, __local float *table, CLEeConfig ee_config) | |
{ | |
float4 out_data[2]; | |
float2 value; | |
int index; | |
float2 egain[4]; | |
///////////////////////////////////////G/////////////////////////////////// | |
{ | |
float3 Gr_x[2], Gb_w[2]; | |
index = shared_pos (in_x - 1, in_y - 1); | |
Gb_w[0] = *(__local float3*)(w_data_in + index); | |
index = shared_pos (in_x - 1, in_y); | |
Gb_w[1] = *(__local float3*)(w_data_in + index); | |
index = shared_pos (in_x, in_y); | |
Gr_x[0] = *(__local float3*)(x_data_in + index); | |
index = shared_pos (in_x, in_y + 1); | |
Gr_x[1] = *(__local float3*)(x_data_in + index); | |
value = mad (Gr_x[0].s01, 4.0f, (Gb_w[0].s01 + | |
Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12)) * 0.125f; | |
out_data[0].s02 = dot_denoise_ee (value, Gb_w[0].s01, Gb_w[0].s12, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1_5, &egain[0], ee_config); | |
value = (Gr_x[0].s01 + Gr_x[0].s12 + | |
Gb_w[0].s12 + Gb_w[1].s12) * 0.25f; | |
out_data[0].s13 = dot_denoise_ee(value, Gr_x[0].s01, Gr_x[0].s12, Gb_w[0].s12, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[1], ee_config); | |
value = (Gr_x[0].s01 + Gr_x[1].s01 + | |
Gb_w[1].s01 + Gb_w[1].s12) * 0.25f; | |
out_data[1].s02 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[1].s01, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[2], ee_config); | |
value = mad (Gb_w[1].s12, 4.0f, (Gr_x[0].s01 + | |
Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12)) * 0.125f; | |
out_data[1].s13 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[0].s12, Gr_x[1].s01, Gr_x[1].s12, table, GUASS_DELTA_S_1_5, &egain[3], ee_config); | |
write_imagef (out, (int2)(out_x, out_y + out_height), out_data[0]); | |
write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data[1]); | |
} | |
////////////////////////////////R////////////////////////////////////////// | |
{ | |
float4 R_y[3]; | |
index = shared_pos (in_x - 1, in_y - 1); | |
R_y[0] = *(__local float4*)(y_data_in + index); | |
index = shared_pos (in_x - 1, in_y); | |
R_y[1] = *(__local float4*)(y_data_in + index); | |
index = shared_pos (in_x - 1, in_y + 1); | |
R_y[2] = *(__local float4*)(y_data_in + index); | |
value = (R_y[1].s01 + R_y[1].s12) * 0.5f; | |
out_data[0].s02 = dot_denoise (value, R_y[0].s01, R_y[0].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_2_5) * egain[0]; | |
value = R_y[1].s12; | |
out_data[0].s13 = dot_denoise (value, R_y[0].s12, R_y[1].s01, R_y[1].s23, R_y[2].s12, table, GUASS_DELTA_S_2) * egain[1]; | |
value = (R_y[1].s01 + R_y[1].s12 + | |
R_y[2].s01 + R_y[2].s12) * 0.25f; | |
out_data[1].s02 = dot_denoise (value, R_y[1].s01, R_y[1].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_1_5) * egain[2]; | |
value = (R_y[1].s12 + R_y[2].s12) * 0.5f; | |
out_data[1].s13 = dot_denoise (value, R_y[1].s01, R_y[1].s23, R_y[2].s01, R_y[2].s23, table, GUASS_DELTA_S_2_5) * egain[3]; | |
write_imagef (out, (int2)(out_x, out_y), out_data[0]); | |
write_imagef (out, (int2)(out_x, out_y + 1), out_data[1]); | |
} | |
////////////////////////////////B////////////////////////////////////////// | |
{ | |
float4 B_z[3]; | |
index = shared_pos (in_x - 1, in_y - 1); | |
B_z[0] = *(__local float4*)(z_data_in + index); | |
index = shared_pos (in_x - 1, in_y); | |
B_z[1] = *(__local float4*)(z_data_in + index); | |
index = shared_pos (in_x - 1, in_y + 1); | |
B_z[2] = *(__local float4*)(z_data_in + index); | |
value = (B_z[0].s12 + B_z[1].s12) * 0.5f; | |
out_data[0].s02 = dot_denoise (value, B_z[0].s01, B_z[0].s23, B_z[1].s01, B_z[1].s23, table, GUASS_DELTA_S_2_5) * egain[0]; | |
value = (B_z[0].s12 + B_z[0].s23 + | |
B_z[1].s12 + B_z[1].s23) * 0.25f; | |
out_data[0].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[1].s12, B_z[1].s23, table, GUASS_DELTA_S_1_5) * egain[1]; | |
value = B_z[1].s12; | |
out_data[1].s02 = dot_denoise (value, B_z[0].s12, B_z[1].s01, B_z[1].s23, B_z[2].s12, table, GUASS_DELTA_S_2) * egain[2]; | |
value = (B_z[1].s12 + B_z[1].s23) * 0.5f; | |
out_data[1].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[2].s12, B_z[2].s23, table, GUASS_DELTA_S_2_5) * egain[3]; | |
write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data[0]); | |
write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data[1]); | |
} | |
} | |
void shared_demosaic ( | |
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, | |
int in_x, int in_y, | |
__write_only image2d_t out, uint output_height, int out_x, int out_y, | |
uint has_denoise, __local float *table, CLEeConfig ee_config) | |
{ | |
if (has_denoise) { | |
demosaic_denoise_2_cell ( | |
x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, | |
out, output_height, out_x, out_y, table, ee_config); | |
} else { | |
demosaic_2_cell ( | |
x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, | |
out, output_height, out_x, out_y); | |
} | |
} | |
__kernel void kernel_bayer_pipe (__read_only image2d_t input, | |
uint input_height, | |
__write_only image2d_t output, | |
uint output_height, | |
__global float * bnr_table, | |
uint has_denoise, | |
CLEeConfig ee_config | |
) | |
{ | |
int g_id_x = get_global_id (0); | |
int g_id_y = get_global_id (1); | |
int g_size_x = get_global_size (0); | |
int g_size_y = get_global_size (1); | |
int l_id_x = get_local_id(0); | |
int l_id_y = get_local_id(1); | |
int l_size_x = get_local_size (0); | |
int l_size_y = get_local_size (1); | |
__local float p1_x[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_y[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_z[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_w[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE]; | |
__local float4 p2[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE]; | |
__local float4 *stats_cache = p2; | |
__local float SLM_delta_coef_table[64]; | |
int out_x_start, out_y_start; | |
int x_start = get_group_id (0) * WORKGROUP_CELL_WIDTH; | |
int y_start = get_group_id (1) * WORKGROUP_CELL_HEIGHT; | |
int i = mad24 (l_id_y, l_size_x, l_id_x); | |
int j = i; | |
i *= 4; | |
for (; i < SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE; i += (l_size_x * l_size_y) * 4) { | |
grbg_slm_load (p1_x, p1_y, p1_z, p1_w, i, | |
input, | |
x_start - SLM_CELL_X_OFFSET, y_start - SLM_CELL_Y_OFFSET); | |
} | |
for(; j < 64; j += l_size_x * l_size_y) | |
SLM_delta_coef_table[j] = bnr_table[j]; | |
barrier(CLK_LOCAL_MEM_FENCE); | |
i = mad24 (l_id_y, l_size_x, l_id_x); | |
int workitem_x_size = (SLM_CELL_X_VALID_SIZE / DEMOSAIC_X_CELL_PER_WORKITEM); | |
int input_x = (i % workitem_x_size) * DEMOSAIC_X_CELL_PER_WORKITEM; | |
int input_y = i / workitem_x_size; | |
shared_demosaic ( | |
p1_x, p1_y, p1_z, p1_w, | |
input_x + SLM_CELL_X_OFFSET, input_y + SLM_CELL_Y_OFFSET, | |
output, output_height, | |
(input_x + x_start) * PIXEL_PER_CELL / 4, (input_y + y_start) * PIXEL_PER_CELL, has_denoise, SLM_delta_coef_table, ee_config); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_blc | |
* black level correction for sensor data input | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* blc_config: black level correction configuration | |
* color_bits: identify 10bit or 12bit data | |
* param: | |
*/ | |
typedef struct | |
{ | |
float level_gr; /* Black level for GR pixels */ | |
float level_r; /* Black level for R pixels */ | |
float level_b; /* Black level for B pixels */ | |
float level_gb; /* Black level for GB pixels */ | |
uint color_bits; | |
} BLCConfig; | |
uint decompression(uint data_in, uint color_bits) | |
{ | |
uint data_out = 0; | |
data_out = data_in << (16 - color_bits); | |
return data_out; | |
} | |
__kernel void kernel_blc (__read_only image2d_t input, | |
__write_only image2d_t output, | |
BLCConfig blc_config) | |
{ | |
int x0 = 2 * get_global_id (0); | |
int y0 = 2 * get_global_id (1); | |
int x1 = x0 + 1; | |
int y1 = y0 + 1; | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
int2 pos_r = (int2)(x0, y0); | |
int2 pos_gr = (int2)(x1, y0); | |
int2 pos_gb = (int2)(x0, y1); | |
int2 pos_b = (int2)(x1, y1); | |
uint4 pixel; | |
pixel = read_imageui(input, sampler, pos_r); | |
pixel.x = floor(pixel.x - blc_config.level_r * (pown(2.0, blc_config.color_bits)) + 0.5); | |
pixel.x = decompression(pixel.x, blc_config.color_bits); | |
write_imageui(output, pos_r, pixel); | |
pixel = read_imageui(input, sampler, pos_gr); | |
pixel.x = floor(pixel.x - blc_config.level_gr * (pown(2.0, blc_config.color_bits)) + 0.5); | |
pixel.x = decompression(pixel.x, blc_config.color_bits); | |
write_imageui(output, pos_gr, pixel); | |
pixel = read_imageui(input, sampler, pos_gb); | |
pixel.x = floor(pixel.x - blc_config.level_gb * (pown(2.0, blc_config.color_bits)) + 0.5); | |
pixel.x = decompression(pixel.x, blc_config.color_bits); | |
write_imageui(output, pos_gb, pixel); | |
pixel = read_imageui(input, sampler, pos_b); | |
pixel.x = floor(pixel.x - blc_config.level_b * (pown(2.0, blc_config.color_bits)) + 0.5); | |
pixel.x = decompression(pixel.x, blc_config.color_bits); | |
write_imageui(output, pos_b, pixel); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_bnr | |
* implementation of bayer noise reduction | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* bnr_gain: strength of noise reduction | |
* direction: sensitivity of edge | |
* todo: add the upstream algorithm for BNR | |
*/ | |
__kernel void kernel_bnr (__read_only image2d_t input, __write_only image2d_t output, | |
float bnr_gain, float direction) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
float4 p; | |
p = read_imagef(input, sampler, (int2)(x, y)); | |
write_imagef(output, (int2)(x, y), p); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_demosaic | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
__kernel void kernel_demosaic (__read_only image2d_t input, __write_only image2d_t output) | |
{ | |
int x = 2 * get_global_id (0); | |
int y = 2 * get_global_id (1); | |
// sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
int x0 = x - 1; | |
int y0 = y - 1; | |
float4 p[16]; | |
#if 0 | |
p[0] = read_imagef (input, sampler, (int2)(x0, y0)); | |
p[1] = read_imagef (input, sampler, (int2)(x0 + 1, y0)); | |
p[2] = read_imagef (input, sampler, (int2)(x0 + 2, y0)); | |
p[3] = read_imagef (input, sampler, (int2)(x0 + 3, y0)); | |
p[4] = read_imagef (input, sampler, (int2)(x0, y0 + 1)); | |
p[5] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 1)); | |
p[6] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 1)); | |
p[7] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 1)); | |
p[8] = read_imagef (input, sampler, (int2)(x0, y0 + 2)); | |
p[9] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 2)); | |
p[10] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 2)); | |
p[11] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 2)); | |
p[12] = read_imagef (input, sampler, (int2)(x0, y0 + 3)); | |
p[13] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 3)); | |
p[14] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 3)); | |
p[15] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 3)); | |
#endif | |
#pragma unroll | |
for (int i = 0; i < 16; ++i) { | |
p[i] = read_imagef (input, sampler, (int2)(x0 + i % 4, y0 + i / 4)); | |
} | |
float4 p00, p01, p10, p11; | |
p00.x = (p[4].x + p[6].x) / 2.0; | |
p00.y = (p[5].x * 4 + p[0].x + p[2].x + p[8].x + p[10].x) / 8.0; | |
p00.z = (p[1].x + p[9].x) / 2.0; | |
p01.x = p[6].x; | |
p01.y = (p[2].x + p[5].x + p[7].x + p[10].x) / 4.0; | |
p01.z = (p[1].x + p[3].x + p[9].x + p[11].x) / 4.0; | |
p10.x = (p[4].x + p[6].x + p[12].x + p[14].x) / 4.0; | |
p10.y = (p[5].x + p[8].x + p[10].x + p[13].x) / 4.0; | |
p10.z = p[9].x; | |
p11.x = (p[6].x + p[14].x) / 2.0; | |
p11.y = (p[10].x * 4 + p[5].x + p[7].x + p[13].x + p[15].x) / 8.0; | |
p11.z = (p[9].x + p[11].x) / 2.0; | |
write_imagef (output, (int2)(x, y), p00); | |
write_imagef (output, (int2)(x + 1, y), p01); | |
write_imagef (output, (int2)(x, y + 1), p10); | |
write_imagef (output, (int2)(x + 1, y + 1), p11); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_denoise | |
* bi-laterial filter for denoise usage | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* sigma_r: the parameter to set sigma_r in the Gaussian filtering | |
* imw: image width, used for edge detect | |
* imh: image height, used for edge detect | |
*/ | |
__constant float gausssingle[25]={0.6411,0.7574,0.8007,0.7574,0.6411,0.7574,0.8948,0.9459,0.8948,0.7574,0.8007,0.94595945,1,0.9459,0.8007,0.7574,0.8948,0.9459,0.8948,0.7574,0.6411,0.7574,0.8007,0.7574,0.6411}; | |
#define LOCAL_SIZE_X 16 | |
#define LOCAL_SIZE_Y 15 | |
__kernel void kernel_denoise(__read_only image2d_t srcRGB, __write_only image2d_t dstRGB, float sigma_r, unsigned int imw, unsigned int imh) | |
{ | |
int x = get_global_id(1); //[0,imw-1] | |
int y = get_global_id(0); //[0,imh-1] | |
int localX = get_local_id(1); //[0,imw/120-1] | |
int localY = get_local_id(0); //[0,imh/72-1] | |
//printf("localX=%d,localY=%d\n",localX,localY); | |
float normF=0; | |
float H=0; | |
float delta=0; | |
int i=0,j=0; | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE |CLK_FILTER_NEAREST; | |
sigma_r = 2*pown(sigma_r,2); | |
//coord in srcY | |
float4 line; | |
line.x=0; | |
line.y=0; | |
line.z=0; | |
line.w=1.0; | |
__local float4 pixel[LOCAL_SIZE_X+4][LOCAL_SIZE_Y+4]; | |
bool interior = x > 1 && x <(imw-3) | |
&& y>1 && y< (imh-3); | |
if(interior) | |
{ | |
pixel[localX+2][localY+2]=read_imagef(srcRGB, sampler,(int2)(x,y)); | |
if(localX==0) | |
{ | |
if(localY==0) | |
{ | |
pixel[0][0]=read_imagef(srcRGB, sampler,(int2)(x-2,y-2)); | |
pixel[0][1]=read_imagef(srcRGB, sampler,(int2)(x-2,y-1)); | |
pixel[0][2]=read_imagef(srcRGB, sampler,(int2)(x-2,y )); | |
pixel[1][0]=read_imagef(srcRGB, sampler,(int2)(x-1,y-2)); | |
pixel[1][1]=read_imagef(srcRGB, sampler,(int2)(x-1,y-1)); | |
pixel[1][2]=read_imagef(srcRGB, sampler,(int2)(x-1,y )); | |
pixel[2][0]=read_imagef(srcRGB, sampler,(int2)(x ,y-2)); | |
pixel[2][1]=read_imagef(srcRGB, sampler,(int2)(x ,y-1)); | |
} | |
else if(localY==LOCAL_SIZE_Y-1) | |
{ | |
pixel[0][LOCAL_SIZE_Y-1+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y )); | |
pixel[0][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x-2,y+1)); | |
pixel[0][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y+2)); | |
pixel[1][LOCAL_SIZE_Y-1+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y )); | |
pixel[1][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x-1,y+1)); | |
pixel[1][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y+2)); | |
pixel[2][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x ,y+1)); | |
pixel[2][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x ,y+2)); | |
} | |
else | |
{ | |
pixel[0][localY+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y)); | |
pixel[1][localY+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y)); | |
} | |
} | |
else if(localX==LOCAL_SIZE_X-1) | |
{ | |
if(localY==0) | |
{ | |
pixel[LOCAL_SIZE_X-1+2+0][0]=read_imagef(srcRGB, sampler,(int2)(x ,y-2)); | |
pixel[LOCAL_SIZE_X-1+2+0][1]=read_imagef(srcRGB, sampler,(int2)(x ,y-1)); | |
//pixel[LOCAL_SIZE_X-1+2+0][2]=read_imagef(srcRGB, sampler,(int2)(x ,y)); | |
pixel[LOCAL_SIZE_X-1+2+1][0]=read_imagef(srcRGB, sampler,(int2)(x+1,y-2)); | |
pixel[LOCAL_SIZE_X-1+2+1][1]=read_imagef(srcRGB, sampler,(int2)(x+1,y-1)); | |
pixel[LOCAL_SIZE_X-1+2+1][2]=read_imagef(srcRGB, sampler,(int2)(x+1,y )); | |
pixel[LOCAL_SIZE_X-1+2+2][0]=read_imagef(srcRGB, sampler,(int2)(x+2,y-2)); | |
pixel[LOCAL_SIZE_X-1+2+2][1]=read_imagef(srcRGB, sampler,(int2)(x+2,y-1)); | |
pixel[LOCAL_SIZE_X-1+2+2][2]=read_imagef(srcRGB, sampler,(int2)(x+2,y )); | |
} | |
else if(localY==LOCAL_SIZE_Y-1) | |
{ | |
// pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x ,y )); | |
pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x ,y+1)); | |
pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x ,y+2)); | |
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x+1,y )); | |
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x+1,y+1)); | |
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x+1,y+2)); | |
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x+2,y )); | |
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x+2,y+1)); | |
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x+2,y+2)); | |
} | |
else | |
{ | |
pixel[LOCAL_SIZE_X-1+2+1][localY+2]=read_imagef(srcRGB, sampler,(int2)(x+1,y )); | |
pixel[LOCAL_SIZE_X-1+2+2][localY+2]=read_imagef(srcRGB, sampler,(int2)(x+2,y )); | |
} | |
} | |
else if(localY==0) | |
{ | |
pixel[localX+2][0]=read_imagef(srcRGB,sampler,(int2)(x,y-2)); | |
pixel[localX+2][1]=read_imagef(srcRGB,sampler,(int2)(x,y-1)); | |
} | |
else if(localY==LOCAL_SIZE_Y-1) | |
{ | |
pixel[localX+2][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB,sampler,(int2)(x,y+1)); | |
pixel[localX+2][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB,sampler,(int2)(x,y+2)); | |
} | |
}else{ | |
line=read_imagef(srcRGB, sampler,(int2)(x,y)); | |
} | |
barrier(CLK_LOCAL_MEM_FENCE); | |
if (interior) { | |
#pragma unroll | |
for(i=0;i<5;i++) | |
{ | |
#pragma unroll | |
for(j=0;j<5;j++) | |
{ | |
delta=pown(pixel[localX+i][localY+j].x-pixel[localX+2][localY+2].x,2) + | |
pown(pixel[localX+i][localY+j].y-pixel[localX+2][localY+2].y,2) + | |
pown(pixel[localX+i][localY+j].z-pixel[localX+2][localY+2].z,2); | |
H = (exp(-(delta/sigma_r)))*gausssingle[i*5+j]; | |
normF+=H; | |
line.x+=pixel[localX+i][localY+j].x*H; | |
line.y+=pixel[localX+i][localY+j].y*H; | |
line.z+=pixel[localX+i][localY+j].z*H; | |
} | |
} | |
line.x=line.x/normF; | |
line.y=line.y/normF; | |
line.z=line.z/normF; | |
} | |
write_imagef(dstRGB,(int2)(x,y),line); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_dpc | |
* defect pixel correction on bayer data input | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* gr_threshold: GR threshold of defect pixel correction | |
* r_threshold: R threshold of defect pixel correction | |
* b_threshold: B threshold of defect pixel correction | |
* gb_threshold: GB threshold of defect pixel correction | |
* param: | |
*/ | |
__kernel void kernel_dpc (__read_only image2d_t input, | |
__write_only image2d_t output, | |
float gr_threshold, float r_threshold, | |
float b_threshold, float gb_threshold) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
float4 p[9]; | |
p[0] = read_imagef(input, sampler, (int2)(x - 2, y - 2)); | |
p[1] = read_imagef(input, sampler, (int2)(x, y - 2)); | |
p[2] = read_imagef(input, sampler, (int2)(x + 2, y - 2)); | |
p[3] = read_imagef(input, sampler, (int2)(x - 2, y)); | |
p[4] = read_imagef(input, sampler, (int2)(x, y)); | |
p[5] = read_imagef(input, sampler, (int2)(x + 2, y)); | |
p[6] = read_imagef(input, sampler, (int2)(x - 2, y + 2)); | |
p[7] = read_imagef(input, sampler, (int2)(x, y + 2)); | |
p[8] = read_imagef(input, sampler, (int2)(x + 2, y + 2)); | |
float aveVer = (p[1].x + p[7].x) / 2; | |
float aveHor = (p[3].x + p[5].x) / 2; | |
float avePosDia = (p[0].x + p[8].x) / 2; | |
float aveNegDia = (p[2].x + p[6].x) / 2; | |
float aveMin, aveMax; | |
if (aveVer > aveHor) { | |
aveMin = aveHor; | |
aveMax = aveVer; | |
} | |
else { | |
aveMin = aveVer; | |
aveMax = aveHor; | |
} | |
if (avePosDia < aveMin) | |
aveMin = avePosDia; | |
else if (avePosDia > aveMax) | |
aveMax = avePosDia; | |
if (aveNegDia < aveMin) | |
aveMin = aveNegDia; | |
else if (aveNegDia > aveMax) | |
aveMax = aveNegDia; | |
float edgeVer = p[4].x - aveVer; | |
float edgeHor = p[4].x - aveHor; | |
float edgeNeighbourVer = (p[3].x + p[5].x - (p[0].x + p[2].x + p[6].x + p[8].x) / 2) / 2; | |
float edgeNeighbourHor = (p[1].x + p[7].x - (p[0].x + p[2].x + p[6].x + p[8].x) / 2) / 2; | |
float threshold; | |
if (x % 2 == 0) | |
threshold = (y % 2 == 0) ? gr_threshold : b_threshold; | |
else | |
threshold = (y % 2 == 0) ? r_threshold : gb_threshold; | |
float4 pixelOut; | |
pixelOut.x = p[4].x; | |
pixelOut.y = p[4].y; | |
pixelOut.z = p[4].z; | |
pixelOut.w = p[4].w; | |
if ((edgeVer > edgeNeighbourVer) && (edgeHor > edgeNeighbourHor)) { | |
if ((p[4].x - aveMax) > threshold) { | |
pixelOut.x = aveMax; | |
} | |
} | |
if ((edgeVer < edgeNeighbourVer) && (edgeHor < edgeNeighbourHor)) { | |
if ((aveMin - p[4].x) > threshold) { | |
pixelOut.x = aveMin; | |
} | |
} | |
write_imagef (output, (int2)(x, y), pixelOut); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_ee | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* ee_config: Edge enhancement configuration | |
*/ | |
typedef struct | |
{ | |
float ee_gain; | |
float ee_threshold; | |
float nr_gain; | |
} CLEeConfig; | |
__constant float lv[25] = {0.0, 0.0, 0.0, 0.0, 0.0, | |
0.0, 0.0, -1.0, 0.0, 0.0, | |
-1.0, -14.0, 32.0, -14.0, -1.0, | |
0.0, 0.0, -1.0, 0.0, 0.0, | |
0.0, 0.0, 0.0, 0.0, 0.0 | |
}; | |
__constant float lh[25] = {0.0, 0.0, -1.0, 0.0, 0.0, | |
0.0, 0.0, -14.0, 0.0, 0.0, | |
0.0, -1.0, 32.0, -1.0, 0.0, | |
0.0, 0.0, -14.0, 0.0, 0.0, | |
0.0, 0.0, -1.0, 0.0, 0.0 | |
}; | |
__constant float la[25] = {0.0, 0.0, -2.0, 0.0, 0.0, | |
0.0, -2.0, -2.0, -2.0, 0.0, | |
-2.0, -2.0, 24.0, -2.0, -2.0, | |
0.0, -2.0, -2.0, -2.0, 0.0, | |
0.0, 0.0, -2.0, 0.0, 0.0 | |
}; | |
__constant float na[25] = { -1.0, -1.0, -1.0, -1.0, -1.0, | |
-1.0, -1.0, -1.0, -1.0, -1.0, | |
-1.0, -1.0, 16.0, -1.0, -1.0, | |
-1.0, -1.0, -1.0, -1.0, -1.0, | |
-1.0, -1.0, -1.0, -1.0, -1.0 | |
}; | |
__kernel void kernel_ee (__read_only image2d_t input, __write_only image2d_t output, uint vertical_offset_in, uint vertical_offset_out, CLEeConfig ee_config) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
int X = get_global_size(0); | |
int Y = get_global_size(1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
float4 y_in, y_out, uv_in; | |
float4 a[5], b[5], c[5], d[5], e[5]; | |
// cpy UV | |
if(y % 2 == 0) { | |
uv_in = read_imagef(input, sampler, (int2)(x, y / 2 + vertical_offset_in)); | |
write_imagef(output, (int2)(x, y / 2 + vertical_offset_out), uv_in); | |
} | |
a[0] = read_imagef(input, sampler, (int2)(x - 2, y - 2)); | |
a[1] = read_imagef(input, sampler, (int2)(x - 1, y - 2)); | |
a[2] = read_imagef(input, sampler, (int2)(x, y - 2)); | |
a[3] = read_imagef(input, sampler, (int2)(x + 1, y - 2)); | |
a[4] = read_imagef(input, sampler, (int2)(x + 2, y - 2)); | |
b[0] = read_imagef(input, sampler, (int2)(x - 2, y - 1)); | |
b[1] = read_imagef(input, sampler, (int2)(x - 1, y - 1)); | |
b[2] = read_imagef(input, sampler, (int2)(x, y - 1)); | |
b[3] = read_imagef(input, sampler, (int2)(x + 1, y - 1)); | |
b[4] = read_imagef(input, sampler, (int2)(x + 2, y - 1)); | |
c[0] = read_imagef(input, sampler, (int2)(x - 2, y)); | |
c[1] = read_imagef(input, sampler, (int2)(x - 1, y)); | |
c[2] = read_imagef(input, sampler, (int2)(x, y)); | |
c[3] = read_imagef(input, sampler, (int2)(x + 1, y)); | |
c[4] = read_imagef(input, sampler, (int2)(x + 2, y)); | |
d[0] = read_imagef(input, sampler, (int2)(x - 2, y + 1)); | |
d[1] = read_imagef(input, sampler, (int2)(x - 1, y + 1)); | |
d[2] = read_imagef(input, sampler, (int2)(x, y + 1)); | |
d[3] = read_imagef(input, sampler, (int2)(x + 1, y + 1)); | |
d[4] = read_imagef(input, sampler, (int2)(x + 2, y + 1)); | |
e[0] = read_imagef(input, sampler, (int2)(x - 2, y + 2)); | |
e[1] = read_imagef(input, sampler, (int2)(x - 1, y + 2)); | |
e[2] = read_imagef(input, sampler, (int2)(x, y + 2)); | |
e[3] = read_imagef(input, sampler, (int2)(x + 1, y + 2)); | |
e[4] = read_imagef(input, sampler, (int2)(x + 2, y + 2)); | |
float eV = (a[0].x * lv[0] + a[1].x * lv[1] + a[2].x * lv[2] + a[3].x * lv[3] + a[4].x * lv[4] | |
+ b[0].x * lv[5] + b[1].x * lv[6] + b[2].x * lv[7] + b[3].x * lv[8] + b[4].x * lv[9] | |
+ c[0].x * lv[10] + c[1].x * lv[11] + c[2].x * lv[12] + c[3].x * lv[13] + c[4].x * lv[14] | |
+ d[0].x * lv[15] + d[1].x * lv[16] + d[2].x * lv[17] + d[3].x * lv[18] + d[4].x * lv[19] | |
+ e[0].x * lv[20] + e[1].x * lv[21] + e[2].x * lv[22] + e[3].x * lv[23] + e[4].x * lv[24]) * 255.0; | |
float eH = (a[0].x * lh[0] + a[1].x * lh[1] + a[2].x * lh[2] + a[3].x * lh[3] + a[4].x * lh[4] | |
+ b[0].x * lh[5] + b[1].x * lh[6] + b[2].x * lh[7] + b[3].x * lh[8] + b[4].x * lh[9] | |
+ c[0].x * lh[10] + c[1].x * lh[11] + c[2].x * lh[12] + c[3].x * lh[13] + c[4].x * lh[14] | |
+ d[0].x * lh[15] + d[1].x * lh[16] + d[2].x * lh[17] + d[3].x * lh[18] + d[4].x * lh[19] | |
+ e[0].x * lh[20] + e[1].x * lh[21] + e[2].x * lh[22] + e[3].x * lh[23] + e[4].x * lh[24]) * 255.0; | |
float eA = (a[0].x * la[0] + a[1].x * la[1] + a[2].x * la[2] + a[3].x * la[3] + a[4].x * la[4] | |
+ b[0].x * la[5] + b[1].x * la[6] + b[2].x * la[7] + b[3].x * la[8] + b[4].x * la[9] | |
+ c[0].x * la[10] + c[1].x * la[11] + c[2].x * la[12] + c[3].x * la[13] + c[4].x * la[14] | |
+ d[0].x * la[15] + d[1].x * la[16] + d[2].x * la[17] + d[3].x * la[18] + d[4].x * la[19] | |
+ e[0].x * la[20] + e[1].x * la[21] + e[2].x * la[22] + e[3].x * la[23] + e[4].x * la[24]) * 255.0; | |
float nA = (a[0].x * na[0] + a[1].x * na[1] + a[2].x * na[2] + a[3].x * na[3] + a[4].x * na[4] | |
+ b[0].x * na[5] + b[1].x * na[6] + b[2].x * na[7] + b[3].x * na[8] + b[4].x * na[9] | |
+ c[0].x * na[10] + c[1].x * na[11] + c[2].x * na[12] + c[3].x * na[13] + c[4].x * na[14] | |
+ d[0].x * na[15] + d[1].x * na[16] + d[2].x * na[17] + d[3].x * na[18] + d[4].x * na[19] | |
+ e[0].x * na[20] + e[1].x * na[21] + e[2].x * na[22] + e[3].x * na[23] + e[4].x * na[24]) * 255.0; | |
float nV = eH; | |
float nH = eV; | |
float dV = (fabs(2.0 * b[1].x - a[1].x - c[1].x) + fabs(2.0 * b[2].x - a[2].x - c[2].x) + fabs(2.0 * b[3].x - a[3].x - c[3].x) + fabs(2.0 * c[1].x - b[1].x - d[1].x) + fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[3].x - b[3].x - d[3].x) + fabs(2.0 * d[1].x - c[1].x - e[1].x) + fabs(2.0 * d[2].x - c[2].x - e[2].x) + fabs(2.0 * d[3].x - c[3].x - e[3].x)) * 255.0; | |
float dH = (fabs(2.0 * b[1].x - b[0].x - b[2].x) + fabs(2.0 * b[2].x - b[1].x - b[3].x) + fabs(2.0 * b[3].x - b[2].x - b[4].x) + fabs(2.0 * c[1].x - c[0].x - c[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[3].x - c[2].x - c[4].x) + fabs(2.0 * d[1].x - d[0].x - d[2].x) + fabs(2.0 * d[2].x - d[1].x - d[3].x) + fabs(2.0 * d[3].x - d[2].x - d[4].x)) * 255.0; | |
float dA = (fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[2].x - b[1].x - d[3].x) + fabs(2.0 * c[2].x - b[3].x - d[1].x) + fabs(2.0 * c[2].x - a[0].x - e[4].x) + fabs(2.0 * c[2].x - a[4].x - e[0].x) + fabs(2.0 * c[2].x - c[0].x - c[4].x) + fabs(2.0 * c[2].x - a[2].x - e[2].x) + fabs(2.0 * c[2].x - (b[0].x + d[0].x + b[4].x + d[4].x) / 2.0)) * 255.0; | |
float edge = dH < (dV < dA ? dV : dA) ? eH : (dV < dA ? eV : eA); | |
float noise = dH < (dV < dA ? dV : dA) ? nH : (dV < dA ? nV : nA); | |
float dir = dH < (dV < dA ? dA : dV) ? (dV < dA ? dA : dV) : dH; | |
noise = noise * ee_config.nr_gain / 16.0; | |
edge = edge * ee_config.ee_gain / 16.0; | |
y_out.x = dir > ee_config.ee_threshold ? (c[2].x * 255.0 + edge - noise) / 255.0 : c[2].x; | |
y_out.y = 0.0; | |
y_out.z = 0.0; | |
y_out.w = 1.0; | |
write_imagef(output, (int2)(x, y), y_out); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_gamma | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* table: gamma table. | |
*/ | |
//TODO: linear scale should be a problem | |
__kernel void kernel_gamma (__read_only image2d_t input, __write_only image2d_t output, __global float *table) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
float4 pixel_in[8], pixel_out[8]; | |
int i=0,j=0; | |
#pragma unroll | |
for(j=0;j<2;j++) { | |
#pragma unroll | |
for(i=0;i<4;i++) { | |
pixel_in[j*4 + i] = read_imagef(input, sampler,(int2)(4*x + i, 2*y + j)); | |
pixel_out[j*4 + i].x = table[convert_int(pixel_in[j*4 + i].x * 255.0)] / 255.0; | |
pixel_out[j*4 + i].y = table[convert_int(pixel_in[j*4 + i].y * 255.0)] / 255.0; | |
pixel_out[j*4 + i].z = table[convert_int(pixel_in[j*4 + i].z * 255.0)] / 255.0; | |
pixel_out[j*4 + i].w = 0.0; | |
write_imagef(output, (int2)(4*x + i, 2*y + j), pixel_out[j*4 + i]); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_hdr_lab | |
* sample code of default kernel arguments | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
__constant float table[] = {4.3287616, 5.9319224, 7.1324205, 8.1288157, 8.9965763, 9.7739191, 10.483327, 11.139331, 11.751958, 12.328467, 12.874310, 13.393700, 13.889977, 14.365837, 14.823494, 15.264792, 15.691289, 16.104307, 16.504990, 16.894327, 17.273184, 17.642323, 18.002419, 18.354071, 18.697817, 19.034143, 19.363485, 19.686239, 20.002764, 20.313389, 20.618416, 20.918123, 21.212763, 21.502573, 21.787767, 22.068552, 22.345116, 22.617630, 22.886259, 23.151157, 23.412468, 23.670324, 23.924854, 24.176174, 24.424398, 24.669630, 24.911972, 25.151518, 25.388355, 25.622572, 25.854246, 26.083458, 26.310276, 26.534771, 26.757010, 26.977057, 27.194969, 27.410807, 27.624624, 27.836473, 28.046406, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.377895, 28.567524, 28.755781, 28.942692, 29.128284, 29.312582, 29.495617, 29.677410, 29.857986, 30.037369, 30.215582, 30.392645, 30.568581, 30.743410, 30.917152, 31.089828, 31.261454, 31.432049, 31.601633, 31.770222, 31.937832, 32.104481, 32.270180, 32.434952, 32.598808, 32.761761, 32.923832, 33.085026, 33.245361, 33.404850, 33.563507, 33.721340, 33.878368, 34.034599, 34.190044, 34.344715, 34.498627, 34.651783, 34.804199, 34.955887, 35.106853, 35.257107, 35.406662, 35.555523, 35.703705, 35.851208, 35.998051, 36.144238, 36.289776, 36.434673, 36.578941, 36.722588, 36.865616, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.013512, 37.148350, 37.282703, 37.416573, 37.549969, 37.682888, 37.815342, 37.947330, 38.078865, 38.209946, 38.340580, 38.470768, 38.600517, 38.729836, 38.858719, 38.987175, 39.115215, 39.242832, 39.370041, 39.496838, 39.623226, 39.749214, 39.874802, 40, 40.124805, 40.249222, 40.373260, 40.496914, 40.620193, 40.743095, 40.865631, 40.987804, 41.109608, 41.231056, 41.352146, 41.472885, 41.593269, 41.713306, 41.833000, 41.952354, 42.071369, 42.190048, 42.308392, 42.426407, 42.544094, 42.661457, 42.778500, 42.895222, 43.011627, 43.127716, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.315910, 43.427536, 43.538902, 43.650013, 43.760872, 43.871479, 43.981831, 44.091938, 44.201797, 44.311413, 44.420780, 44.529911, 44.638798, 44.747448, 44.855862, 44.964039, 45.071983, 45.179695, 45.287178, 45.394428, 45.501453, 45.608253, 45.714825, 45.821178, 45.927307, 46.033215, 46.138905, 46.244377, 46.349632, 46.454674, 46.559505, 46.664120, 46.768528, 46.872723, 46.976711, 47.080494, 47.184067, 47.287441, 47.390610, 47.493576, 47.596344, 47.698910, 47.801281, 47.903454, 48.005428, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.116840, 48.216503, 48.316002, 48.415333, 48.514507, 48.613514, 48.712364, 48.811050, 48.909580, 49.007950, 49.106163, 49.204220, 49.302116, 49.399860, 49.497452, 49.594887, 49.692173, 49.789303, 49.886284, 49.983109, 50.079788, 50.176319, 50.272701, 50.368935, 50.465023, 50.560966, 50.656761, 50.752411, 50.847919, 50.943279, 51.038502, 51.133583, 51.228519, 51.323318, 51.417973, 51.512493, 51.606873, 51.701115, 51.795219, 51.889191, 51.983021, 52.076717, 52.170280, 52.263706, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.370274, 52.462727, 52.555069, 52.647293, 52.739407, 52.831406, 52.923294, 53.015072, 53.106739, 53.198296, 53.289738, 53.381077, 53.472301, 53.563419, 53.654430, 53.745327, 53.836124, 53.926811, 54.017391, 54.107864, 54.198231, 54.288494, 54.378651, 54.468708, 54.558655, 54.648502, 54.738243, 54.827885, 54.917419, 55.006851, 55.096188, 55.185417, 55.274551, 55.363579, 55.452511, 55.541340, 55.630070, 55.718704, 55.807240, 55.895676, 55.984013, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.136337, 56.224670, 56.312920, 56.401089, 56.489174, 56.577179, 56.665100, 56.752941, 56.840698, 56.928371, 57.015968, 57.103481, 57.190918, 57.278271, 57.365547, 57.452744, 57.539856, 57.626896, 57.713852, 57.800732, 57.887531, 57.974255, 58.060902, 58.147465, 58.233959, 58.320370, 58.406708, 58.492970, 58.579155, 58.665260, 58.751289, 58.837250, 58.923130, 59.008938, 59.094666, 59.180325, 59.265907, 59.351414, 59.436852, 59.522209, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.676125, 59.762394, 59.848598, 59.934742, 60.020828, 60.106846, 60.192806, 60.278702, 60.364540, 60.450314, 60.536030, 60.621685, 60.707275, 60.792812, 60.878284, 60.963699, 61.049049, 61.134342, 61.219578, 61.304752, 61.389870, 61.474926, 61.559925, 61.644867, 61.729748, 61.814568, 61.899334, 61.984039, 62.068687, 62.153282, 62.237812, 62.322292, 62.406708, 62.491070, 62.575375, 62.659622, 62.743816, 62.827950, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.940441, 63.026379, 63.112267, 63.198112, 63.283909, 63.369659, 63.455360, 63.541019, 63.626637, 63.712200, 63.797718, 63.883190, 63.968624, 64.054001, 64.139343, 64.224632, 64.309875, 64.395081, 64.480240, 64.565346, 64.650414, 64.735435, 64.820412, 64.905342, 64.990234, 65.075073, 65.159874, 65.244629, 65.329346, 65.414009, 65.498634, 65.583214, 65.667747, 65.752243, 65.836693, 65.921097, 66.005463, 66.089783, 66.174057, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.291786, 66.378784, 66.465752, 66.552689, 66.639587, 66.726456, 66.813293, 66.900085, 66.986855, 67.073586, 67.160286, 67.246948, 67.333572, 67.420174, 67.506737, 67.593269, 67.679764, 67.766228, 67.852654, 67.939056, 68.025421, 68.111755, 68.198051, 68.284325, 68.370560, 68.456757, 68.542923, 68.629059, 68.715164, 68.801239, 68.887276, 68.973282, 69.059258, 69.145203, 69.231110, 69.316986, 69.402832, 69.488655, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.659309, 69.748878, 69.838425, 69.927956, 70.017456, 70.106934, 70.196388, 70.285820, 70.375237, 70.464622, 70.553986, 70.643333, 70.732658, 70.821953, 70.911224, 71.000481, 71.089706, 71.178917, 71.268105, 71.357262, 71.446404, 71.535530, 71.624619, 71.713692, 71.802750, 71.891777, 71.980789, 72.069771, 72.158737, 72.247673, 72.336594, 72.425499, 72.514374, 72.603226, 72.692062, 72.780876, 72.869659, 72.958427, 73.047173, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.151604, 73.245392, 73.339172, 73.432938, 73.526695, 73.620430, 73.714165, 73.807884, 73.901588, 73.995285, 74.088966, 74.182640, 74.276299, 74.369942, 74.463577, 74.557198, 74.650818, 74.744415, 74.838005, 74.931580, 75.025139, 75.118698, 75.212242, 75.305771, 75.399284, 75.492798, 75.586296, 75.679771, 75.773247, 75.866707, 75.960159, 76.053596, 76.147018, 76.240433, 76.333839, 76.427231, 76.520615, 76.613983, 76.707336, 76.800690, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.900002, 77, 77.100006, 77.200005, 77.300003, 77.400002, 77.500000, 77.600006, 77.700005, 77.800003, 77.900002, 78, 78.099998, 78.199997, 78.300003, 78.400002, 78.500000, 78.599998, 78.699997, 78.799995, 78.899994, 79, 79.099998, 79.199997, 79.299995, 79.400002, 79.500000, 79.599998, 79.699997, 79.799995, 79.900002, 80, 80.099998, 80.199997, 80.299995, 80.400002, 80.500000, 80.599998, 80.699997, 80.800003, 80.900002, 81, 81.099998, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.299294, 81.408134, 81.516991, 81.625862, 81.734749, 81.843643, 81.952560, 82.061493, 82.170433, 82.279388, 82.388359, 82.497353, 82.606354, 82.715363, 82.824394, 82.933441, 83.042496, 83.151573, 83.260658, 83.369759, 83.478874, 83.588005, 83.697151, 83.806305, 83.915474, 84.024666, 84.133865, 84.243080, 84.352310, 84.461555, 84.570816, 84.680084, 84.789368, 84.898666, 85.007988, 85.117310, 85.226654, 85.336006, 85.445374, 85.554764, 85.664162, 85.773575, 85.882996, 85.992439, 86.101898, 86.211365, 86.320839, 86.430336, 86.539848, 86.649376, 86.758911, 86.868454, 86.978027, 87.087608, 87.197197, 87.306801, 87.416420, 87.526054, 87.635704, 87.745361, 87.855034, 87.964722, 88.074432, 88.184143, 88.293877, 88.403618, 88.513374, 88.623146, 88.732933, 88.842728, 88.952538, 89.062363, 89.172203, 89.282051, 89.391914, 89.501793, 89.611694, 89.721596, 89.831512, 89.941444, 90.051399, 90.161354, 90.271324, 90.381310, 90.491310, 90.601326, 90.711349, 90.821388, 90.931442, 91.041512, 91.151596, 91.261681, 91.371788, 91.481911, 91.592049, 91.702187, 91.812347, 91.922516, 92.032707, 92.142906, 92.253120, 92.363342, 92.473579, 92.583839, 92.694099, 92.804375, 92.914665, 93.024971, 93.135292, 93.245621, 93.355965, 93.466316, 93.576683, 93.687073, 93.797470, 93.907875, 94.018295, 94.128731, 94.239182, 94.349640, 94.460106, 94.570595, 94.681099, 94.791611, 94.902130, 95.012665, 95.123215, 95.233788, 95.344360, 95.454948, 95.565552, 95.676170, 95.786797, 95.897430, 96.008087, 96.118752, 96.229431, 96.340126, 96.450829, 96.561539, 96.672272, 96.783012, 96.893768, 97.004532, 97.115311, 97.226112, 97.336914, 97.447731, 97.558563, 97.669403, 97.780266, 97.891129, 98.002007, 98.112900, 98.223816, 98.334732, 98.445656, 98.556602, 98.667557, 98.778526, 98.889511, 99.000504, 99.111504, 99.222519, 99.333557, 99.444603, 99.555656, 99.666718, 99.777809, 99.888893, 100}; | |
static float fun(float in) | |
{ | |
return in > 0.008856 ? (native_powr(in, 1.0 / 3)) : (7.787 * in + 16.0 / 116); | |
} | |
__kernel void kernel_hdr_lab (__read_only image2d_t input, __write_only image2d_t output) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
float4 pixel_in, pixel_out; | |
unsigned int table_id; | |
pixel_in = read_imagef(input, sampler, (int2)(x, y)); | |
float X, Y, Z, L, a, b; | |
X = 0.412453 * pixel_in.x + 0.357580 * pixel_in.y + 0.180423 * pixel_in.z; | |
Y = 0.212671 * pixel_in.x + 0.715160 * pixel_in.y + 0.072169 * pixel_in.z; | |
Z = 0.019334 * pixel_in.x + 0.119193 * pixel_in.y + 0.950227 * pixel_in.z; | |
L = Y > 0.008856 ? (116.0 * native_powr(Y, 1.0 / 3) - 16.0) : 903.3 * Y; | |
a = 500 * (fun(X) - fun(Y)); | |
b = 200 * (fun(Y) - fun(Z)); | |
table_id = (unsigned int)(L * 10) < 1000 ? (unsigned int)(L * 10) : 999; | |
L = table[table_id]; | |
float fX, fY, fZ; | |
Y = (L / 116.0) * (L / 116.0) * (L / 116.0); | |
Y = Y < 0.008856 ? L / 903.3 : Y; | |
fY = Y > 0.008856 ? (native_powr(Y, 1.0 / 3.0)) : (7.787 * Y + 16.0 / 116.0); | |
fX = a / 500.0 + fY; | |
X = fX * fX * fX; | |
X = X < 0.008865 ? ((fX - 16.0 / 116.0) / 7.787) : X; | |
fZ = fY - b / 200.0; | |
Z = fZ * fZ * fZ; | |
Z = Z < 0.008865 ? ((fZ - 16.0 / 116.0) / 7.787) : Z; | |
pixel_out.x = 3.240479 * X - 1.537150 * Y - 0.498535 * Z; | |
pixel_out.y = -0.969256 * X + 1.875992 * Y + 0.041556 * Z; | |
pixel_out.z = 0.055648 * X - 0.204043 * Y + 1.204043 * Z; | |
pixel_out.w = pixel_in.w; | |
write_imagef(output, (int2)(x, y), pixel_out); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_hdr_rgb | |
* sample code of default kernel arguments | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
__constant int HDRTable[1024] = {104, 106, 108, 109, 111, 113, 114, 116, 118, 119, 121, 123, 125, 126, 128, 130, 131, 133, 135, 136, 138, 139, 141, 143, 144, 146, 148, 149, 151, 152, 154, 156, 157, 159, 160, 162, 164, 165, 167, 168, 170, 171, 173, 174, 176, 177, 179, 181, 182, 184, 185, 187, 188, 190, 191, 193, 194, 195, 197, 198, 200, 201, 203, 204, 206, 207, 209, 210, 211, 213, 214, 216, 217, 219, 220, 221, 223, 224, 226, 227, 228, 230, 231, 232, 234, 235, 236, 238, 239, 241, 242, 243, 245, 246, 247, 248, 250, 251, 252, 254, 255, 256, 258, 259, 260, 261, 263, 264, 265, 267, 268, 269, 270, 272, 273, 274, 275, 277, 278, 279, 280, 281, 283, 284, 285, 286, 287, 289, 290, 291, 292, 293, 295, 296, 297, 298, 299, 300, 302, 303, 304, 305, 306, 307, 308, 310, 311, 312, 313, 314, 315, 316, 317, 318, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 389, 390, 391, 392, 393, 394, 395, 395, 396, 397, 398, 399, 400, 401, 401, 402, 403, 404, 405, 406, 406, 407, 408, 409, 410, 410, 411, 412, 413, 414, 415, 415, 416, 417, 418, 418, 419, 420, 421, 422, 422, 423, 424, 425, 425, 426, 427, 428, 429, 429, 430, 431, 432, 432, 433, 434, 435, 435, 436, 437, 437, 438, 439, 440, 440, 441, 442, 442, 443, 444, 445, 445, 446, 447, 447, 448, 449, 450, 450, 451, 452, 452, 453, 454, 454, 455, 456, 456, 457, 458, 458, 459, 460, 460, 461, 462, 462, 463, 464, 464, 465, 466, 466, 467, 468, 468, 469, 469, 470, 471, 471, 472, 473, 473, 474, 475, 475, 476, 476, 477, 478, 478, 479, 479, 480, 481, 481, 482, 483, 483, 484, 484, 485, 486, 486, 487, 487, 488, 488, 489, 490, 490, 491, 491, 492, 493, 493, 494, 494, 495, 495, 496, 497, 497, 498, 498, 499, 499, 500, 501, 501, 502, 502, 503, 503, 504, 504, 505, 506, 506, 507, 507, 508, 508, 509, 509, 510, 510, 511, 512, 512, 513, 513, 514, 514, 515, 515, 516, 516, 517, 517, 518, 518, 519, 519, 520, 521, 521, 522, 522, 523, 523, 524, 524, 525, 525, 526, 526, 527, 527, 528, 528, 529, 529, 530, 530, 531, 531, 532, 532, 533, 533, 534, 534, 535, 535, 536, 536, 537, 537, 538, 538, 539, 539, 540, 540, 541, 541, 542, 542, 543, 543, 544, 544, 545, 545, 546, 546, 547, 547, 548, 548, 549, 549, 550, 550, 551, 551, 552, 552, 553, 553, 554, 554, 554, 555, 555, 556, 556, 557, 557, 558, 558, 559, 559, 560, 560, 561, 561, 562, 562, 563, 563, 564, 564, 565, 565, 566, 566, 567, 567, 568, 568, 568, 569, 569, 570, 570, 571, 571, 572, 572, 573, 573, 574, 574, 575, 575, 576, 576, 577, 577, 578, 578, 579, 579, 580, 580, 581, 581, 582, 582, 582, 583, 583, 584, 584, 585, 585, 586, 586, 587, 587, 588, 588, 589, 589, 590, 590, 591, 591, 592, 592, 593, 593, 594, 594, 595, 595, 596, 596, 597, 597, 598, 598, 599, 599, 600, 600, 601, 601, 602, 602, 603, 603, 604, 604, 605, 605, 606, 606, 607, 607, 608, 608, 609, 609, 610, 611, 611, 612, 612, 613, 613, 614, 614, 615, 615, 616, 616, 617, 617, 618, 618, 619, 619, 620, 621, 621, 622, 622, 623, 623, 624, 624, 625, 625, 626, 627, 627, 628, 628, 629, 629, 630, 630, 631, 632, 632, 633, 633, 634, 634, 635, 636, 636, 637, 637, 638, 638, 639, 640, 640, 641, 641, 642, 642, 643, 644, 644, 645, 645, 646, 647, 647, 648, 648, 649, 650, 650, 651, 651, 652, 653, 653, 654, 654, 655, 656, 656, 657, 658, 658, 659, 659, 660, 661, 661, 662, 663, 663, 664, 665, 665, 666, 666, 667, 668, 668, 669, 670, 670, 671, 672, 672, 673, 674, 674, 675, 676, 676, 677, 678, 678, 679, 680, 680, 681, 682, 683, 683, 684, 685, 685, 686, 687, 687, 688, 689, 690, 690, 691, 692, 692, 693, 694, 695, 695, 696, 697, 698, 698, 699, 700, 701, 701, 702, 703, 704, 704, 705, 706, 707, 707, 708, 709, 710, 710, 711, 712, 713, 714, 714, 715, 716, 717, 718, 718, 719, 720, 721, 722, 722, 723, 724, 725, 726, 726, 727, 728, 729, 730, 731, 731, 732, 733, 734, 735, 736, 737, 737, 738, 739, 740, 741, 742, 743, 744, 744, 745, 746, 747, 748, 749, 750, 751, 752, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 814, 815, 816, 817, 818, 819, 820, 821, 822, 824, 825, 826, 827, 828, 829, 830, 832, 833, 834, 835, 836, 837, 839, 840, 841, 842, 843, 845, 846, 847, 848, 849, 851, 852, 853, 854, 855, 857, 858, 859, 860, 862, 863, 864, 865, 867, 868, 869, 870, 872, 873, 874, 876, 877, 878, 880, 881, 882, 883, 885, 886, 887, 889, 890, 891, 893, 894, 895, 897, 898, 900, 901, 902, 904, 905, 906, 908, 909, 911, 912, 913, 915, 916, 918, 919, 920, 922, 923, 925, 926, 928, 929, 931, 932, 933, 935, 936, 938, 939, 941, 942, 944, 945, 947, 948, 950, 951, 953, 954, 956, 957, 959, 961, 962, 964, 965, 967, 968, 970, 972, 973, 975, 976, 978, 979, 981, 983, 984, 986, 988, 989, 991, 992, 994, 996, 997, 999, 1001, 1002, 1004, 1006, 1007, 1009, 1011, 1012, 1014, 1016, 1017, 1019, 1021, 1023}; | |
__kernel void kernel_hdr_rgb (__read_only image2d_t input, __write_only image2d_t output) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
int2 pos = (int2)(x, y); | |
float4 pixel_in, pixel_out; | |
pixel_in = read_imagef(input, sampler, pos); | |
pixel_out.x = convert_float(HDRTable[convert_int(pixel_in.x * 1023)] / 1023.0); | |
pixel_out.y = convert_float(HDRTable[convert_int(pixel_in.y * 1023)] / 1023.0); | |
pixel_out.z = convert_float(HDRTable[convert_int(pixel_in.z * 1023)] / 1023.0); | |
pixel_out.w = 0.0; | |
write_imagef(output, pos, pixel_out); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* \brief Image scaling kernel function. | |
* \param[in] input Input image object. | |
* \param[out] output scaled output image object. | |
* \param[in] output_widht: output width | |
* \param[in] output_height: output height | |
* \param[in] vertical_offset: vertical offset from y to uv | |
*/ | |
//TODO: This is the most instesrting kernel, using hardware sampler to scale | |
__kernel void kernel_image_scaler (__read_only image2d_t input, | |
__write_only image2d_t output, | |
const uint output_widht, | |
const uint output_height) | |
{ | |
int x = get_global_id(0); | |
int y = get_global_id(1); | |
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; | |
float2 normCoor = convert_float2((int2)(x, y)) / (float2)(output_widht, output_height); | |
float4 scaled_pixel = read_imagef(input, sampler, normCoor); | |
write_imagef(output, (int2)(x, y), scaled_pixel); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_macc | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* table: macc table. | |
*/ | |
unsigned int get_sector_id (float u, float v) | |
{ | |
u = fabs(u) > 0.00001f ? u : 0.00001f; | |
float tg = v / u; | |
unsigned int se = tg > 1 ? (tg > 2 ? 3 : 2) : (tg > 0.5 ? 1 : 0); | |
unsigned int so = tg > -1 ? (tg > -0.5 ? 3 : 2) : (tg > -2 ? 1 : 0); | |
return tg > 0 ? (u > 0 ? se : (se + 8)) : (u > 0 ? (so + 12) : (so + 4)); | |
} | |
__kernel void kernel_macc (__read_only image2d_t input, __write_only image2d_t output, __global float *table) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
float4 pixel_in[8], pixel_out[8]; | |
float Y[8], ui[8], vi[8], uo[8], vo[8]; | |
unsigned int table_id[8]; | |
int i = 0, j = 0; | |
#pragma unroll | |
for(j = 0; j < 2; j++) { | |
#pragma unroll | |
for(i = 0; i < 4; i++) { | |
pixel_in[j * 4 + i] = read_imagef(input, sampler, (int2)(4 * x + i, 2 * y + j)); | |
Y[j * 4 + i] = 0.3 * pixel_in[j * 4 + i].x + 0.59 * pixel_in[j * 4 + i].y + 0.11 * pixel_in[j * 4 + i].z; | |
ui[j * 4 + i] = 0.493 * (pixel_in[j * 4 + i].z - Y[j * 4 + i]); | |
vi[j * 4 + i] = 0.877 * (pixel_in[j * 4 + i].x - Y[j * 4 + i]); | |
table_id[j * 4 + i] = get_sector_id(ui[j * 4 + i], vi[j * 4 + i]); | |
uo[j * 4 + i] = ui[j * 4 + i] * table[4 * table_id[j * 4 + i]] + vi[j * 4 + i] * table[4 * table_id[j * 4 + i] + 1]; | |
vo[j * 4 + i] = ui[j * 4 + i] * table[4 * table_id[j * 4 + i] + 2] + vi[j * 4 + i] * table[4 * table_id[j * 4 + i] + 3]; | |
pixel_out[j * 4 + i].x = Y[j * 4 + i] + 1.14 * vo[j * 4 + i]; | |
pixel_out[j * 4 + i].y = Y[j * 4 + i] - 0.39 * uo[j * 4 + i] - 0.58 * vo[j * 4 + i]; | |
pixel_out[j * 4 + i].z = Y[j * 4 + i] + 2.03 * uo[j * 4 + i]; | |
pixel_out[j * 4 + i].w = 0.0; | |
write_imagef(output, (int2)(4 * x + i, 2 * y + j), pixel_out[j * 4 + i]); | |
} | |
} | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_snr | |
* implementation of simple noise reduction | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
__kernel void kernel_snr (__read_only image2d_t input, __write_only image2d_t output) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
float4 p[9]; | |
p[0] = read_imagef(input, sampler, (int2)(x - 1, y - 1)); | |
p[1] = read_imagef(input, sampler, (int2)(x, y - 1)); | |
p[2] = read_imagef(input, sampler, (int2)(x + 1, y - 1)); | |
p[3] = read_imagef(input, sampler, (int2)(x - 1, y)); | |
p[4] = read_imagef(input, sampler, (int2)(x, y)); | |
p[5] = read_imagef(input, sampler, (int2)(x + 1, y)); | |
p[6] = read_imagef(input, sampler, (int2)(x - 1, y + 1)); | |
p[7] = read_imagef(input, sampler, (int2)(x, y + 1)); | |
p[8] = read_imagef(input, sampler, (int2)(x + 1, y + 1)); | |
float4 pixel_out; | |
pixel_out.x = (p[0].x + p[1].x + p[2].x + p[3].x + p[4].x + p[5].x + p[6].x + p[7].x + p[8].x) / 9.0f; | |
pixel_out.y = (p[0].y + p[1].y + p[2].y + p[3].y + p[4].y + p[5].y + p[6].y + p[7].y + p[8].y) / 9.0f; | |
pixel_out.z = (p[0].z + p[1].z + p[2].z + p[3].z + p[4].z + p[5].z + p[6].z + p[7].z + p[8].z) / 9.0f; | |
pixel_out.w = p[4].w; | |
write_imagef(output, (int2)(x, y), pixel_out); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_tonemapping | |
* implementation of tone mapping | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
*/ | |
#define WORK_ITEM_X_SIZE 8 | |
#define WORK_ITEM_Y_SIZE 8 | |
#define SHARED_PIXEL_X_SIZE 10 | |
#define SHARED_PIXEL_Y_SIZE 10 | |
__kernel void kernel_tonemapping (__read_only image2d_t input, __write_only image2d_t output, float y_max, float y_target, int image_height) | |
{ | |
int g_id_x = get_global_id (0); | |
int g_id_y = get_global_id (1); | |
int group_id_x = get_group_id(0); | |
int group_id_y = get_group_id(1); | |
int local_id_x = get_local_id(0); | |
int local_id_y = get_local_id(1); | |
int g_size_x = get_global_size (0); | |
int g_size_y = get_global_size (1); | |
int local_index = local_id_y * WORK_ITEM_X_SIZE + local_id_x; | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; | |
__local float4 local_src_data[SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE]; | |
float4 src_data_Gr = read_imagef (input, sampler, (int2)(g_id_x, g_id_y)); | |
float4 src_data_R = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height)); | |
float4 src_data_B = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 2)); | |
float4 src_data_Gb = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 3)); | |
float4 src_data_G = (src_data_Gr + src_data_Gb) / 2; | |
float4 src_y_data = 0.0f; | |
src_y_data = mad(src_data_R, 255.f * 0.299f, src_y_data); | |
src_y_data = mad(src_data_G, 255.f * 0.587f, src_y_data); | |
src_y_data = mad(src_data_B, 255.f * 0.114f, src_y_data); | |
local_src_data[(local_id_y + 1) * SHARED_PIXEL_X_SIZE + local_id_x + 1] = src_y_data; | |
if(local_index < SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE - WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE) | |
{ | |
int target_index = local_index <= SHARED_PIXEL_X_SIZE ? local_index : (local_index <= (SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE - WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE - SHARED_PIXEL_X_SIZE) ? (local_index + WORK_ITEM_X_SIZE + (local_index - (SHARED_PIXEL_X_SIZE + 1)) / 2 * WORK_ITEM_X_SIZE) : (local_index + WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE)); | |
int start_x = mad24(group_id_x, WORK_ITEM_X_SIZE, -1); | |
int start_y = mad24(group_id_y, WORK_ITEM_Y_SIZE, -1); | |
int offset_x = target_index % SHARED_PIXEL_X_SIZE; | |
int offset_y = target_index / SHARED_PIXEL_X_SIZE; | |
float4 data_Gr = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y)); | |
float4 data_R = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height)); | |
float4 data_B = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height * 2)); | |
float4 data_Gb = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height * 3)); | |
float4 data_G = (data_Gr + data_Gb) / 2; | |
float4 y_data = 0.0f; | |
y_data = mad(data_R, 255.f * 0.299f, y_data); | |
y_data = mad(data_G, 255.f * 0.587f, y_data); | |
y_data = mad(data_B, 255.f * 0.114f, y_data); | |
local_src_data[target_index] = y_data; | |
} | |
barrier(CLK_LOCAL_MEM_FENCE); | |
float gaussian_table[9] = {0.075f, 0.124f, 0.075f, | |
0.124f, 0.204f, 0.124f, | |
0.075f, 0.124f, 0.075f | |
}; | |
float4 src_ym_data = 0.0f; | |
float16 integrate_data = *((__local float16 *)(local_src_data + local_id_y * SHARED_PIXEL_X_SIZE + local_id_x)); | |
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[0], src_ym_data); | |
src_ym_data = mad(integrate_data.s4567, (float4)gaussian_table[1], src_ym_data); | |
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[2], src_ym_data); | |
integrate_data = *((__local float16 *)(local_src_data + (local_id_y + 1) * SHARED_PIXEL_X_SIZE + local_id_x)); | |
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[3], src_ym_data); | |
src_ym_data = mad(src_y_data, (float4)gaussian_table[4], src_ym_data); | |
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[5], src_ym_data); | |
integrate_data = *((__local float16 *)(local_src_data + (local_id_y + 2) * SHARED_PIXEL_X_SIZE + local_id_x)); | |
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[6], src_ym_data); | |
src_ym_data = mad(integrate_data.s4567, (float4)gaussian_table[7], src_ym_data); | |
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[8], src_ym_data); | |
float4 gain = ((float4)(y_max + y_target) + src_ym_data) / (src_y_data + src_ym_data + (float4)y_target); | |
src_data_Gr = src_data_Gr * gain; | |
src_data_R = src_data_R * gain; | |
src_data_B = src_data_B * gain; | |
src_data_Gb = src_data_Gb * gain; | |
write_imagef(output, (int2)(g_id_x, g_id_y), src_data_Gr); | |
write_imagef(output, (int2)(g_id_x, g_id_y + image_height), src_data_R); | |
write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 2), src_data_B); | |
write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 3), src_data_Gb); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* function: kernel_wb | |
* black level correction for sensor data input | |
* input: image2d_t as read only | |
* output: image2d_t as write only | |
* wb_config: white balance configuration | |
*/ | |
typedef struct | |
{ | |
float r_gain; | |
float gr_gain; | |
float gb_gain; | |
float b_gain; | |
} CLWBConfig; | |
__kernel void kernel_wb (__read_only image2d_t input, | |
__write_only image2d_t output, | |
CLWBConfig wb_config) | |
{ | |
int x = get_global_id (0); | |
int y = get_global_id (1); | |
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |
float4 Gr_in, R_in, B_in, Gb_in; | |
float4 Gr_out, R_out, B_out, Gb_out; | |
Gr_in = read_imagef(input, sampler, (int2)(2 * x, 2 * y)); | |
R_in = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y)); | |
B_in = read_imagef(input, sampler, (int2)(2 * x, 2 * y + 1)); | |
Gb_in = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y + 1)); | |
Gr_out.x = Gr_in.x * wb_config.gr_gain; | |
Gr_out.y = 0.0; | |
Gr_out.z = 0.0; | |
Gr_out.w = 1.0; | |
R_out.x = R_in.x * wb_config.r_gain; | |
R_out.y = 0.0; | |
R_out.z = 0.0; | |
R_out.w = 1.0; | |
B_out.x = B_in.x * wb_config.b_gain; | |
B_out.y = 0.0; | |
B_out.z = 0.0; | |
B_out.w = 1.0; | |
Gb_out.x = Gb_in.x * wb_config.gb_gain; | |
Gb_out.y = 0.0; | |
Gb_out.z = 0.0; | |
Gb_out.w = 1.0; | |
write_imagef(output, (int2)(2 * x, 2 * y), Gr_out); | |
write_imagef(output, (int2)(2 * x + 1, 2 * y), R_out); | |
write_imagef(output, (int2)(2 * x, 2 * y + 1), B_out); | |
write_imagef(output, (int2)(2 * x + 1, 2 * y + 1), Gb_out); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment