Skip to content

Instantly share code, notes, and snippets.

@sukinull
Created January 4, 2016 07:45
Show Gist options
  • Save sukinull/23d6cfb53dd2213997c2 to your computer and use it in GitHub Desktop.
Save sukinull/23d6cfb53dd2213997c2 to your computer and use it in GitHub Desktop.
/*
* function: kernel_3a_stats
* input: image2d_t as read only
* output: XCamGridStat, stats results
*/
typedef struct
{
unsigned int avg_y;
unsigned int avg_r;
unsigned int avg_gr;
unsigned int avg_gb;
unsigned int avg_b;
unsigned int valid_wb_count;
unsigned int f_value1;
unsigned int f_value2;
} XCamGridStat;
__kernel void kernel_3a_stats (__read_only image2d_t input, __global XCamGridStat *output)
{
int x = get_global_id (0);
int y = get_global_id (1);
int w = get_global_size (0);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
int x0 = 16 * x;
int y0 = 16 * y;
float sum_gr = 0.0f, sum_r = 0.0f, sum_b = 0.0f, sum_gb = 0.0f;
float avg_gr = 0.0f, avg_r = 0.0f, avg_b = 0.0f, avg_gb = 0.0f;
int i = 0, j = 0;
float count = (16.0 / 2) * (16.0 / 2);
float4 p[4];
#pragma unroll
for (j = 0; j < 16; j += 2) {
#pragma unroll
for (i = 0; i < 16; i += 2) {
p[0] = read_imagef (input, sampler, (int2)(x0 + i, y0 + j));
p[1] = read_imagef (input, sampler, (int2)(x0 + i, y0 + j + 1));
p[2] = read_imagef (input, sampler, (int2)(x0 + i + 1, y0 + j));
p[3] = read_imagef (input, sampler, (int2)(x0 + i + 1, y0 + j + 1));
sum_gr += p[0].x;
sum_b += p[1].x;
sum_r += p[2].x;
sum_gb += p[3].x;
}
}
avg_gr = sum_gr / count;
avg_r = sum_r / count;
avg_b = sum_b / count;
avg_gb = sum_gb / count;
output[y * w + x].avg_gr = convert_uint(avg_gr * 256.0);
output[y * w + x].avg_r = convert_uint(avg_r * 256.0);
output[y * w + x].avg_b = convert_uint(avg_b * 256.0);
output[y * w + x].avg_gb = convert_uint(avg_gb * 256.0);
output[y * w + x].valid_wb_count = convert_uint(count);
output[y * w + x].avg_y = convert_uint(((avg_gr + avg_gb) / 2.0f) * 256.0);
output[y * w + x].f_value1 = 0;
output[y * w + x].f_value2 = 0;
}
/*
* function: kernel_bayer_copy
* sample code of default kernel arguments
* input: image2d_t as read only
* output: image2d_t as write only
*/
//#define ENABLE_IMAGE_2D_INPUT 0
/*
* GROUP_PIXEL_X_SIZE = 2 * GROUP_CELL_X_SIZE
* GROUP_PIXEL_Y_SIZE = 2 * GROUP_CELL_Y_SIZE
*/
#define GROUP_CELL_X_SIZE 64
#define GROUP_CELL_Y_SIZE 4
//float4; 16
#define SLM_X_SIZE (GROUP_CELL_X_SIZE / 4)
#define SLM_Y_SIZE GROUP_CELL_Y_SIZE
#define STATS_3A_CELL_X_SIZE 8
#define STATS_3A_CELL_Y_SIZE GROUP_CELL_Y_SIZE
typedef struct {
float level_gr; /* Black level for GR pixels */
float level_r; /* Black level for R pixels */
float level_b; /* Black level for B pixels */
float level_gb; /* Black level for GB pixels */
uint color_bits;
} CLBLCConfig;
typedef struct
{
float r_gain;
float gr_gain;
float gb_gain;
float b_gain;
} CLWBConfig;
inline int slm_pos (const int x, const int y)
{
return mad24 (y, SLM_X_SIZE, x);
}
inline void gamma_correct(float8 *in_out, __global float *table)
{
in_out->s0 = table[clamp(convert_int(in_out->s0 * 255.0f), 0, 255)];
in_out->s1 = table[clamp(convert_int(in_out->s1 * 255.0f), 0, 255)];
in_out->s2 = table[clamp(convert_int(in_out->s2 * 255.0f), 0, 255)];
in_out->s3 = table[clamp(convert_int(in_out->s3 * 255.0f), 0, 255)];
in_out->s4 = table[clamp(convert_int(in_out->s4 * 255.0f), 0, 255)];
in_out->s5 = table[clamp(convert_int(in_out->s5 * 255.0f), 0, 255)];
in_out->s6 = table[clamp(convert_int(in_out->s6 * 255.0f), 0, 255)];
in_out->s7 = table[clamp(convert_int(in_out->s7 * 255.0f), 0, 255)];
}
inline float avg_float8 (float8 data)
{
return (data.s0 + data.s1 + data.s2 + data.s3 + data.s4 + data.s5 + data.s6 + data.s7) * 0.125f;
}
inline void stats_3a_calculate (
__local float4 * slm_gr,
__local float4 * slm_r,
__local float4 * slm_b,
__local float4 * slm_gb,
__global ushort8 * stats_output,
CLWBConfig *wb_config)
{
const int group_x_size = get_num_groups (0);
const int group_id_x = get_group_id (0);
const int group_id_y = get_group_id (1);
const int l_id_x = get_local_id (0);
const int l_id_y = get_local_id (1);
const int l_size_x = get_local_size (0);
const int stats_float4_x_count = STATS_3A_CELL_X_SIZE / 4;
int count = stats_float4_x_count * STATS_3A_CELL_Y_SIZE / 4;
int index = mad24 (l_id_y, l_size_x, l_id_x);
int index_x = index % SLM_X_SIZE;
int index_y = index / SLM_X_SIZE;
if (mad24 (index_y, stats_float4_x_count, index_x % stats_float4_x_count) < count) {
int pitch_count = count / stats_float4_x_count * SLM_X_SIZE;
int index1 = index + pitch_count;
int index2 = index1 + pitch_count;
int index3 = index2 + pitch_count;
slm_gr[index] = (slm_gr[index] + slm_gr[index1] + slm_gr[index2] + slm_gr[index3]) * 0.25f;
slm_r[index] = (slm_r[index] + slm_r[index1] + slm_r[index2] + slm_r[index3]) * 0.25f;
slm_b[index] = (slm_b[index] + slm_b[index1] + slm_b[index2] + slm_b[index3]) * 0.25f;
slm_gb[index] = (slm_gb[index] + slm_gb[index1] + slm_gb[index2] + slm_gb[index3]) * 0.25f;
}
barrier (CLK_LOCAL_MEM_FENCE);
if (index < SLM_X_SIZE / 2) {
float result_gr, result_r, result_b, result_gb, avg_y;
float8 tmp;
tmp = ((__local float8*)slm_gr)[index];
result_gr = avg_float8 (tmp);
tmp = ((__local float8*)slm_r)[index];
result_r = avg_float8 (tmp);
tmp = ((__local float8*)slm_b)[index];
result_b = avg_float8 (tmp);
tmp = ((__local float8*)slm_gb)[index];
result_gb = avg_float8 (tmp);
avg_y = convert_uchar_sat(
mad ((result_gr * wb_config->gr_gain + result_gb * wb_config->gb_gain), 74.843f,
mad (result_r * wb_config->r_gain, 76.245f, result_b * 29.070f)));
int out_index = mad24 (mad24 (group_id_y, group_x_size, group_id_x),
(GROUP_CELL_X_SIZE / STATS_3A_CELL_X_SIZE) * (GROUP_CELL_Y_SIZE / STATS_3A_CELL_Y_SIZE),
index);
//ushort avg_y; avg_r; avg_gr; avg_gb; avg_b; valid_wb_count; f_value1; f_value2;
stats_output[out_index] = (ushort8) (
convert_ushort (convert_uchar_sat (avg_y)),
convert_ushort (convert_uchar_sat (result_r * 255.0f)),
convert_ushort (convert_uchar_sat (result_gr * 255.0f)),
convert_ushort (convert_uchar_sat (result_gb * 255.0f)),
convert_ushort (convert_uchar_sat (result_b * 255.0f)),
STATS_3A_CELL_X_SIZE * STATS_3A_CELL_Y_SIZE,
0,
0);
}
}
__kernel void kernel_bayer_basic (
#if ENABLE_IMAGE_2D_INPUT
__read_only image2d_t input,
#else
__global const ushort8 *input,
#endif
uint input_aligned_width,
__write_only image2d_t output,
uint out_height,
CLBLCConfig blc_config,
CLWBConfig wb_config,
__global float *gamma_table,
__global ushort8 *stats_output
)
{
int g_x = get_global_id (0);
int g_y = get_global_id (1);
const int l_x = get_local_id (0);
const int l_y = get_local_id (1);
const int l_x_size = get_local_size (0);
const int l_y_size = get_local_size (1);
const int group_id_x = get_group_id (0);
const int group_id_y = get_group_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
int index = mad24 (l_y, l_x_size, l_x);
int x_cell_start = (GROUP_CELL_X_SIZE / 4) * group_id_x;
int y_cell_start = GROUP_CELL_Y_SIZE * group_id_y;
int x, y;
float blc_multiplier = (float)(1 << (16 - blc_config.color_bits));
__local float4 slm_gr[SLM_X_SIZE * SLM_Y_SIZE], slm_r[SLM_X_SIZE * SLM_Y_SIZE], slm_b[SLM_X_SIZE * SLM_Y_SIZE], slm_gb[SLM_X_SIZE * SLM_Y_SIZE];
for (; index < SLM_X_SIZE * SLM_Y_SIZE; index += l_x_size * l_y_size) {
float8 line1;
float8 line2;
x = index % SLM_X_SIZE + x_cell_start;
y = index / SLM_X_SIZE + y_cell_start;
#if ENABLE_IMAGE_2D_INPUT
line1 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2)))) / 65536.0f;
line2 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2 + 1)))) / 65536.0f;
#else
line1 = convert_float8 (input [y * 2 * input_aligned_width + x]) / 65536.0f;
line2 = convert_float8 (input [(y * 2 + 1) * input_aligned_width + x]) / 65536.0f;
#endif
float4 gr = mad (line1.even, blc_multiplier, - blc_config.level_gr);
float4 r = mad (line1.odd, blc_multiplier, - blc_config.level_r);
float4 b = mad (line2.even, blc_multiplier, - blc_config.level_b);
float4 gb = mad (line2.odd, blc_multiplier, - blc_config.level_gb);
slm_gr[index] = gr;
slm_r[index] = r;
slm_b[index] = b;
slm_gb[index] = gb;
}
barrier(CLK_LOCAL_MEM_FENCE);
float8 data_gr, data_r, data_b, data_gb;
index = mad24 (l_y, l_x_size, l_x);
x = mad24 (GROUP_CELL_X_SIZE / 8, group_id_x, index % (SLM_X_SIZE / 2));
y = mad24 (GROUP_CELL_Y_SIZE, group_id_y, index / (SLM_X_SIZE / 2));
data_gr = ((__local float8*)slm_gr)[index];
data_gr = data_gr * wb_config.gr_gain;
data_r = ((__local float8*)slm_r)[index];
data_r = data_r * wb_config.r_gain;
data_b = ((__local float8*)slm_b)[index];
data_b = data_b * wb_config.b_gain;
data_gb = ((__local float8*)slm_gb)[index];
data_gb = data_gb * wb_config.gb_gain;
#if ENABLE_GAMMA
gamma_correct (&data_gr, gamma_table);
gamma_correct (&data_r, gamma_table);
gamma_correct (&data_b, gamma_table);
gamma_correct (&data_gb, gamma_table);
#endif
#if 0
if (x % 16 == 0 && y % 16 == 0) {
uint8 value = convert_uint8(convert_uchar8_sat(data_gr * 255.0f));
printf ("(x:%d, y:%d) (blc.bit:%d, level:%d) (wb.gr:%f)=> (%d, %d, %d, %d, %d, %d, %d, %d)\n",
x * 8, y,
blc_config.color_bits, convert_uint(blc_config.level_gr * 255.0f),
wb_config.gr_gain,
value.s0, value.s1, value.s2, value.s3, value.s4, value.s5, value.s6, value.s7);
}
#endif
write_imageui (output, (int2)(x, y), as_uint4 (convert_ushort8 (data_gr * 65536.0f)));
write_imageui (output, (int2)(x, y + out_height), as_uint4 (convert_ushort8 (data_r * 65536.0f)));
write_imageui (output, (int2)(x, y + out_height * 2), as_uint4 (convert_ushort8 (data_b * 65536.0f)));
write_imageui (output, (int2)(x, y + out_height * 3), as_uint4 (convert_ushort8 (data_gb * 65536.0f)));
stats_3a_calculate (slm_gr, slm_r, slm_b, slm_gb, stats_output, &wb_config);
}
/*
* function: kernel_bayer_pipe
* params:
* input: image2d_t as read only
* output: image2d_t as write only
* blc_config: black level correction configuration
* wb_config: whitebalance configuration
* gamma_table: RGGB table
* stats_output: 3a stats output
*/
#define WORKGROUP_CELL_WIDTH 8
#define WORKGROUP_CELL_HEIGHT 8
#define DEMOSAIC_X_CELL_PER_WORKITEM 2
#define PIXEL_PER_CELL 2
#define SLM_CELL_X_OFFSET 4
#define SLM_CELL_Y_OFFSET 1
// 8x8
#define SLM_CELL_X_VALID_SIZE WORKGROUP_CELL_WIDTH
#define SLM_CELL_Y_VALID_SIZE WORKGROUP_CELL_HEIGHT
// 10x10
#define SLM_CELL_X_SIZE (SLM_CELL_X_VALID_SIZE + SLM_CELL_X_OFFSET * 2)
#define SLM_CELL_Y_SIZE (SLM_CELL_Y_VALID_SIZE + SLM_CELL_Y_OFFSET * 2)
#define GUASS_DELTA_S_1 1.031739f
#define GUASS_DELTA_S_1_5 1.072799f
#define GUASS_DELTA_S_2 1.133173f
#define GUASS_DELTA_S_2_5 1.215717f
typedef struct
{
float ee_gain;
float ee_threshold;
float nr_gain;
} CLEeConfig;
inline int get_shared_pos_x (int i)
{
return i % SLM_CELL_X_SIZE;
}
inline int get_shared_pos_y (int i)
{
return i / SLM_CELL_X_SIZE;
}
inline int shared_pos (int x, int y)
{
return mad24(y, SLM_CELL_X_SIZE, x);
}
/* BA10=> GRBG */
inline void grbg_slm_load (
__local float *px, __local float *py, __local float *pz, __local float *pw,
int index, __read_only image2d_t input, int x_start, int y_start
)
{
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 data1, data2, line1, line2;
int x0 = (get_shared_pos_x (index) + x_start) / 4;
int y0 = get_shared_pos_y (index) + y_start;
int2 pos = (int2)(x0, y0);
float4 gr, r, b, gb;
gr = read_imagef (input, sampler, (int2)(x0, y0));
r = read_imagef (input, sampler, (int2)(x0, y0 + 544));
b = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 2));
gb = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 3));
(*(__local float4 *)(px + index)) = gr;
(*(__local float4 *)(py + index)) = r;
(*(__local float4 *)(pz + index)) = b;
(*(__local float4 *)(pw + index)) = gb;
}
#define MAX_DELTA_COFF 5.0f
#define MIN_DELTA_COFF 1.0f
#define DEFAULT_DELTA_COFF 4.0f
inline float2 delta_coff (float2 in, __local float *table)
{
float2 out;
out.x = table[(int)(fabs(in.x * 64.0f))];
out.y = table[(int)(fabs(in.y * 64.0f))];
return out;
}
inline float2 dot_denoise (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain)
{
float2 coff0, coff1, coff2, coff3, coff4, coff5;
coff0 = delta_coff (0.0, table) * gain;
coff1 = delta_coff (in1 - value, table);
coff2 = delta_coff (in2 - value, table);
coff3 = delta_coff (in3 - value, table);
coff4 = delta_coff (in4 - value, table);
//(in1 * coff1 + in2 * coff2 + in3 * coff3 + in4 * coff4 + value * coff0)
float2 sum1 = (mad (in1, coff1,
mad (in2, coff2,
mad (in3, coff3,
mad (in4, coff4, value * coff0)))));
return sum1 / (coff0 + coff1 + coff2 + coff3 + coff4);
}
inline float2 dot_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, float2 out, CLEeConfig ee_config, float2 *egain)
{
float2 eH = value - in1 * 0.5f - in3 * 0.5f;
float2 eV = value - in2 * 0.5f - in4 * 0.5f;
float2 ee;
eH.x = eH.x > ee_config.ee_threshold ? eH.x : 0.0f;
eH.y = eH.y > ee_config.ee_threshold ? eH.y : 0.0f;
eV.x = eV.x > ee_config.ee_threshold ? eV.x : 0.0f;
eV.y = eV.y > ee_config.ee_threshold ? eV.y : 0.0f;
ee.x = fmax(eH.x, eV.x);
ee.y = fmax(eH.y, eV.y);
egain[0] = mad(ee, ee_config.ee_gain, out) / out;
return out * egain[0];
}
inline float2 dot_denoise_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain, float2 *egain, CLEeConfig ee_config)
{
float2 out = dot_denoise(value, in1, in2, in3, in4, table, gain);
return dot_ee(value, in1, in2, in3, in4, out, ee_config, egain);
}
void demosaic_2_cell (
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
int in_x, int in_y,
__write_only image2d_t out, uint out_height, int out_x, int out_y)
{
float4 out_data;
float2 value;
int index;
{
float3 R_y[2];
index = shared_pos (in_x - 1, in_y);
R_y[0] = *(__local float3*)(y_data_in + index);
index = shared_pos (in_x - 1, in_y + 1);
R_y[1] = *(__local float3*)(y_data_in + index);
out_data.s02 = (R_y[0].s01 + R_y[0].s12) * 0.5f;
out_data.s13 = R_y[0].s12;
write_imagef (out, (int2)(out_x, out_y), out_data);
out_data.s02 = (R_y[0].s01 + R_y[0].s12 + R_y[1].s01 + R_y[1].s12) * 0.25f;
out_data.s13 = (R_y[0].s12 + R_y[1].s12) * 0.5f;
write_imagef (out, (int2)(out_x, out_y + 1), out_data);
}
{
float3 B_z[2];
index = shared_pos (in_x, in_y - 1);
B_z[0] = *(__local float3*)(z_data_in + index);
index = shared_pos (in_x, in_y);
B_z[1] = *(__local float3*)(z_data_in + index);
out_data.s02 = (B_z[0].s01 + B_z[1].s01) * 0.5f;
out_data.s13 = (B_z[0].s01 + B_z[0].s12 + B_z[1].s01 + B_z[1].s12) * 0.25f;
write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data);
out_data.s02 = B_z[1].s01;
out_data.s13 = (B_z[1].s01 + B_z[1].s12) * 0.5f;
write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data);
}
{
float3 Gr_x[2], Gb_w[2];
index = shared_pos (in_x, in_y);
Gr_x[0] = *(__local float3*)(x_data_in + index);
index = shared_pos (in_x, in_y + 1);
Gr_x[1] = *(__local float3*)(x_data_in + index);
index = shared_pos (in_x - 1, in_y - 1);
Gb_w[0] = *(__local float3*)(w_data_in + index);
index = shared_pos (in_x - 1, in_y);
Gb_w[1] = *(__local float3*)(w_data_in + index);
out_data.s02 = (Gr_x[0].s01 * 4.0f + Gb_w[0].s01 +
Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12) * 0.125f;
out_data.s13 = (Gr_x[0].s01 + Gr_x[0].s12 + Gb_w[0].s12 + Gb_w[1].s12) * 0.25f;
write_imagef (out, (int2)(out_x, out_y + out_height), out_data);
out_data.s02 = (Gr_x[0].s01 + Gr_x[1].s01 + Gb_w[1].s01 + Gb_w[1].s12) * 0.25f;
out_data.s13 = (Gb_w[1].s12 * 4.0f + Gr_x[0].s01 +
Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12) * 0.125f;
write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data);
}
}
void demosaic_denoise_2_cell (
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
int in_x, int in_y,
__write_only image2d_t out, uint out_height, int out_x, int out_y, __local float *table, CLEeConfig ee_config)
{
float4 out_data[2];
float2 value;
int index;
float2 egain[4];
///////////////////////////////////////G///////////////////////////////////
{
float3 Gr_x[2], Gb_w[2];
index = shared_pos (in_x - 1, in_y - 1);
Gb_w[0] = *(__local float3*)(w_data_in + index);
index = shared_pos (in_x - 1, in_y);
Gb_w[1] = *(__local float3*)(w_data_in + index);
index = shared_pos (in_x, in_y);
Gr_x[0] = *(__local float3*)(x_data_in + index);
index = shared_pos (in_x, in_y + 1);
Gr_x[1] = *(__local float3*)(x_data_in + index);
value = mad (Gr_x[0].s01, 4.0f, (Gb_w[0].s01 +
Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12)) * 0.125f;
out_data[0].s02 = dot_denoise_ee (value, Gb_w[0].s01, Gb_w[0].s12, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1_5, &egain[0], ee_config);
value = (Gr_x[0].s01 + Gr_x[0].s12 +
Gb_w[0].s12 + Gb_w[1].s12) * 0.25f;
out_data[0].s13 = dot_denoise_ee(value, Gr_x[0].s01, Gr_x[0].s12, Gb_w[0].s12, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[1], ee_config);
value = (Gr_x[0].s01 + Gr_x[1].s01 +
Gb_w[1].s01 + Gb_w[1].s12) * 0.25f;
out_data[1].s02 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[1].s01, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[2], ee_config);
value = mad (Gb_w[1].s12, 4.0f, (Gr_x[0].s01 +
Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12)) * 0.125f;
out_data[1].s13 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[0].s12, Gr_x[1].s01, Gr_x[1].s12, table, GUASS_DELTA_S_1_5, &egain[3], ee_config);
write_imagef (out, (int2)(out_x, out_y + out_height), out_data[0]);
write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data[1]);
}
////////////////////////////////R//////////////////////////////////////////
{
float4 R_y[3];
index = shared_pos (in_x - 1, in_y - 1);
R_y[0] = *(__local float4*)(y_data_in + index);
index = shared_pos (in_x - 1, in_y);
R_y[1] = *(__local float4*)(y_data_in + index);
index = shared_pos (in_x - 1, in_y + 1);
R_y[2] = *(__local float4*)(y_data_in + index);
value = (R_y[1].s01 + R_y[1].s12) * 0.5f;
out_data[0].s02 = dot_denoise (value, R_y[0].s01, R_y[0].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_2_5) * egain[0];
value = R_y[1].s12;
out_data[0].s13 = dot_denoise (value, R_y[0].s12, R_y[1].s01, R_y[1].s23, R_y[2].s12, table, GUASS_DELTA_S_2) * egain[1];
value = (R_y[1].s01 + R_y[1].s12 +
R_y[2].s01 + R_y[2].s12) * 0.25f;
out_data[1].s02 = dot_denoise (value, R_y[1].s01, R_y[1].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_1_5) * egain[2];
value = (R_y[1].s12 + R_y[2].s12) * 0.5f;
out_data[1].s13 = dot_denoise (value, R_y[1].s01, R_y[1].s23, R_y[2].s01, R_y[2].s23, table, GUASS_DELTA_S_2_5) * egain[3];
write_imagef (out, (int2)(out_x, out_y), out_data[0]);
write_imagef (out, (int2)(out_x, out_y + 1), out_data[1]);
}
////////////////////////////////B//////////////////////////////////////////
{
float4 B_z[3];
index = shared_pos (in_x - 1, in_y - 1);
B_z[0] = *(__local float4*)(z_data_in + index);
index = shared_pos (in_x - 1, in_y);
B_z[1] = *(__local float4*)(z_data_in + index);
index = shared_pos (in_x - 1, in_y + 1);
B_z[2] = *(__local float4*)(z_data_in + index);
value = (B_z[0].s12 + B_z[1].s12) * 0.5f;
out_data[0].s02 = dot_denoise (value, B_z[0].s01, B_z[0].s23, B_z[1].s01, B_z[1].s23, table, GUASS_DELTA_S_2_5) * egain[0];
value = (B_z[0].s12 + B_z[0].s23 +
B_z[1].s12 + B_z[1].s23) * 0.25f;
out_data[0].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[1].s12, B_z[1].s23, table, GUASS_DELTA_S_1_5) * egain[1];
value = B_z[1].s12;
out_data[1].s02 = dot_denoise (value, B_z[0].s12, B_z[1].s01, B_z[1].s23, B_z[2].s12, table, GUASS_DELTA_S_2) * egain[2];
value = (B_z[1].s12 + B_z[1].s23) * 0.5f;
out_data[1].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[2].s12, B_z[2].s23, table, GUASS_DELTA_S_2_5) * egain[3];
write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data[0]);
write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data[1]);
}
}
void shared_demosaic (
__local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
int in_x, int in_y,
__write_only image2d_t out, uint output_height, int out_x, int out_y,
uint has_denoise, __local float *table, CLEeConfig ee_config)
{
if (has_denoise) {
demosaic_denoise_2_cell (
x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y,
out, output_height, out_x, out_y, table, ee_config);
} else {
demosaic_2_cell (
x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y,
out, output_height, out_x, out_y);
}
}
__kernel void kernel_bayer_pipe (__read_only image2d_t input,
uint input_height,
__write_only image2d_t output,
uint output_height,
__global float * bnr_table,
uint has_denoise,
CLEeConfig ee_config
)
{
int g_id_x = get_global_id (0);
int g_id_y = get_global_id (1);
int g_size_x = get_global_size (0);
int g_size_y = get_global_size (1);
int l_id_x = get_local_id(0);
int l_id_y = get_local_id(1);
int l_size_x = get_local_size (0);
int l_size_y = get_local_size (1);
__local float p1_x[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_y[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_z[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_w[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE];
__local float4 p2[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE];
__local float4 *stats_cache = p2;
__local float SLM_delta_coef_table[64];
int out_x_start, out_y_start;
int x_start = get_group_id (0) * WORKGROUP_CELL_WIDTH;
int y_start = get_group_id (1) * WORKGROUP_CELL_HEIGHT;
int i = mad24 (l_id_y, l_size_x, l_id_x);
int j = i;
i *= 4;
for (; i < SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE; i += (l_size_x * l_size_y) * 4) {
grbg_slm_load (p1_x, p1_y, p1_z, p1_w, i,
input,
x_start - SLM_CELL_X_OFFSET, y_start - SLM_CELL_Y_OFFSET);
}
for(; j < 64; j += l_size_x * l_size_y)
SLM_delta_coef_table[j] = bnr_table[j];
barrier(CLK_LOCAL_MEM_FENCE);
i = mad24 (l_id_y, l_size_x, l_id_x);
int workitem_x_size = (SLM_CELL_X_VALID_SIZE / DEMOSAIC_X_CELL_PER_WORKITEM);
int input_x = (i % workitem_x_size) * DEMOSAIC_X_CELL_PER_WORKITEM;
int input_y = i / workitem_x_size;
shared_demosaic (
p1_x, p1_y, p1_z, p1_w,
input_x + SLM_CELL_X_OFFSET, input_y + SLM_CELL_Y_OFFSET,
output, output_height,
(input_x + x_start) * PIXEL_PER_CELL / 4, (input_y + y_start) * PIXEL_PER_CELL, has_denoise, SLM_delta_coef_table, ee_config);
}
/*
* function: kernel_blc
* black level correction for sensor data input
* input: image2d_t as read only
* output: image2d_t as write only
* blc_config: black level correction configuration
* color_bits: identify 10bit or 12bit data
* param:
*/
typedef struct
{
float level_gr; /* Black level for GR pixels */
float level_r; /* Black level for R pixels */
float level_b; /* Black level for B pixels */
float level_gb; /* Black level for GB pixels */
uint color_bits;
} BLCConfig;
uint decompression(uint data_in, uint color_bits)
{
uint data_out = 0;
data_out = data_in << (16 - color_bits);
return data_out;
}
__kernel void kernel_blc (__read_only image2d_t input,
__write_only image2d_t output,
BLCConfig blc_config)
{
int x0 = 2 * get_global_id (0);
int y0 = 2 * get_global_id (1);
int x1 = x0 + 1;
int y1 = y0 + 1;
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
int2 pos_r = (int2)(x0, y0);
int2 pos_gr = (int2)(x1, y0);
int2 pos_gb = (int2)(x0, y1);
int2 pos_b = (int2)(x1, y1);
uint4 pixel;
pixel = read_imageui(input, sampler, pos_r);
pixel.x = floor(pixel.x - blc_config.level_r * (pown(2.0, blc_config.color_bits)) + 0.5);
pixel.x = decompression(pixel.x, blc_config.color_bits);
write_imageui(output, pos_r, pixel);
pixel = read_imageui(input, sampler, pos_gr);
pixel.x = floor(pixel.x - blc_config.level_gr * (pown(2.0, blc_config.color_bits)) + 0.5);
pixel.x = decompression(pixel.x, blc_config.color_bits);
write_imageui(output, pos_gr, pixel);
pixel = read_imageui(input, sampler, pos_gb);
pixel.x = floor(pixel.x - blc_config.level_gb * (pown(2.0, blc_config.color_bits)) + 0.5);
pixel.x = decompression(pixel.x, blc_config.color_bits);
write_imageui(output, pos_gb, pixel);
pixel = read_imageui(input, sampler, pos_b);
pixel.x = floor(pixel.x - blc_config.level_b * (pown(2.0, blc_config.color_bits)) + 0.5);
pixel.x = decompression(pixel.x, blc_config.color_bits);
write_imageui(output, pos_b, pixel);
}
/*
* function: kernel_bnr
* implementation of bayer noise reduction
* input: image2d_t as read only
* output: image2d_t as write only
* bnr_gain: strength of noise reduction
* direction: sensitivity of edge
* todo: add the upstream algorithm for BNR
*/
__kernel void kernel_bnr (__read_only image2d_t input, __write_only image2d_t output,
float bnr_gain, float direction)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 p;
p = read_imagef(input, sampler, (int2)(x, y));
write_imagef(output, (int2)(x, y), p);
}
/*
* function: kernel_demosaic
* input: image2d_t as read only
* output: image2d_t as write only
*/
__kernel void kernel_demosaic (__read_only image2d_t input, __write_only image2d_t output)
{
int x = 2 * get_global_id (0);
int y = 2 * get_global_id (1);
// sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
int x0 = x - 1;
int y0 = y - 1;
float4 p[16];
#if 0
p[0] = read_imagef (input, sampler, (int2)(x0, y0));
p[1] = read_imagef (input, sampler, (int2)(x0 + 1, y0));
p[2] = read_imagef (input, sampler, (int2)(x0 + 2, y0));
p[3] = read_imagef (input, sampler, (int2)(x0 + 3, y0));
p[4] = read_imagef (input, sampler, (int2)(x0, y0 + 1));
p[5] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 1));
p[6] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 1));
p[7] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 1));
p[8] = read_imagef (input, sampler, (int2)(x0, y0 + 2));
p[9] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 2));
p[10] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 2));
p[11] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 2));
p[12] = read_imagef (input, sampler, (int2)(x0, y0 + 3));
p[13] = read_imagef (input, sampler, (int2)(x0 + 1, y0 + 3));
p[14] = read_imagef (input, sampler, (int2)(x0 + 2, y0 + 3));
p[15] = read_imagef (input, sampler, (int2)(x0 + 3, y0 + 3));
#endif
#pragma unroll
for (int i = 0; i < 16; ++i) {
p[i] = read_imagef (input, sampler, (int2)(x0 + i % 4, y0 + i / 4));
}
float4 p00, p01, p10, p11;
p00.x = (p[4].x + p[6].x) / 2.0;
p00.y = (p[5].x * 4 + p[0].x + p[2].x + p[8].x + p[10].x) / 8.0;
p00.z = (p[1].x + p[9].x) / 2.0;
p01.x = p[6].x;
p01.y = (p[2].x + p[5].x + p[7].x + p[10].x) / 4.0;
p01.z = (p[1].x + p[3].x + p[9].x + p[11].x) / 4.0;
p10.x = (p[4].x + p[6].x + p[12].x + p[14].x) / 4.0;
p10.y = (p[5].x + p[8].x + p[10].x + p[13].x) / 4.0;
p10.z = p[9].x;
p11.x = (p[6].x + p[14].x) / 2.0;
p11.y = (p[10].x * 4 + p[5].x + p[7].x + p[13].x + p[15].x) / 8.0;
p11.z = (p[9].x + p[11].x) / 2.0;
write_imagef (output, (int2)(x, y), p00);
write_imagef (output, (int2)(x + 1, y), p01);
write_imagef (output, (int2)(x, y + 1), p10);
write_imagef (output, (int2)(x + 1, y + 1), p11);
}
/*
* function: kernel_denoise
* bi-laterial filter for denoise usage
* input: image2d_t as read only
* output: image2d_t as write only
* sigma_r: the parameter to set sigma_r in the Gaussian filtering
* imw: image width, used for edge detect
* imh: image height, used for edge detect
*/
__constant float gausssingle[25]={0.6411,0.7574,0.8007,0.7574,0.6411,0.7574,0.8948,0.9459,0.8948,0.7574,0.8007,0.94595945,1,0.9459,0.8007,0.7574,0.8948,0.9459,0.8948,0.7574,0.6411,0.7574,0.8007,0.7574,0.6411};
#define LOCAL_SIZE_X 16
#define LOCAL_SIZE_Y 15
__kernel void kernel_denoise(__read_only image2d_t srcRGB, __write_only image2d_t dstRGB, float sigma_r, unsigned int imw, unsigned int imh)
{
int x = get_global_id(1); //[0,imw-1]
int y = get_global_id(0); //[0,imh-1]
int localX = get_local_id(1); //[0,imw/120-1]
int localY = get_local_id(0); //[0,imh/72-1]
//printf("localX=%d,localY=%d\n",localX,localY);
float normF=0;
float H=0;
float delta=0;
int i=0,j=0;
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE |CLK_FILTER_NEAREST;
sigma_r = 2*pown(sigma_r,2);
//coord in srcY
float4 line;
line.x=0;
line.y=0;
line.z=0;
line.w=1.0;
__local float4 pixel[LOCAL_SIZE_X+4][LOCAL_SIZE_Y+4];
bool interior = x > 1 && x <(imw-3)
&& y>1 && y< (imh-3);
if(interior)
{
pixel[localX+2][localY+2]=read_imagef(srcRGB, sampler,(int2)(x,y));
if(localX==0)
{
if(localY==0)
{
pixel[0][0]=read_imagef(srcRGB, sampler,(int2)(x-2,y-2));
pixel[0][1]=read_imagef(srcRGB, sampler,(int2)(x-2,y-1));
pixel[0][2]=read_imagef(srcRGB, sampler,(int2)(x-2,y ));
pixel[1][0]=read_imagef(srcRGB, sampler,(int2)(x-1,y-2));
pixel[1][1]=read_imagef(srcRGB, sampler,(int2)(x-1,y-1));
pixel[1][2]=read_imagef(srcRGB, sampler,(int2)(x-1,y ));
pixel[2][0]=read_imagef(srcRGB, sampler,(int2)(x ,y-2));
pixel[2][1]=read_imagef(srcRGB, sampler,(int2)(x ,y-1));
}
else if(localY==LOCAL_SIZE_Y-1)
{
pixel[0][LOCAL_SIZE_Y-1+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y ));
pixel[0][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x-2,y+1));
pixel[0][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y+2));
pixel[1][LOCAL_SIZE_Y-1+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y ));
pixel[1][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x-1,y+1));
pixel[1][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y+2));
pixel[2][LOCAL_SIZE_Y +2]=read_imagef(srcRGB, sampler,(int2)(x ,y+1));
pixel[2][LOCAL_SIZE_Y+1+2]=read_imagef(srcRGB, sampler,(int2)(x ,y+2));
}
else
{
pixel[0][localY+2]=read_imagef(srcRGB, sampler,(int2)(x-2,y));
pixel[1][localY+2]=read_imagef(srcRGB, sampler,(int2)(x-1,y));
}
}
else if(localX==LOCAL_SIZE_X-1)
{
if(localY==0)
{
pixel[LOCAL_SIZE_X-1+2+0][0]=read_imagef(srcRGB, sampler,(int2)(x ,y-2));
pixel[LOCAL_SIZE_X-1+2+0][1]=read_imagef(srcRGB, sampler,(int2)(x ,y-1));
//pixel[LOCAL_SIZE_X-1+2+0][2]=read_imagef(srcRGB, sampler,(int2)(x ,y));
pixel[LOCAL_SIZE_X-1+2+1][0]=read_imagef(srcRGB, sampler,(int2)(x+1,y-2));
pixel[LOCAL_SIZE_X-1+2+1][1]=read_imagef(srcRGB, sampler,(int2)(x+1,y-1));
pixel[LOCAL_SIZE_X-1+2+1][2]=read_imagef(srcRGB, sampler,(int2)(x+1,y ));
pixel[LOCAL_SIZE_X-1+2+2][0]=read_imagef(srcRGB, sampler,(int2)(x+2,y-2));
pixel[LOCAL_SIZE_X-1+2+2][1]=read_imagef(srcRGB, sampler,(int2)(x+2,y-1));
pixel[LOCAL_SIZE_X-1+2+2][2]=read_imagef(srcRGB, sampler,(int2)(x+2,y ));
}
else if(localY==LOCAL_SIZE_Y-1)
{
// pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x ,y ));
pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x ,y+1));
pixel[LOCAL_SIZE_X-1+2+0][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x ,y+2));
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x+1,y ));
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x+1,y+1));
pixel[LOCAL_SIZE_X-1+2+1][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x+1,y+2));
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+0]=read_imagef(srcRGB, sampler,(int2)(x+2,y ));
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB, sampler,(int2)(x+2,y+1));
pixel[LOCAL_SIZE_X-1+2+2][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB, sampler,(int2)(x+2,y+2));
}
else
{
pixel[LOCAL_SIZE_X-1+2+1][localY+2]=read_imagef(srcRGB, sampler,(int2)(x+1,y ));
pixel[LOCAL_SIZE_X-1+2+2][localY+2]=read_imagef(srcRGB, sampler,(int2)(x+2,y ));
}
}
else if(localY==0)
{
pixel[localX+2][0]=read_imagef(srcRGB,sampler,(int2)(x,y-2));
pixel[localX+2][1]=read_imagef(srcRGB,sampler,(int2)(x,y-1));
}
else if(localY==LOCAL_SIZE_Y-1)
{
pixel[localX+2][LOCAL_SIZE_Y-1+2+1]=read_imagef(srcRGB,sampler,(int2)(x,y+1));
pixel[localX+2][LOCAL_SIZE_Y-1+2+2]=read_imagef(srcRGB,sampler,(int2)(x,y+2));
}
}else{
line=read_imagef(srcRGB, sampler,(int2)(x,y));
}
barrier(CLK_LOCAL_MEM_FENCE);
if (interior) {
#pragma unroll
for(i=0;i<5;i++)
{
#pragma unroll
for(j=0;j<5;j++)
{
delta=pown(pixel[localX+i][localY+j].x-pixel[localX+2][localY+2].x,2) +
pown(pixel[localX+i][localY+j].y-pixel[localX+2][localY+2].y,2) +
pown(pixel[localX+i][localY+j].z-pixel[localX+2][localY+2].z,2);
H = (exp(-(delta/sigma_r)))*gausssingle[i*5+j];
normF+=H;
line.x+=pixel[localX+i][localY+j].x*H;
line.y+=pixel[localX+i][localY+j].y*H;
line.z+=pixel[localX+i][localY+j].z*H;
}
}
line.x=line.x/normF;
line.y=line.y/normF;
line.z=line.z/normF;
}
write_imagef(dstRGB,(int2)(x,y),line);
}
/*
* function: kernel_dpc
* defect pixel correction on bayer data input
* input: image2d_t as read only
* output: image2d_t as write only
* gr_threshold: GR threshold of defect pixel correction
* r_threshold: R threshold of defect pixel correction
* b_threshold: B threshold of defect pixel correction
* gb_threshold: GB threshold of defect pixel correction
* param:
*/
__kernel void kernel_dpc (__read_only image2d_t input,
__write_only image2d_t output,
float gr_threshold, float r_threshold,
float b_threshold, float gb_threshold)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 p[9];
p[0] = read_imagef(input, sampler, (int2)(x - 2, y - 2));
p[1] = read_imagef(input, sampler, (int2)(x, y - 2));
p[2] = read_imagef(input, sampler, (int2)(x + 2, y - 2));
p[3] = read_imagef(input, sampler, (int2)(x - 2, y));
p[4] = read_imagef(input, sampler, (int2)(x, y));
p[5] = read_imagef(input, sampler, (int2)(x + 2, y));
p[6] = read_imagef(input, sampler, (int2)(x - 2, y + 2));
p[7] = read_imagef(input, sampler, (int2)(x, y + 2));
p[8] = read_imagef(input, sampler, (int2)(x + 2, y + 2));
float aveVer = (p[1].x + p[7].x) / 2;
float aveHor = (p[3].x + p[5].x) / 2;
float avePosDia = (p[0].x + p[8].x) / 2;
float aveNegDia = (p[2].x + p[6].x) / 2;
float aveMin, aveMax;
if (aveVer > aveHor) {
aveMin = aveHor;
aveMax = aveVer;
}
else {
aveMin = aveVer;
aveMax = aveHor;
}
if (avePosDia < aveMin)
aveMin = avePosDia;
else if (avePosDia > aveMax)
aveMax = avePosDia;
if (aveNegDia < aveMin)
aveMin = aveNegDia;
else if (aveNegDia > aveMax)
aveMax = aveNegDia;
float edgeVer = p[4].x - aveVer;
float edgeHor = p[4].x - aveHor;
float edgeNeighbourVer = (p[3].x + p[5].x - (p[0].x + p[2].x + p[6].x + p[8].x) / 2) / 2;
float edgeNeighbourHor = (p[1].x + p[7].x - (p[0].x + p[2].x + p[6].x + p[8].x) / 2) / 2;
float threshold;
if (x % 2 == 0)
threshold = (y % 2 == 0) ? gr_threshold : b_threshold;
else
threshold = (y % 2 == 0) ? r_threshold : gb_threshold;
float4 pixelOut;
pixelOut.x = p[4].x;
pixelOut.y = p[4].y;
pixelOut.z = p[4].z;
pixelOut.w = p[4].w;
if ((edgeVer > edgeNeighbourVer) && (edgeHor > edgeNeighbourHor)) {
if ((p[4].x - aveMax) > threshold) {
pixelOut.x = aveMax;
}
}
if ((edgeVer < edgeNeighbourVer) && (edgeHor < edgeNeighbourHor)) {
if ((aveMin - p[4].x) > threshold) {
pixelOut.x = aveMin;
}
}
write_imagef (output, (int2)(x, y), pixelOut);
}
/*
* function: kernel_ee
* input: image2d_t as read only
* output: image2d_t as write only
* ee_config: Edge enhancement configuration
*/
typedef struct
{
float ee_gain;
float ee_threshold;
float nr_gain;
} CLEeConfig;
__constant float lv[25] = {0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, -1.0, 0.0, 0.0,
-1.0, -14.0, 32.0, -14.0, -1.0,
0.0, 0.0, -1.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0
};
__constant float lh[25] = {0.0, 0.0, -1.0, 0.0, 0.0,
0.0, 0.0, -14.0, 0.0, 0.0,
0.0, -1.0, 32.0, -1.0, 0.0,
0.0, 0.0, -14.0, 0.0, 0.0,
0.0, 0.0, -1.0, 0.0, 0.0
};
__constant float la[25] = {0.0, 0.0, -2.0, 0.0, 0.0,
0.0, -2.0, -2.0, -2.0, 0.0,
-2.0, -2.0, 24.0, -2.0, -2.0,
0.0, -2.0, -2.0, -2.0, 0.0,
0.0, 0.0, -2.0, 0.0, 0.0
};
__constant float na[25] = { -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, 16.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0
};
__kernel void kernel_ee (__read_only image2d_t input, __write_only image2d_t output, uint vertical_offset_in, uint vertical_offset_out, CLEeConfig ee_config)
{
int x = get_global_id (0);
int y = get_global_id (1);
int X = get_global_size(0);
int Y = get_global_size(1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 y_in, y_out, uv_in;
float4 a[5], b[5], c[5], d[5], e[5];
// cpy UV
if(y % 2 == 0) {
uv_in = read_imagef(input, sampler, (int2)(x, y / 2 + vertical_offset_in));
write_imagef(output, (int2)(x, y / 2 + vertical_offset_out), uv_in);
}
a[0] = read_imagef(input, sampler, (int2)(x - 2, y - 2));
a[1] = read_imagef(input, sampler, (int2)(x - 1, y - 2));
a[2] = read_imagef(input, sampler, (int2)(x, y - 2));
a[3] = read_imagef(input, sampler, (int2)(x + 1, y - 2));
a[4] = read_imagef(input, sampler, (int2)(x + 2, y - 2));
b[0] = read_imagef(input, sampler, (int2)(x - 2, y - 1));
b[1] = read_imagef(input, sampler, (int2)(x - 1, y - 1));
b[2] = read_imagef(input, sampler, (int2)(x, y - 1));
b[3] = read_imagef(input, sampler, (int2)(x + 1, y - 1));
b[4] = read_imagef(input, sampler, (int2)(x + 2, y - 1));
c[0] = read_imagef(input, sampler, (int2)(x - 2, y));
c[1] = read_imagef(input, sampler, (int2)(x - 1, y));
c[2] = read_imagef(input, sampler, (int2)(x, y));
c[3] = read_imagef(input, sampler, (int2)(x + 1, y));
c[4] = read_imagef(input, sampler, (int2)(x + 2, y));
d[0] = read_imagef(input, sampler, (int2)(x - 2, y + 1));
d[1] = read_imagef(input, sampler, (int2)(x - 1, y + 1));
d[2] = read_imagef(input, sampler, (int2)(x, y + 1));
d[3] = read_imagef(input, sampler, (int2)(x + 1, y + 1));
d[4] = read_imagef(input, sampler, (int2)(x + 2, y + 1));
e[0] = read_imagef(input, sampler, (int2)(x - 2, y + 2));
e[1] = read_imagef(input, sampler, (int2)(x - 1, y + 2));
e[2] = read_imagef(input, sampler, (int2)(x, y + 2));
e[3] = read_imagef(input, sampler, (int2)(x + 1, y + 2));
e[4] = read_imagef(input, sampler, (int2)(x + 2, y + 2));
float eV = (a[0].x * lv[0] + a[1].x * lv[1] + a[2].x * lv[2] + a[3].x * lv[3] + a[4].x * lv[4]
+ b[0].x * lv[5] + b[1].x * lv[6] + b[2].x * lv[7] + b[3].x * lv[8] + b[4].x * lv[9]
+ c[0].x * lv[10] + c[1].x * lv[11] + c[2].x * lv[12] + c[3].x * lv[13] + c[4].x * lv[14]
+ d[0].x * lv[15] + d[1].x * lv[16] + d[2].x * lv[17] + d[3].x * lv[18] + d[4].x * lv[19]
+ e[0].x * lv[20] + e[1].x * lv[21] + e[2].x * lv[22] + e[3].x * lv[23] + e[4].x * lv[24]) * 255.0;
float eH = (a[0].x * lh[0] + a[1].x * lh[1] + a[2].x * lh[2] + a[3].x * lh[3] + a[4].x * lh[4]
+ b[0].x * lh[5] + b[1].x * lh[6] + b[2].x * lh[7] + b[3].x * lh[8] + b[4].x * lh[9]
+ c[0].x * lh[10] + c[1].x * lh[11] + c[2].x * lh[12] + c[3].x * lh[13] + c[4].x * lh[14]
+ d[0].x * lh[15] + d[1].x * lh[16] + d[2].x * lh[17] + d[3].x * lh[18] + d[4].x * lh[19]
+ e[0].x * lh[20] + e[1].x * lh[21] + e[2].x * lh[22] + e[3].x * lh[23] + e[4].x * lh[24]) * 255.0;
float eA = (a[0].x * la[0] + a[1].x * la[1] + a[2].x * la[2] + a[3].x * la[3] + a[4].x * la[4]
+ b[0].x * la[5] + b[1].x * la[6] + b[2].x * la[7] + b[3].x * la[8] + b[4].x * la[9]
+ c[0].x * la[10] + c[1].x * la[11] + c[2].x * la[12] + c[3].x * la[13] + c[4].x * la[14]
+ d[0].x * la[15] + d[1].x * la[16] + d[2].x * la[17] + d[3].x * la[18] + d[4].x * la[19]
+ e[0].x * la[20] + e[1].x * la[21] + e[2].x * la[22] + e[3].x * la[23] + e[4].x * la[24]) * 255.0;
float nA = (a[0].x * na[0] + a[1].x * na[1] + a[2].x * na[2] + a[3].x * na[3] + a[4].x * na[4]
+ b[0].x * na[5] + b[1].x * na[6] + b[2].x * na[7] + b[3].x * na[8] + b[4].x * na[9]
+ c[0].x * na[10] + c[1].x * na[11] + c[2].x * na[12] + c[3].x * na[13] + c[4].x * na[14]
+ d[0].x * na[15] + d[1].x * na[16] + d[2].x * na[17] + d[3].x * na[18] + d[4].x * na[19]
+ e[0].x * na[20] + e[1].x * na[21] + e[2].x * na[22] + e[3].x * na[23] + e[4].x * na[24]) * 255.0;
float nV = eH;
float nH = eV;
float dV = (fabs(2.0 * b[1].x - a[1].x - c[1].x) + fabs(2.0 * b[2].x - a[2].x - c[2].x) + fabs(2.0 * b[3].x - a[3].x - c[3].x) + fabs(2.0 * c[1].x - b[1].x - d[1].x) + fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[3].x - b[3].x - d[3].x) + fabs(2.0 * d[1].x - c[1].x - e[1].x) + fabs(2.0 * d[2].x - c[2].x - e[2].x) + fabs(2.0 * d[3].x - c[3].x - e[3].x)) * 255.0;
float dH = (fabs(2.0 * b[1].x - b[0].x - b[2].x) + fabs(2.0 * b[2].x - b[1].x - b[3].x) + fabs(2.0 * b[3].x - b[2].x - b[4].x) + fabs(2.0 * c[1].x - c[0].x - c[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[3].x - c[2].x - c[4].x) + fabs(2.0 * d[1].x - d[0].x - d[2].x) + fabs(2.0 * d[2].x - d[1].x - d[3].x) + fabs(2.0 * d[3].x - d[2].x - d[4].x)) * 255.0;
float dA = (fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[2].x - b[1].x - d[3].x) + fabs(2.0 * c[2].x - b[3].x - d[1].x) + fabs(2.0 * c[2].x - a[0].x - e[4].x) + fabs(2.0 * c[2].x - a[4].x - e[0].x) + fabs(2.0 * c[2].x - c[0].x - c[4].x) + fabs(2.0 * c[2].x - a[2].x - e[2].x) + fabs(2.0 * c[2].x - (b[0].x + d[0].x + b[4].x + d[4].x) / 2.0)) * 255.0;
float edge = dH < (dV < dA ? dV : dA) ? eH : (dV < dA ? eV : eA);
float noise = dH < (dV < dA ? dV : dA) ? nH : (dV < dA ? nV : nA);
float dir = dH < (dV < dA ? dA : dV) ? (dV < dA ? dA : dV) : dH;
noise = noise * ee_config.nr_gain / 16.0;
edge = edge * ee_config.ee_gain / 16.0;
y_out.x = dir > ee_config.ee_threshold ? (c[2].x * 255.0 + edge - noise) / 255.0 : c[2].x;
y_out.y = 0.0;
y_out.z = 0.0;
y_out.w = 1.0;
write_imagef(output, (int2)(x, y), y_out);
}
/*
* function: kernel_gamma
* input: image2d_t as read only
* output: image2d_t as write only
* table: gamma table.
*/
//TODO: linear scale should be a problem
__kernel void kernel_gamma (__read_only image2d_t input, __write_only image2d_t output, __global float *table)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
float4 pixel_in[8], pixel_out[8];
int i=0,j=0;
#pragma unroll
for(j=0;j<2;j++) {
#pragma unroll
for(i=0;i<4;i++) {
pixel_in[j*4 + i] = read_imagef(input, sampler,(int2)(4*x + i, 2*y + j));
pixel_out[j*4 + i].x = table[convert_int(pixel_in[j*4 + i].x * 255.0)] / 255.0;
pixel_out[j*4 + i].y = table[convert_int(pixel_in[j*4 + i].y * 255.0)] / 255.0;
pixel_out[j*4 + i].z = table[convert_int(pixel_in[j*4 + i].z * 255.0)] / 255.0;
pixel_out[j*4 + i].w = 0.0;
write_imagef(output, (int2)(4*x + i, 2*y + j), pixel_out[j*4 + i]);
}
}
}
/*
* function: kernel_hdr_lab
* sample code of default kernel arguments
* input: image2d_t as read only
* output: image2d_t as write only
*/
__constant float table[] = {4.3287616, 5.9319224, 7.1324205, 8.1288157, 8.9965763, 9.7739191, 10.483327, 11.139331, 11.751958, 12.328467, 12.874310, 13.393700, 13.889977, 14.365837, 14.823494, 15.264792, 15.691289, 16.104307, 16.504990, 16.894327, 17.273184, 17.642323, 18.002419, 18.354071, 18.697817, 19.034143, 19.363485, 19.686239, 20.002764, 20.313389, 20.618416, 20.918123, 21.212763, 21.502573, 21.787767, 22.068552, 22.345116, 22.617630, 22.886259, 23.151157, 23.412468, 23.670324, 23.924854, 24.176174, 24.424398, 24.669630, 24.911972, 25.151518, 25.388355, 25.622572, 25.854246, 26.083458, 26.310276, 26.534771, 26.757010, 26.977057, 27.194969, 27.410807, 27.624624, 27.836473, 28.046406, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.254467, 28.377895, 28.567524, 28.755781, 28.942692, 29.128284, 29.312582, 29.495617, 29.677410, 29.857986, 30.037369, 30.215582, 30.392645, 30.568581, 30.743410, 30.917152, 31.089828, 31.261454, 31.432049, 31.601633, 31.770222, 31.937832, 32.104481, 32.270180, 32.434952, 32.598808, 32.761761, 32.923832, 33.085026, 33.245361, 33.404850, 33.563507, 33.721340, 33.878368, 34.034599, 34.190044, 34.344715, 34.498627, 34.651783, 34.804199, 34.955887, 35.106853, 35.257107, 35.406662, 35.555523, 35.703705, 35.851208, 35.998051, 36.144238, 36.289776, 36.434673, 36.578941, 36.722588, 36.865616, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.008038, 37.013512, 37.148350, 37.282703, 37.416573, 37.549969, 37.682888, 37.815342, 37.947330, 38.078865, 38.209946, 38.340580, 38.470768, 38.600517, 38.729836, 38.858719, 38.987175, 39.115215, 39.242832, 39.370041, 39.496838, 39.623226, 39.749214, 39.874802, 40, 40.124805, 40.249222, 40.373260, 40.496914, 40.620193, 40.743095, 40.865631, 40.987804, 41.109608, 41.231056, 41.352146, 41.472885, 41.593269, 41.713306, 41.833000, 41.952354, 42.071369, 42.190048, 42.308392, 42.426407, 42.544094, 42.661457, 42.778500, 42.895222, 43.011627, 43.127716, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.243496, 43.315910, 43.427536, 43.538902, 43.650013, 43.760872, 43.871479, 43.981831, 44.091938, 44.201797, 44.311413, 44.420780, 44.529911, 44.638798, 44.747448, 44.855862, 44.964039, 45.071983, 45.179695, 45.287178, 45.394428, 45.501453, 45.608253, 45.714825, 45.821178, 45.927307, 46.033215, 46.138905, 46.244377, 46.349632, 46.454674, 46.559505, 46.664120, 46.768528, 46.872723, 46.976711, 47.080494, 47.184067, 47.287441, 47.390610, 47.493576, 47.596344, 47.698910, 47.801281, 47.903454, 48.005428, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.107212, 48.116840, 48.216503, 48.316002, 48.415333, 48.514507, 48.613514, 48.712364, 48.811050, 48.909580, 49.007950, 49.106163, 49.204220, 49.302116, 49.399860, 49.497452, 49.594887, 49.692173, 49.789303, 49.886284, 49.983109, 50.079788, 50.176319, 50.272701, 50.368935, 50.465023, 50.560966, 50.656761, 50.752411, 50.847919, 50.943279, 51.038502, 51.133583, 51.228519, 51.323318, 51.417973, 51.512493, 51.606873, 51.701115, 51.795219, 51.889191, 51.983021, 52.076717, 52.170280, 52.263706, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.357002, 52.370274, 52.462727, 52.555069, 52.647293, 52.739407, 52.831406, 52.923294, 53.015072, 53.106739, 53.198296, 53.289738, 53.381077, 53.472301, 53.563419, 53.654430, 53.745327, 53.836124, 53.926811, 54.017391, 54.107864, 54.198231, 54.288494, 54.378651, 54.468708, 54.558655, 54.648502, 54.738243, 54.827885, 54.917419, 55.006851, 55.096188, 55.185417, 55.274551, 55.363579, 55.452511, 55.541340, 55.630070, 55.718704, 55.807240, 55.895676, 55.984013, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.072254, 56.136337, 56.224670, 56.312920, 56.401089, 56.489174, 56.577179, 56.665100, 56.752941, 56.840698, 56.928371, 57.015968, 57.103481, 57.190918, 57.278271, 57.365547, 57.452744, 57.539856, 57.626896, 57.713852, 57.800732, 57.887531, 57.974255, 58.060902, 58.147465, 58.233959, 58.320370, 58.406708, 58.492970, 58.579155, 58.665260, 58.751289, 58.837250, 58.923130, 59.008938, 59.094666, 59.180325, 59.265907, 59.351414, 59.436852, 59.522209, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.607498, 59.676125, 59.762394, 59.848598, 59.934742, 60.020828, 60.106846, 60.192806, 60.278702, 60.364540, 60.450314, 60.536030, 60.621685, 60.707275, 60.792812, 60.878284, 60.963699, 61.049049, 61.134342, 61.219578, 61.304752, 61.389870, 61.474926, 61.559925, 61.644867, 61.729748, 61.814568, 61.899334, 61.984039, 62.068687, 62.153282, 62.237812, 62.322292, 62.406708, 62.491070, 62.575375, 62.659622, 62.743816, 62.827950, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.912029, 62.940441, 63.026379, 63.112267, 63.198112, 63.283909, 63.369659, 63.455360, 63.541019, 63.626637, 63.712200, 63.797718, 63.883190, 63.968624, 64.054001, 64.139343, 64.224632, 64.309875, 64.395081, 64.480240, 64.565346, 64.650414, 64.735435, 64.820412, 64.905342, 64.990234, 65.075073, 65.159874, 65.244629, 65.329346, 65.414009, 65.498634, 65.583214, 65.667747, 65.752243, 65.836693, 65.921097, 66.005463, 66.089783, 66.174057, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.258286, 66.291786, 66.378784, 66.465752, 66.552689, 66.639587, 66.726456, 66.813293, 66.900085, 66.986855, 67.073586, 67.160286, 67.246948, 67.333572, 67.420174, 67.506737, 67.593269, 67.679764, 67.766228, 67.852654, 67.939056, 68.025421, 68.111755, 68.198051, 68.284325, 68.370560, 68.456757, 68.542923, 68.629059, 68.715164, 68.801239, 68.887276, 68.973282, 69.059258, 69.145203, 69.231110, 69.316986, 69.402832, 69.488655, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.574432, 69.659309, 69.748878, 69.838425, 69.927956, 70.017456, 70.106934, 70.196388, 70.285820, 70.375237, 70.464622, 70.553986, 70.643333, 70.732658, 70.821953, 70.911224, 71.000481, 71.089706, 71.178917, 71.268105, 71.357262, 71.446404, 71.535530, 71.624619, 71.713692, 71.802750, 71.891777, 71.980789, 72.069771, 72.158737, 72.247673, 72.336594, 72.425499, 72.514374, 72.603226, 72.692062, 72.780876, 72.869659, 72.958427, 73.047173, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.135902, 73.151604, 73.245392, 73.339172, 73.432938, 73.526695, 73.620430, 73.714165, 73.807884, 73.901588, 73.995285, 74.088966, 74.182640, 74.276299, 74.369942, 74.463577, 74.557198, 74.650818, 74.744415, 74.838005, 74.931580, 75.025139, 75.118698, 75.212242, 75.305771, 75.399284, 75.492798, 75.586296, 75.679771, 75.773247, 75.866707, 75.960159, 76.053596, 76.147018, 76.240433, 76.333839, 76.427231, 76.520615, 76.613983, 76.707336, 76.800690, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.894020, 76.900002, 77, 77.100006, 77.200005, 77.300003, 77.400002, 77.500000, 77.600006, 77.700005, 77.800003, 77.900002, 78, 78.099998, 78.199997, 78.300003, 78.400002, 78.500000, 78.599998, 78.699997, 78.799995, 78.899994, 79, 79.099998, 79.199997, 79.299995, 79.400002, 79.500000, 79.599998, 79.699997, 79.799995, 79.900002, 80, 80.099998, 80.199997, 80.299995, 80.400002, 80.500000, 80.599998, 80.699997, 80.800003, 80.900002, 81, 81.099998, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.199997, 81.299294, 81.408134, 81.516991, 81.625862, 81.734749, 81.843643, 81.952560, 82.061493, 82.170433, 82.279388, 82.388359, 82.497353, 82.606354, 82.715363, 82.824394, 82.933441, 83.042496, 83.151573, 83.260658, 83.369759, 83.478874, 83.588005, 83.697151, 83.806305, 83.915474, 84.024666, 84.133865, 84.243080, 84.352310, 84.461555, 84.570816, 84.680084, 84.789368, 84.898666, 85.007988, 85.117310, 85.226654, 85.336006, 85.445374, 85.554764, 85.664162, 85.773575, 85.882996, 85.992439, 86.101898, 86.211365, 86.320839, 86.430336, 86.539848, 86.649376, 86.758911, 86.868454, 86.978027, 87.087608, 87.197197, 87.306801, 87.416420, 87.526054, 87.635704, 87.745361, 87.855034, 87.964722, 88.074432, 88.184143, 88.293877, 88.403618, 88.513374, 88.623146, 88.732933, 88.842728, 88.952538, 89.062363, 89.172203, 89.282051, 89.391914, 89.501793, 89.611694, 89.721596, 89.831512, 89.941444, 90.051399, 90.161354, 90.271324, 90.381310, 90.491310, 90.601326, 90.711349, 90.821388, 90.931442, 91.041512, 91.151596, 91.261681, 91.371788, 91.481911, 91.592049, 91.702187, 91.812347, 91.922516, 92.032707, 92.142906, 92.253120, 92.363342, 92.473579, 92.583839, 92.694099, 92.804375, 92.914665, 93.024971, 93.135292, 93.245621, 93.355965, 93.466316, 93.576683, 93.687073, 93.797470, 93.907875, 94.018295, 94.128731, 94.239182, 94.349640, 94.460106, 94.570595, 94.681099, 94.791611, 94.902130, 95.012665, 95.123215, 95.233788, 95.344360, 95.454948, 95.565552, 95.676170, 95.786797, 95.897430, 96.008087, 96.118752, 96.229431, 96.340126, 96.450829, 96.561539, 96.672272, 96.783012, 96.893768, 97.004532, 97.115311, 97.226112, 97.336914, 97.447731, 97.558563, 97.669403, 97.780266, 97.891129, 98.002007, 98.112900, 98.223816, 98.334732, 98.445656, 98.556602, 98.667557, 98.778526, 98.889511, 99.000504, 99.111504, 99.222519, 99.333557, 99.444603, 99.555656, 99.666718, 99.777809, 99.888893, 100};
static float fun(float in)
{
return in > 0.008856 ? (native_powr(in, 1.0 / 3)) : (7.787 * in + 16.0 / 116);
}
__kernel void kernel_hdr_lab (__read_only image2d_t input, __write_only image2d_t output)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
float4 pixel_in, pixel_out;
unsigned int table_id;
pixel_in = read_imagef(input, sampler, (int2)(x, y));
float X, Y, Z, L, a, b;
X = 0.412453 * pixel_in.x + 0.357580 * pixel_in.y + 0.180423 * pixel_in.z;
Y = 0.212671 * pixel_in.x + 0.715160 * pixel_in.y + 0.072169 * pixel_in.z;
Z = 0.019334 * pixel_in.x + 0.119193 * pixel_in.y + 0.950227 * pixel_in.z;
L = Y > 0.008856 ? (116.0 * native_powr(Y, 1.0 / 3) - 16.0) : 903.3 * Y;
a = 500 * (fun(X) - fun(Y));
b = 200 * (fun(Y) - fun(Z));
table_id = (unsigned int)(L * 10) < 1000 ? (unsigned int)(L * 10) : 999;
L = table[table_id];
float fX, fY, fZ;
Y = (L / 116.0) * (L / 116.0) * (L / 116.0);
Y = Y < 0.008856 ? L / 903.3 : Y;
fY = Y > 0.008856 ? (native_powr(Y, 1.0 / 3.0)) : (7.787 * Y + 16.0 / 116.0);
fX = a / 500.0 + fY;
X = fX * fX * fX;
X = X < 0.008865 ? ((fX - 16.0 / 116.0) / 7.787) : X;
fZ = fY - b / 200.0;
Z = fZ * fZ * fZ;
Z = Z < 0.008865 ? ((fZ - 16.0 / 116.0) / 7.787) : Z;
pixel_out.x = 3.240479 * X - 1.537150 * Y - 0.498535 * Z;
pixel_out.y = -0.969256 * X + 1.875992 * Y + 0.041556 * Z;
pixel_out.z = 0.055648 * X - 0.204043 * Y + 1.204043 * Z;
pixel_out.w = pixel_in.w;
write_imagef(output, (int2)(x, y), pixel_out);
}
/*
* function: kernel_hdr_rgb
* sample code of default kernel arguments
* input: image2d_t as read only
* output: image2d_t as write only
*/
__constant int HDRTable[1024] = {104, 106, 108, 109, 111, 113, 114, 116, 118, 119, 121, 123, 125, 126, 128, 130, 131, 133, 135, 136, 138, 139, 141, 143, 144, 146, 148, 149, 151, 152, 154, 156, 157, 159, 160, 162, 164, 165, 167, 168, 170, 171, 173, 174, 176, 177, 179, 181, 182, 184, 185, 187, 188, 190, 191, 193, 194, 195, 197, 198, 200, 201, 203, 204, 206, 207, 209, 210, 211, 213, 214, 216, 217, 219, 220, 221, 223, 224, 226, 227, 228, 230, 231, 232, 234, 235, 236, 238, 239, 241, 242, 243, 245, 246, 247, 248, 250, 251, 252, 254, 255, 256, 258, 259, 260, 261, 263, 264, 265, 267, 268, 269, 270, 272, 273, 274, 275, 277, 278, 279, 280, 281, 283, 284, 285, 286, 287, 289, 290, 291, 292, 293, 295, 296, 297, 298, 299, 300, 302, 303, 304, 305, 306, 307, 308, 310, 311, 312, 313, 314, 315, 316, 317, 318, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 389, 390, 391, 392, 393, 394, 395, 395, 396, 397, 398, 399, 400, 401, 401, 402, 403, 404, 405, 406, 406, 407, 408, 409, 410, 410, 411, 412, 413, 414, 415, 415, 416, 417, 418, 418, 419, 420, 421, 422, 422, 423, 424, 425, 425, 426, 427, 428, 429, 429, 430, 431, 432, 432, 433, 434, 435, 435, 436, 437, 437, 438, 439, 440, 440, 441, 442, 442, 443, 444, 445, 445, 446, 447, 447, 448, 449, 450, 450, 451, 452, 452, 453, 454, 454, 455, 456, 456, 457, 458, 458, 459, 460, 460, 461, 462, 462, 463, 464, 464, 465, 466, 466, 467, 468, 468, 469, 469, 470, 471, 471, 472, 473, 473, 474, 475, 475, 476, 476, 477, 478, 478, 479, 479, 480, 481, 481, 482, 483, 483, 484, 484, 485, 486, 486, 487, 487, 488, 488, 489, 490, 490, 491, 491, 492, 493, 493, 494, 494, 495, 495, 496, 497, 497, 498, 498, 499, 499, 500, 501, 501, 502, 502, 503, 503, 504, 504, 505, 506, 506, 507, 507, 508, 508, 509, 509, 510, 510, 511, 512, 512, 513, 513, 514, 514, 515, 515, 516, 516, 517, 517, 518, 518, 519, 519, 520, 521, 521, 522, 522, 523, 523, 524, 524, 525, 525, 526, 526, 527, 527, 528, 528, 529, 529, 530, 530, 531, 531, 532, 532, 533, 533, 534, 534, 535, 535, 536, 536, 537, 537, 538, 538, 539, 539, 540, 540, 541, 541, 542, 542, 543, 543, 544, 544, 545, 545, 546, 546, 547, 547, 548, 548, 549, 549, 550, 550, 551, 551, 552, 552, 553, 553, 554, 554, 554, 555, 555, 556, 556, 557, 557, 558, 558, 559, 559, 560, 560, 561, 561, 562, 562, 563, 563, 564, 564, 565, 565, 566, 566, 567, 567, 568, 568, 568, 569, 569, 570, 570, 571, 571, 572, 572, 573, 573, 574, 574, 575, 575, 576, 576, 577, 577, 578, 578, 579, 579, 580, 580, 581, 581, 582, 582, 582, 583, 583, 584, 584, 585, 585, 586, 586, 587, 587, 588, 588, 589, 589, 590, 590, 591, 591, 592, 592, 593, 593, 594, 594, 595, 595, 596, 596, 597, 597, 598, 598, 599, 599, 600, 600, 601, 601, 602, 602, 603, 603, 604, 604, 605, 605, 606, 606, 607, 607, 608, 608, 609, 609, 610, 611, 611, 612, 612, 613, 613, 614, 614, 615, 615, 616, 616, 617, 617, 618, 618, 619, 619, 620, 621, 621, 622, 622, 623, 623, 624, 624, 625, 625, 626, 627, 627, 628, 628, 629, 629, 630, 630, 631, 632, 632, 633, 633, 634, 634, 635, 636, 636, 637, 637, 638, 638, 639, 640, 640, 641, 641, 642, 642, 643, 644, 644, 645, 645, 646, 647, 647, 648, 648, 649, 650, 650, 651, 651, 652, 653, 653, 654, 654, 655, 656, 656, 657, 658, 658, 659, 659, 660, 661, 661, 662, 663, 663, 664, 665, 665, 666, 666, 667, 668, 668, 669, 670, 670, 671, 672, 672, 673, 674, 674, 675, 676, 676, 677, 678, 678, 679, 680, 680, 681, 682, 683, 683, 684, 685, 685, 686, 687, 687, 688, 689, 690, 690, 691, 692, 692, 693, 694, 695, 695, 696, 697, 698, 698, 699, 700, 701, 701, 702, 703, 704, 704, 705, 706, 707, 707, 708, 709, 710, 710, 711, 712, 713, 714, 714, 715, 716, 717, 718, 718, 719, 720, 721, 722, 722, 723, 724, 725, 726, 726, 727, 728, 729, 730, 731, 731, 732, 733, 734, 735, 736, 737, 737, 738, 739, 740, 741, 742, 743, 744, 744, 745, 746, 747, 748, 749, 750, 751, 752, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 814, 815, 816, 817, 818, 819, 820, 821, 822, 824, 825, 826, 827, 828, 829, 830, 832, 833, 834, 835, 836, 837, 839, 840, 841, 842, 843, 845, 846, 847, 848, 849, 851, 852, 853, 854, 855, 857, 858, 859, 860, 862, 863, 864, 865, 867, 868, 869, 870, 872, 873, 874, 876, 877, 878, 880, 881, 882, 883, 885, 886, 887, 889, 890, 891, 893, 894, 895, 897, 898, 900, 901, 902, 904, 905, 906, 908, 909, 911, 912, 913, 915, 916, 918, 919, 920, 922, 923, 925, 926, 928, 929, 931, 932, 933, 935, 936, 938, 939, 941, 942, 944, 945, 947, 948, 950, 951, 953, 954, 956, 957, 959, 961, 962, 964, 965, 967, 968, 970, 972, 973, 975, 976, 978, 979, 981, 983, 984, 986, 988, 989, 991, 992, 994, 996, 997, 999, 1001, 1002, 1004, 1006, 1007, 1009, 1011, 1012, 1014, 1016, 1017, 1019, 1021, 1023};
__kernel void kernel_hdr_rgb (__read_only image2d_t input, __write_only image2d_t output)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
int2 pos = (int2)(x, y);
float4 pixel_in, pixel_out;
pixel_in = read_imagef(input, sampler, pos);
pixel_out.x = convert_float(HDRTable[convert_int(pixel_in.x * 1023)] / 1023.0);
pixel_out.y = convert_float(HDRTable[convert_int(pixel_in.y * 1023)] / 1023.0);
pixel_out.z = convert_float(HDRTable[convert_int(pixel_in.z * 1023)] / 1023.0);
pixel_out.w = 0.0;
write_imagef(output, pos, pixel_out);
}
/**
* \brief Image scaling kernel function.
* \param[in] input Input image object.
* \param[out] output scaled output image object.
* \param[in] output_widht: output width
* \param[in] output_height: output height
* \param[in] vertical_offset: vertical offset from y to uv
*/
//TODO: This is the most instesrting kernel, using hardware sampler to scale
__kernel void kernel_image_scaler (__read_only image2d_t input,
__write_only image2d_t output,
const uint output_widht,
const uint output_height)
{
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
float2 normCoor = convert_float2((int2)(x, y)) / (float2)(output_widht, output_height);
float4 scaled_pixel = read_imagef(input, sampler, normCoor);
write_imagef(output, (int2)(x, y), scaled_pixel);
}
/*
* function: kernel_macc
* input: image2d_t as read only
* output: image2d_t as write only
* table: macc table.
*/
unsigned int get_sector_id (float u, float v)
{
u = fabs(u) > 0.00001f ? u : 0.00001f;
float tg = v / u;
unsigned int se = tg > 1 ? (tg > 2 ? 3 : 2) : (tg > 0.5 ? 1 : 0);
unsigned int so = tg > -1 ? (tg > -0.5 ? 3 : 2) : (tg > -2 ? 1 : 0);
return tg > 0 ? (u > 0 ? se : (se + 8)) : (u > 0 ? (so + 12) : (so + 4));
}
__kernel void kernel_macc (__read_only image2d_t input, __write_only image2d_t output, __global float *table)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
float4 pixel_in[8], pixel_out[8];
float Y[8], ui[8], vi[8], uo[8], vo[8];
unsigned int table_id[8];
int i = 0, j = 0;
#pragma unroll
for(j = 0; j < 2; j++) {
#pragma unroll
for(i = 0; i < 4; i++) {
pixel_in[j * 4 + i] = read_imagef(input, sampler, (int2)(4 * x + i, 2 * y + j));
Y[j * 4 + i] = 0.3 * pixel_in[j * 4 + i].x + 0.59 * pixel_in[j * 4 + i].y + 0.11 * pixel_in[j * 4 + i].z;
ui[j * 4 + i] = 0.493 * (pixel_in[j * 4 + i].z - Y[j * 4 + i]);
vi[j * 4 + i] = 0.877 * (pixel_in[j * 4 + i].x - Y[j * 4 + i]);
table_id[j * 4 + i] = get_sector_id(ui[j * 4 + i], vi[j * 4 + i]);
uo[j * 4 + i] = ui[j * 4 + i] * table[4 * table_id[j * 4 + i]] + vi[j * 4 + i] * table[4 * table_id[j * 4 + i] + 1];
vo[j * 4 + i] = ui[j * 4 + i] * table[4 * table_id[j * 4 + i] + 2] + vi[j * 4 + i] * table[4 * table_id[j * 4 + i] + 3];
pixel_out[j * 4 + i].x = Y[j * 4 + i] + 1.14 * vo[j * 4 + i];
pixel_out[j * 4 + i].y = Y[j * 4 + i] - 0.39 * uo[j * 4 + i] - 0.58 * vo[j * 4 + i];
pixel_out[j * 4 + i].z = Y[j * 4 + i] + 2.03 * uo[j * 4 + i];
pixel_out[j * 4 + i].w = 0.0;
write_imagef(output, (int2)(4 * x + i, 2 * y + j), pixel_out[j * 4 + i]);
}
}
}
/*
* function: kernel_snr
* implementation of simple noise reduction
* input: image2d_t as read only
* output: image2d_t as write only
*/
__kernel void kernel_snr (__read_only image2d_t input, __write_only image2d_t output)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 p[9];
p[0] = read_imagef(input, sampler, (int2)(x - 1, y - 1));
p[1] = read_imagef(input, sampler, (int2)(x, y - 1));
p[2] = read_imagef(input, sampler, (int2)(x + 1, y - 1));
p[3] = read_imagef(input, sampler, (int2)(x - 1, y));
p[4] = read_imagef(input, sampler, (int2)(x, y));
p[5] = read_imagef(input, sampler, (int2)(x + 1, y));
p[6] = read_imagef(input, sampler, (int2)(x - 1, y + 1));
p[7] = read_imagef(input, sampler, (int2)(x, y + 1));
p[8] = read_imagef(input, sampler, (int2)(x + 1, y + 1));
float4 pixel_out;
pixel_out.x = (p[0].x + p[1].x + p[2].x + p[3].x + p[4].x + p[5].x + p[6].x + p[7].x + p[8].x) / 9.0f;
pixel_out.y = (p[0].y + p[1].y + p[2].y + p[3].y + p[4].y + p[5].y + p[6].y + p[7].y + p[8].y) / 9.0f;
pixel_out.z = (p[0].z + p[1].z + p[2].z + p[3].z + p[4].z + p[5].z + p[6].z + p[7].z + p[8].z) / 9.0f;
pixel_out.w = p[4].w;
write_imagef(output, (int2)(x, y), pixel_out);
}
/*
* function: kernel_tonemapping
* implementation of tone mapping
* input: image2d_t as read only
* output: image2d_t as write only
*/
#define WORK_ITEM_X_SIZE 8
#define WORK_ITEM_Y_SIZE 8
#define SHARED_PIXEL_X_SIZE 10
#define SHARED_PIXEL_Y_SIZE 10
__kernel void kernel_tonemapping (__read_only image2d_t input, __write_only image2d_t output, float y_max, float y_target, int image_height)
{
int g_id_x = get_global_id (0);
int g_id_y = get_global_id (1);
int group_id_x = get_group_id(0);
int group_id_y = get_group_id(1);
int local_id_x = get_local_id(0);
int local_id_y = get_local_id(1);
int g_size_x = get_global_size (0);
int g_size_y = get_global_size (1);
int local_index = local_id_y * WORK_ITEM_X_SIZE + local_id_x;
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__local float4 local_src_data[SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE];
float4 src_data_Gr = read_imagef (input, sampler, (int2)(g_id_x, g_id_y));
float4 src_data_R = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height));
float4 src_data_B = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 2));
float4 src_data_Gb = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 3));
float4 src_data_G = (src_data_Gr + src_data_Gb) / 2;
float4 src_y_data = 0.0f;
src_y_data = mad(src_data_R, 255.f * 0.299f, src_y_data);
src_y_data = mad(src_data_G, 255.f * 0.587f, src_y_data);
src_y_data = mad(src_data_B, 255.f * 0.114f, src_y_data);
local_src_data[(local_id_y + 1) * SHARED_PIXEL_X_SIZE + local_id_x + 1] = src_y_data;
if(local_index < SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE - WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE)
{
int target_index = local_index <= SHARED_PIXEL_X_SIZE ? local_index : (local_index <= (SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE - WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE - SHARED_PIXEL_X_SIZE) ? (local_index + WORK_ITEM_X_SIZE + (local_index - (SHARED_PIXEL_X_SIZE + 1)) / 2 * WORK_ITEM_X_SIZE) : (local_index + WORK_ITEM_X_SIZE * WORK_ITEM_Y_SIZE));
int start_x = mad24(group_id_x, WORK_ITEM_X_SIZE, -1);
int start_y = mad24(group_id_y, WORK_ITEM_Y_SIZE, -1);
int offset_x = target_index % SHARED_PIXEL_X_SIZE;
int offset_y = target_index / SHARED_PIXEL_X_SIZE;
float4 data_Gr = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y));
float4 data_R = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height));
float4 data_B = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height * 2));
float4 data_Gb = read_imagef (input, sampler, (int2)(start_x + offset_x, start_y + offset_y + image_height * 3));
float4 data_G = (data_Gr + data_Gb) / 2;
float4 y_data = 0.0f;
y_data = mad(data_R, 255.f * 0.299f, y_data);
y_data = mad(data_G, 255.f * 0.587f, y_data);
y_data = mad(data_B, 255.f * 0.114f, y_data);
local_src_data[target_index] = y_data;
}
barrier(CLK_LOCAL_MEM_FENCE);
float gaussian_table[9] = {0.075f, 0.124f, 0.075f,
0.124f, 0.204f, 0.124f,
0.075f, 0.124f, 0.075f
};
float4 src_ym_data = 0.0f;
float16 integrate_data = *((__local float16 *)(local_src_data + local_id_y * SHARED_PIXEL_X_SIZE + local_id_x));
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[0], src_ym_data);
src_ym_data = mad(integrate_data.s4567, (float4)gaussian_table[1], src_ym_data);
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[2], src_ym_data);
integrate_data = *((__local float16 *)(local_src_data + (local_id_y + 1) * SHARED_PIXEL_X_SIZE + local_id_x));
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[3], src_ym_data);
src_ym_data = mad(src_y_data, (float4)gaussian_table[4], src_ym_data);
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[5], src_ym_data);
integrate_data = *((__local float16 *)(local_src_data + (local_id_y + 2) * SHARED_PIXEL_X_SIZE + local_id_x));
src_ym_data = mad(integrate_data.s3456, (float4)gaussian_table[6], src_ym_data);
src_ym_data = mad(integrate_data.s4567, (float4)gaussian_table[7], src_ym_data);
src_ym_data = mad(integrate_data.s5678, (float4)gaussian_table[8], src_ym_data);
float4 gain = ((float4)(y_max + y_target) + src_ym_data) / (src_y_data + src_ym_data + (float4)y_target);
src_data_Gr = src_data_Gr * gain;
src_data_R = src_data_R * gain;
src_data_B = src_data_B * gain;
src_data_Gb = src_data_Gb * gain;
write_imagef(output, (int2)(g_id_x, g_id_y), src_data_Gr);
write_imagef(output, (int2)(g_id_x, g_id_y + image_height), src_data_R);
write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 2), src_data_B);
write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 3), src_data_Gb);
}
/*
* function: kernel_wb
* black level correction for sensor data input
* input: image2d_t as read only
* output: image2d_t as write only
* wb_config: white balance configuration
*/
typedef struct
{
float r_gain;
float gr_gain;
float gb_gain;
float b_gain;
} CLWBConfig;
__kernel void kernel_wb (__read_only image2d_t input,
__write_only image2d_t output,
CLWBConfig wb_config)
{
int x = get_global_id (0);
int y = get_global_id (1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
float4 Gr_in, R_in, B_in, Gb_in;
float4 Gr_out, R_out, B_out, Gb_out;
Gr_in = read_imagef(input, sampler, (int2)(2 * x, 2 * y));
R_in = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y));
B_in = read_imagef(input, sampler, (int2)(2 * x, 2 * y + 1));
Gb_in = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y + 1));
Gr_out.x = Gr_in.x * wb_config.gr_gain;
Gr_out.y = 0.0;
Gr_out.z = 0.0;
Gr_out.w = 1.0;
R_out.x = R_in.x * wb_config.r_gain;
R_out.y = 0.0;
R_out.z = 0.0;
R_out.w = 1.0;
B_out.x = B_in.x * wb_config.b_gain;
B_out.y = 0.0;
B_out.z = 0.0;
B_out.w = 1.0;
Gb_out.x = Gb_in.x * wb_config.gb_gain;
Gb_out.y = 0.0;
Gb_out.z = 0.0;
Gb_out.w = 1.0;
write_imagef(output, (int2)(2 * x, 2 * y), Gr_out);
write_imagef(output, (int2)(2 * x + 1, 2 * y), R_out);
write_imagef(output, (int2)(2 * x, 2 * y + 1), B_out);
write_imagef(output, (int2)(2 * x + 1, 2 * y + 1), Gb_out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment