CornuAmmonis/glider-distance

## glider-distance
__kernel void rd_compute(__global float4 *a_in,__global float4 *b_in,__global float4 *c_in,__global float4 *d_in,__global float4 *e_in,__global float4 *a_out,__global float4 *b_out,__global float4 *c_out,__global float4 *d_out,__global float4 *e_out)
{
    const int index_x = get_global_id(0);
    const int index_y = get_global_id(1);
    const int index_z = get_global_id(2);
    const int X = get_global_size(0);
    const int Y = get_global_size(1);
    const int Z = get_global_size(2);
    const int index_here = X*(Y*index_z + index_y) + index_x;

    float4 a = a_in[index_here];
    float4 b = b_in[index_here];
    float4 c = c_in[index_here];
    float4 d = d_in[index_here];
    float4 e = e_in[index_here];

    // compute the Laplacians of each chemical
    // 2D standard 9-point stencil: [ 1,4,1; 4,-20,4; 1,4,1 ] / 6
    const int xm1 = (index_x-1+X) & (X-1); // wrap (assumes X is a power of 2)
    const int xp1 = (index_x+1) & (X-1);
    const int ym1 = (index_y-1+Y) & (Y-1);
    const int yp1 = (index_y+1) & (Y-1);
    const int index_n =  X*(Y*index_z + ym1) + index_x;
    const int index_ne = X*(Y*index_z + ym1) + xp1;
    const int index_e =  X*(Y*index_z + index_y) + xp1;
    const int index_se = X*(Y*index_z + yp1) + xp1;
    const int index_s =  X*(Y*index_z + yp1) + index_x;
    const int index_sw = X*(Y*index_z + yp1) + xm1;
    const int index_w =  X*(Y*index_z + index_y) + xm1;
    const int index_nw = X*(Y*index_z + ym1) + xm1;
    float4 a_n = a_in[index_n];
    float4 a_ne = a_in[index_ne];
    float4 a_e = a_in[index_e];
    float4 a_se = a_in[index_se];
    float4 a_s = a_in[index_s];
    float4 a_sw = a_in[index_sw];
    float4 a_w = a_in[index_w];
    float4 a_nw = a_in[index_nw];
    float4 b_n = b_in[index_n];
    float4 b_ne = b_in[index_ne];
    float4 b_e = b_in[index_e];
    float4 b_se = b_in[index_se];
    float4 b_s = b_in[index_s];
    float4 b_sw = b_in[index_sw];
    float4 b_w = b_in[index_w];
    float4 b_nw = b_in[index_nw];
    float4 c_n = c_in[index_n];
    float4 c_ne = c_in[index_ne];
    float4 c_e = c_in[index_e];
    float4 c_se = c_in[index_se];
    float4 c_s = c_in[index_s];
    float4 c_sw = c_in[index_sw];
    float4 c_w = c_in[index_w];
    float4 c_nw = c_in[index_nw];
    float4 d_n = d_in[index_n];
    float4 d_ne = d_in[index_ne];
    float4 d_e = d_in[index_e];
    float4 d_se = d_in[index_se];
    float4 d_s = d_in[index_s];
    float4 d_sw = d_in[index_sw];
    float4 d_w = d_in[index_w];
    float4 d_nw = d_in[index_nw];
    float4 e_n = e_in[index_n];
    float4 e_ne = e_in[index_ne];
    float4 e_e = e_in[index_e];
    float4 e_se = e_in[index_se];
    float4 e_s = e_in[index_s];
    float4 e_sw = e_in[index_sw];
    float4 e_w = e_in[index_w];
    float4 e_nw = e_in[index_nw];
    const float4 _K0 = -20.0f/6.0f; // center weight
    const float _K1 = 4.0f/6.0f; // edge-neighbors
    const float _K2 = 1.0f/6.0f; // vertex-neighbors
    float4 laplacian_a = (float4)(a_n.x*_K1 + a_n.y*_K2 + a.y*_K1 + a_s.y*_K2 + a_s.x*_K1 + a_sw.w*_K2 + a_w.w*_K1 + a_nw.w*_K2,
    a_n.y*_K1 + a_n.z*_K2 + a.z*_K1 + a_s.z*_K2 + a_s.y*_K1 + a_s.x*_K2 + a.x*_K1 + a_n.x*_K2,
    a_n.z*_K1 + a_n.w*_K2 + a.w*_K1 + a_s.w*_K2 + a_s.z*_K1 + a_s.y*_K2 + a.y*_K1 + a_n.y*_K2,
    a_n.w*_K1 + a_ne.x*_K2 + a_e.x*_K1 + a_se.x*_K2 + a_s.w*_K1 + a_s.z*_K2 + a.z*_K1 + a_n.z*_K2 ) + a*_K0;
    float4 laplacian_b = (float4)(b_n.x*_K1 + b_n.y*_K2 + b.y*_K1 + b_s.y*_K2 + b_s.x*_K1 + b_sw.w*_K2 + b_w.w*_K1 + b_nw.w*_K2,
    b_n.y*_K1 + b_n.z*_K2 + b.z*_K1 + b_s.z*_K2 + b_s.y*_K1 + b_s.x*_K2 + b.x*_K1 + b_n.x*_K2,
    b_n.z*_K1 + b_n.w*_K2 + b.w*_K1 + b_s.w*_K2 + b_s.z*_K1 + b_s.y*_K2 + b.y*_K1 + b_n.y*_K2,
    b_n.w*_K1 + b_ne.x*_K2 + b_e.x*_K1 + b_se.x*_K2 + b_s.w*_K1 + b_s.z*_K2 + b.z*_K1 + b_n.z*_K2 ) + b*_K0;
    float4 laplacian_c = (float4)(c_n.x*_K1 + c_n.y*_K2 + c.y*_K1 + c_s.y*_K2 + c_s.x*_K1 + c_sw.w*_K2 + c_w.w*_K1 + c_nw.w*_K2,
    c_n.y*_K1 + c_n.z*_K2 + c.z*_K1 + c_s.z*_K2 + c_s.y*_K1 + c_s.x*_K2 + c.x*_K1 + c_n.x*_K2,
    c_n.z*_K1 + c_n.w*_K2 + c.w*_K1 + c_s.w*_K2 + c_s.z*_K1 + c_s.y*_K2 + c.y*_K1 + c_n.y*_K2,
    c_n.w*_K1 + c_ne.x*_K2 + c_e.x*_K1 + c_se.x*_K2 + c_s.w*_K1 + c_s.z*_K2 + c.z*_K1 + c_n.z*_K2 ) + c*_K0;
    float4 laplacian_d = (float4)(d_n.x*_K1 + d_n.y*_K2 + d.y*_K1 + d_s.y*_K2 + d_s.x*_K1 + d_sw.w*_K2 + d_w.w*_K1 + d_nw.w*_K2,
    d_n.y*_K1 + d_n.z*_K2 + d.z*_K1 + d_s.z*_K2 + d_s.y*_K1 + d_s.x*_K2 + d.x*_K1 + d_n.x*_K2,
    d_n.z*_K1 + d_n.w*_K2 + d.w*_K1 + d_s.w*_K2 + d_s.z*_K1 + d_s.y*_K2 + d.y*_K1 + d_n.y*_K2,
    d_n.w*_K1 + d_ne.x*_K2 + d_e.x*_K1 + d_se.x*_K2 + d_s.w*_K1 + d_s.z*_K2 + d.z*_K1 + d_n.z*_K2 ) + d*_K0;
    float4 laplacian_e = (float4)(e_n.x*_K1 + e_n.y*_K2 + e.y*_K1 + e_s.y*_K2 + e_s.x*_K1 + e_sw.w*_K2 + e_w.w*_K1 + e_nw.w*_K2,
    e_n.y*_K1 + e_n.z*_K2 + e.z*_K1 + e_s.z*_K2 + e_s.y*_K1 + e_s.x*_K2 + e.x*_K1 + e_n.x*_K2,
    e_n.z*_K1 + e_n.w*_K2 + e.w*_K1 + e_s.w*_K2 + e_s.z*_K1 + e_s.y*_K2 + e.y*_K1 + e_n.y*_K2,
    e_n.w*_K1 + e_ne.x*_K2 + e_e.x*_K1 + e_se.x*_K2 + e_s.w*_K1 + e_s.z*_K2 + e.z*_K1 + e_n.z*_K2 ) + e*_K0;

    float4 delta_a = 0.0f;
    float4 delta_b = 0.0f;
    float4 delta_c = 0.0f;
    float4 delta_d = 0.0f;
    float4 delta_e = 0.0f;

    float4 D_a = 0.000150f;
    float4 D_b = 0.000150f;
    float4 D_c = 0.009600f;
    float4 k3 = 8.500000f;
    float4 lambda = 2.000000f;
    float4 k1 = -6.920000f;
    float4 theta = 1.000000f;
    float4 tau = 48.000000f;
    float4 dx = 0.009000f;
    float4 timestep = 0.002000f;

    // chemicals a, b, and c produce gliders that are thresholded against to generate points for the distance field
    delta_a = D_a * laplacian_a / (dx*dx) - b - k3*c + lambda*a - a*a*a + k1;
    delta_b = ( D_b * laplacian_b / (dx*dx) + a - b ) / tau;
    delta_c = ( D_c * laplacian_c / (dx*dx) + a - c ) / theta;

    // threshold value
    const float4 th = 0.2f;

    const float4 zero = 0.0f;
    const float4 one = 1.0f;
    const float4 two = 2.0f;
    const float4 sqrt_2 = sqrt(2.0f);

    // scaling factor for distance increments
    const float4 scale = 0.005f;

    // separately compute min for cardinal and diagonal directions
    // note this is vectorized, 4 points in a row are computed
    float2 min0c0 = min((float2)(d_n.x, d.y  ), (float2)(d_s.x, d_w.w));
    float2 min1c0 = min((float2)(d_n.y, d.z  ), (float2)(d_s.y, d.x  ));
    float2 min2c0 = min((float2)(d_n.z, d.w  ), (float2)(d_s.z, d.y  ));
    float2 min3c0 = min((float2)(d_n.w, d_e.x), (float2)(d_s.w, d.z  ));

    float2 min0d0 = min((float2)(d_n.y , d_s.y ), (float2)(d_sw.w, d_nw.w));
    float2 min1d0 = min((float2)(d_n.z , d_s.z ), (float2)(d_s.x , d_n.x ));
    float2 min2d0 = min((float2)(d_n.w , d_s.w ), (float2)(d_s.y , d_n.y ));
    float2 min3d0 = min((float2)(d_ne.x, d_se.x), (float2)(d_s.z , d_n.z ));

    float min0c1 = min(min0c0.x, min0c0.y);
    float min1c1 = min(min1c0.x, min1c0.y);
    float min2c1 = min(min2c0.x, min2c0.y);
    float min3c1 = min(min3c0.x, min3c0.y);

    float min0d1 = min(min0d0.x, min0d0.y);
    float min1d1 = min(min1d0.x, min1d0.y);
    float min2d1 = min(min2d0.x, min2d0.y);
    float min3d1 = min(min3d0.x, min3d0.y);

    float4 d_min_c = (float4)(min0c1, min1c1, min2c1, min3c1);
    float4 d_min_d = (float4)(min0d1, min1d1, min2d1, min3d1);

    // difference between cardinal min and diagonal min input to a sigmoid
    float4 sd = -one + two /(one + exp(-4.0f * (d_min_c - d_min_d)/scale));
    float4 sdd = 1.0f + (sqrt_2 - 1.0f) * sd;
    float4 sdc = 1.0f + (sqrt_2 - 1.0f) * (1.0f - sd);

    // compute minimum distance using scaling factors
    float4 d_min = min(d_min_c + scale * sdc, d_min_d + scale * sdd);

    // if above the threshold, distance is zero, otherwise distance is d_min
    d = clamp(select(d_min, zero, isgreater(a, th)), 0.0f, 1.0f);

    // visualize the radial position estimate
    e = sd;


    a_out[index_here] = a + timestep * delta_a;
    b_out[index_here] = b + timestep * delta_b;
    c_out[index_here] = c + timestep * delta_c;
    d_out[index_here] = d + timestep * delta_d;
    e_out[index_here] = e + timestep * delta_e;
}
	__kernel void rd_compute(__global float4 a_in,__global float4 b_in,__global float4 c_in,__global float4 d_in,__global float4 e_in,__global float4 a_out,__global float4 b_out,__global float4 c_out,__global float4 d_out,__global float4 e_out)
	{
	const int index_x = get_global_id(0);
	const int index_y = get_global_id(1);
	const int index_z = get_global_id(2);
	const int X = get_global_size(0);
	const int Y = get_global_size(1);
	const int Z = get_global_size(2);
	const int index_here = X(Yindex_z + index_y) + index_x;

	float4 a = a_in[index_here];
	float4 b = b_in[index_here];
	float4 c = c_in[index_here];
	float4 d = d_in[index_here];
	float4 e = e_in[index_here];

	// compute the Laplacians of each chemical
	// 2D standard 9-point stencil: [ 1,4,1; 4,-20,4; 1,4,1 ] / 6
	const int xm1 = (index_x-1+X) & (X-1); // wrap (assumes X is a power of 2)
	const int xp1 = (index_x+1) & (X-1);
	const int ym1 = (index_y-1+Y) & (Y-1);
	const int yp1 = (index_y+1) & (Y-1);
	const int index_n = X(Yindex_z + ym1) + index_x;
	const int index_ne = X(Yindex_z + ym1) + xp1;
	const int index_e = X(Yindex_z + index_y) + xp1;
	const int index_se = X(Yindex_z + yp1) + xp1;
	const int index_s = X(Yindex_z + yp1) + index_x;
	const int index_sw = X(Yindex_z + yp1) + xm1;
	const int index_w = X(Yindex_z + index_y) + xm1;
	const int index_nw = X(Yindex_z + ym1) + xm1;
	float4 a_n = a_in[index_n];
	float4 a_ne = a_in[index_ne];
	float4 a_e = a_in[index_e];
	float4 a_se = a_in[index_se];
	float4 a_s = a_in[index_s];
	float4 a_sw = a_in[index_sw];
	float4 a_w = a_in[index_w];
	float4 a_nw = a_in[index_nw];
	float4 b_n = b_in[index_n];
	float4 b_ne = b_in[index_ne];
	float4 b_e = b_in[index_e];
	float4 b_se = b_in[index_se];
	float4 b_s = b_in[index_s];
	float4 b_sw = b_in[index_sw];
	float4 b_w = b_in[index_w];
	float4 b_nw = b_in[index_nw];
	float4 c_n = c_in[index_n];
	float4 c_ne = c_in[index_ne];
	float4 c_e = c_in[index_e];
	float4 c_se = c_in[index_se];
	float4 c_s = c_in[index_s];
	float4 c_sw = c_in[index_sw];
	float4 c_w = c_in[index_w];
	float4 c_nw = c_in[index_nw];
	float4 d_n = d_in[index_n];
	float4 d_ne = d_in[index_ne];
	float4 d_e = d_in[index_e];
	float4 d_se = d_in[index_se];
	float4 d_s = d_in[index_s];
	float4 d_sw = d_in[index_sw];
	float4 d_w = d_in[index_w];
	float4 d_nw = d_in[index_nw];
	float4 e_n = e_in[index_n];
	float4 e_ne = e_in[index_ne];
	float4 e_e = e_in[index_e];
	float4 e_se = e_in[index_se];
	float4 e_s = e_in[index_s];
	float4 e_sw = e_in[index_sw];
	float4 e_w = e_in[index_w];
	float4 e_nw = e_in[index_nw];
	const float4 _K0 = -20.0f/6.0f; // center weight
	const float _K1 = 4.0f/6.0f; // edge-neighbors
	const float _K2 = 1.0f/6.0f; // vertex-neighbors
	float4 laplacian_a = (float4)(a_n.x_K1 + a_n.y_K2 + a.y_K1 + a_s.y_K2 + a_s.x_K1 + a_sw.w_K2 + a_w.w_K1 + a_nw.w_K2,
	a_n.y_K1 + a_n.z_K2 + a.z_K1 + a_s.z_K2 + a_s.y_K1 + a_s.x_K2 + a.x_K1 + a_n.x_K2,
	a_n.z_K1 + a_n.w_K2 + a.w_K1 + a_s.w_K2 + a_s.z_K1 + a_s.y_K2 + a.y_K1 + a_n.y_K2,
	a_n.w_K1 + a_ne.x_K2 + a_e.x_K1 + a_se.x_K2 + a_s.w_K1 + a_s.z_K2 + a.z_K1 + a_n.z_K2 ) + a*_K0;
	float4 laplacian_b = (float4)(b_n.x_K1 + b_n.y_K2 + b.y_K1 + b_s.y_K2 + b_s.x_K1 + b_sw.w_K2 + b_w.w_K1 + b_nw.w_K2,
	b_n.y_K1 + b_n.z_K2 + b.z_K1 + b_s.z_K2 + b_s.y_K1 + b_s.x_K2 + b.x_K1 + b_n.x_K2,
	b_n.z_K1 + b_n.w_K2 + b.w_K1 + b_s.w_K2 + b_s.z_K1 + b_s.y_K2 + b.y_K1 + b_n.y_K2,
	b_n.w_K1 + b_ne.x_K2 + b_e.x_K1 + b_se.x_K2 + b_s.w_K1 + b_s.z_K2 + b.z_K1 + b_n.z_K2 ) + b*_K0;
	float4 laplacian_c = (float4)(c_n.x_K1 + c_n.y_K2 + c.y_K1 + c_s.y_K2 + c_s.x_K1 + c_sw.w_K2 + c_w.w_K1 + c_nw.w_K2,
	c_n.y_K1 + c_n.z_K2 + c.z_K1 + c_s.z_K2 + c_s.y_K1 + c_s.x_K2 + c.x_K1 + c_n.x_K2,
	c_n.z_K1 + c_n.w_K2 + c.w_K1 + c_s.w_K2 + c_s.z_K1 + c_s.y_K2 + c.y_K1 + c_n.y_K2,
	c_n.w_K1 + c_ne.x_K2 + c_e.x_K1 + c_se.x_K2 + c_s.w_K1 + c_s.z_K2 + c.z_K1 + c_n.z_K2 ) + c*_K0;
	float4 laplacian_d = (float4)(d_n.x_K1 + d_n.y_K2 + d.y_K1 + d_s.y_K2 + d_s.x_K1 + d_sw.w_K2 + d_w.w_K1 + d_nw.w_K2,
	d_n.y_K1 + d_n.z_K2 + d.z_K1 + d_s.z_K2 + d_s.y_K1 + d_s.x_K2 + d.x_K1 + d_n.x_K2,
	d_n.z_K1 + d_n.w_K2 + d.w_K1 + d_s.w_K2 + d_s.z_K1 + d_s.y_K2 + d.y_K1 + d_n.y_K2,
	d_n.w_K1 + d_ne.x_K2 + d_e.x_K1 + d_se.x_K2 + d_s.w_K1 + d_s.z_K2 + d.z_K1 + d_n.z_K2 ) + d*_K0;
	float4 laplacian_e = (float4)(e_n.x_K1 + e_n.y_K2 + e.y_K1 + e_s.y_K2 + e_s.x_K1 + e_sw.w_K2 + e_w.w_K1 + e_nw.w_K2,
	e_n.y_K1 + e_n.z_K2 + e.z_K1 + e_s.z_K2 + e_s.y_K1 + e_s.x_K2 + e.x_K1 + e_n.x_K2,
	e_n.z_K1 + e_n.w_K2 + e.w_K1 + e_s.w_K2 + e_s.z_K1 + e_s.y_K2 + e.y_K1 + e_n.y_K2,
	e_n.w_K1 + e_ne.x_K2 + e_e.x_K1 + e_se.x_K2 + e_s.w_K1 + e_s.z_K2 + e.z_K1 + e_n.z_K2 ) + e*_K0;

	float4 delta_a = 0.0f;
	float4 delta_b = 0.0f;
	float4 delta_c = 0.0f;
	float4 delta_d = 0.0f;
	float4 delta_e = 0.0f;

	float4 D_a = 0.000150f;
	float4 D_b = 0.000150f;
	float4 D_c = 0.009600f;
	float4 k3 = 8.500000f;
	float4 lambda = 2.000000f;
	float4 k1 = -6.920000f;
	float4 theta = 1.000000f;
	float4 tau = 48.000000f;
	float4 dx = 0.009000f;
	float4 timestep = 0.002000f;

	// chemicals a, b, and c produce gliders that are thresholded against to generate points for the distance field
	delta_a = D_a * laplacian_a / (dxdx) - b - k3c + lambdaa - aa*a + k1;
	delta_b = ( D_b * laplacian_b / (dx*dx) + a - b ) / tau;
	delta_c = ( D_c * laplacian_c / (dx*dx) + a - c ) / theta;

	// threshold value
	const float4 th = 0.2f;

	const float4 zero = 0.0f;
	const float4 one = 1.0f;
	const float4 two = 2.0f;
	const float4 sqrt_2 = sqrt(2.0f);

	// scaling factor for distance increments
	const float4 scale = 0.005f;

	// separately compute min for cardinal and diagonal directions
	// note this is vectorized, 4 points in a row are computed
	float2 min0c0 = min((float2)(d_n.x, d.y ), (float2)(d_s.x, d_w.w));
	float2 min1c0 = min((float2)(d_n.y, d.z ), (float2)(d_s.y, d.x ));
	float2 min2c0 = min((float2)(d_n.z, d.w ), (float2)(d_s.z, d.y ));
	float2 min3c0 = min((float2)(d_n.w, d_e.x), (float2)(d_s.w, d.z ));

	float2 min0d0 = min((float2)(d_n.y , d_s.y ), (float2)(d_sw.w, d_nw.w));
	float2 min1d0 = min((float2)(d_n.z , d_s.z ), (float2)(d_s.x , d_n.x ));
	float2 min2d0 = min((float2)(d_n.w , d_s.w ), (float2)(d_s.y , d_n.y ));
	float2 min3d0 = min((float2)(d_ne.x, d_se.x), (float2)(d_s.z , d_n.z ));

	float min0c1 = min(min0c0.x, min0c0.y);
	float min1c1 = min(min1c0.x, min1c0.y);
	float min2c1 = min(min2c0.x, min2c0.y);
	float min3c1 = min(min3c0.x, min3c0.y);

	float min0d1 = min(min0d0.x, min0d0.y);
	float min1d1 = min(min1d0.x, min1d0.y);
	float min2d1 = min(min2d0.x, min2d0.y);
	float min3d1 = min(min3d0.x, min3d0.y);

	float4 d_min_c = (float4)(min0c1, min1c1, min2c1, min3c1);
	float4 d_min_d = (float4)(min0d1, min1d1, min2d1, min3d1);

	// difference between cardinal min and diagonal min input to a sigmoid
	float4 sd = -one + two /(one + exp(-4.0f * (d_min_c - d_min_d)/scale));
	float4 sdd = 1.0f + (sqrt_2 - 1.0f) * sd;
	float4 sdc = 1.0f + (sqrt_2 - 1.0f) * (1.0f - sd);

	// compute minimum distance using scaling factors
	float4 d_min = min(d_min_c + scale * sdc, d_min_d + scale * sdd);

	// if above the threshold, distance is zero, otherwise distance is d_min
	d = clamp(select(d_min, zero, isgreater(a, th)), 0.0f, 1.0f);

	// visualize the radial position estimate
	e = sd;


	a_out[index_here] = a + timestep * delta_a;
	b_out[index_here] = b + timestep * delta_b;
	c_out[index_here] = c + timestep * delta_c;
	d_out[index_here] = d + timestep * delta_d;
	e_out[index_here] = e + timestep * delta_e;
	}