Skip to content

Instantly share code, notes, and snippets.

@CornuAmmonis
Last active December 4, 2015 00:17
Show Gist options
  • Save CornuAmmonis/696ad676b59114241df9 to your computer and use it in GitHub Desktop.
Save CornuAmmonis/696ad676b59114241df9 to your computer and use it in GitHub Desktop.
gliders and a propagating distance field
__kernel void rd_compute(__global float4 *a_in,__global float4 *b_in,__global float4 *c_in,__global float4 *d_in,__global float4 *e_in,__global float4 *a_out,__global float4 *b_out,__global float4 *c_out,__global float4 *d_out,__global float4 *e_out)
{
const int index_x = get_global_id(0);
const int index_y = get_global_id(1);
const int index_z = get_global_id(2);
const int X = get_global_size(0);
const int Y = get_global_size(1);
const int Z = get_global_size(2);
const int index_here = X*(Y*index_z + index_y) + index_x;
float4 a = a_in[index_here];
float4 b = b_in[index_here];
float4 c = c_in[index_here];
float4 d = d_in[index_here];
float4 e = e_in[index_here];
// compute the Laplacians of each chemical
// 2D standard 9-point stencil: [ 1,4,1; 4,-20,4; 1,4,1 ] / 6
const int xm1 = (index_x-1+X) & (X-1); // wrap (assumes X is a power of 2)
const int xp1 = (index_x+1) & (X-1);
const int ym1 = (index_y-1+Y) & (Y-1);
const int yp1 = (index_y+1) & (Y-1);
const int index_n = X*(Y*index_z + ym1) + index_x;
const int index_ne = X*(Y*index_z + ym1) + xp1;
const int index_e = X*(Y*index_z + index_y) + xp1;
const int index_se = X*(Y*index_z + yp1) + xp1;
const int index_s = X*(Y*index_z + yp1) + index_x;
const int index_sw = X*(Y*index_z + yp1) + xm1;
const int index_w = X*(Y*index_z + index_y) + xm1;
const int index_nw = X*(Y*index_z + ym1) + xm1;
float4 a_n = a_in[index_n];
float4 a_ne = a_in[index_ne];
float4 a_e = a_in[index_e];
float4 a_se = a_in[index_se];
float4 a_s = a_in[index_s];
float4 a_sw = a_in[index_sw];
float4 a_w = a_in[index_w];
float4 a_nw = a_in[index_nw];
float4 b_n = b_in[index_n];
float4 b_ne = b_in[index_ne];
float4 b_e = b_in[index_e];
float4 b_se = b_in[index_se];
float4 b_s = b_in[index_s];
float4 b_sw = b_in[index_sw];
float4 b_w = b_in[index_w];
float4 b_nw = b_in[index_nw];
float4 c_n = c_in[index_n];
float4 c_ne = c_in[index_ne];
float4 c_e = c_in[index_e];
float4 c_se = c_in[index_se];
float4 c_s = c_in[index_s];
float4 c_sw = c_in[index_sw];
float4 c_w = c_in[index_w];
float4 c_nw = c_in[index_nw];
float4 d_n = d_in[index_n];
float4 d_ne = d_in[index_ne];
float4 d_e = d_in[index_e];
float4 d_se = d_in[index_se];
float4 d_s = d_in[index_s];
float4 d_sw = d_in[index_sw];
float4 d_w = d_in[index_w];
float4 d_nw = d_in[index_nw];
float4 e_n = e_in[index_n];
float4 e_ne = e_in[index_ne];
float4 e_e = e_in[index_e];
float4 e_se = e_in[index_se];
float4 e_s = e_in[index_s];
float4 e_sw = e_in[index_sw];
float4 e_w = e_in[index_w];
float4 e_nw = e_in[index_nw];
const float4 _K0 = -20.0f/6.0f; // center weight
const float _K1 = 4.0f/6.0f; // edge-neighbors
const float _K2 = 1.0f/6.0f; // vertex-neighbors
float4 laplacian_a = (float4)(a_n.x*_K1 + a_n.y*_K2 + a.y*_K1 + a_s.y*_K2 + a_s.x*_K1 + a_sw.w*_K2 + a_w.w*_K1 + a_nw.w*_K2,
a_n.y*_K1 + a_n.z*_K2 + a.z*_K1 + a_s.z*_K2 + a_s.y*_K1 + a_s.x*_K2 + a.x*_K1 + a_n.x*_K2,
a_n.z*_K1 + a_n.w*_K2 + a.w*_K1 + a_s.w*_K2 + a_s.z*_K1 + a_s.y*_K2 + a.y*_K1 + a_n.y*_K2,
a_n.w*_K1 + a_ne.x*_K2 + a_e.x*_K1 + a_se.x*_K2 + a_s.w*_K1 + a_s.z*_K2 + a.z*_K1 + a_n.z*_K2 ) + a*_K0;
float4 laplacian_b = (float4)(b_n.x*_K1 + b_n.y*_K2 + b.y*_K1 + b_s.y*_K2 + b_s.x*_K1 + b_sw.w*_K2 + b_w.w*_K1 + b_nw.w*_K2,
b_n.y*_K1 + b_n.z*_K2 + b.z*_K1 + b_s.z*_K2 + b_s.y*_K1 + b_s.x*_K2 + b.x*_K1 + b_n.x*_K2,
b_n.z*_K1 + b_n.w*_K2 + b.w*_K1 + b_s.w*_K2 + b_s.z*_K1 + b_s.y*_K2 + b.y*_K1 + b_n.y*_K2,
b_n.w*_K1 + b_ne.x*_K2 + b_e.x*_K1 + b_se.x*_K2 + b_s.w*_K1 + b_s.z*_K2 + b.z*_K1 + b_n.z*_K2 ) + b*_K0;
float4 laplacian_c = (float4)(c_n.x*_K1 + c_n.y*_K2 + c.y*_K1 + c_s.y*_K2 + c_s.x*_K1 + c_sw.w*_K2 + c_w.w*_K1 + c_nw.w*_K2,
c_n.y*_K1 + c_n.z*_K2 + c.z*_K1 + c_s.z*_K2 + c_s.y*_K1 + c_s.x*_K2 + c.x*_K1 + c_n.x*_K2,
c_n.z*_K1 + c_n.w*_K2 + c.w*_K1 + c_s.w*_K2 + c_s.z*_K1 + c_s.y*_K2 + c.y*_K1 + c_n.y*_K2,
c_n.w*_K1 + c_ne.x*_K2 + c_e.x*_K1 + c_se.x*_K2 + c_s.w*_K1 + c_s.z*_K2 + c.z*_K1 + c_n.z*_K2 ) + c*_K0;
float4 laplacian_d = (float4)(d_n.x*_K1 + d_n.y*_K2 + d.y*_K1 + d_s.y*_K2 + d_s.x*_K1 + d_sw.w*_K2 + d_w.w*_K1 + d_nw.w*_K2,
d_n.y*_K1 + d_n.z*_K2 + d.z*_K1 + d_s.z*_K2 + d_s.y*_K1 + d_s.x*_K2 + d.x*_K1 + d_n.x*_K2,
d_n.z*_K1 + d_n.w*_K2 + d.w*_K1 + d_s.w*_K2 + d_s.z*_K1 + d_s.y*_K2 + d.y*_K1 + d_n.y*_K2,
d_n.w*_K1 + d_ne.x*_K2 + d_e.x*_K1 + d_se.x*_K2 + d_s.w*_K1 + d_s.z*_K2 + d.z*_K1 + d_n.z*_K2 ) + d*_K0;
float4 laplacian_e = (float4)(e_n.x*_K1 + e_n.y*_K2 + e.y*_K1 + e_s.y*_K2 + e_s.x*_K1 + e_sw.w*_K2 + e_w.w*_K1 + e_nw.w*_K2,
e_n.y*_K1 + e_n.z*_K2 + e.z*_K1 + e_s.z*_K2 + e_s.y*_K1 + e_s.x*_K2 + e.x*_K1 + e_n.x*_K2,
e_n.z*_K1 + e_n.w*_K2 + e.w*_K1 + e_s.w*_K2 + e_s.z*_K1 + e_s.y*_K2 + e.y*_K1 + e_n.y*_K2,
e_n.w*_K1 + e_ne.x*_K2 + e_e.x*_K1 + e_se.x*_K2 + e_s.w*_K1 + e_s.z*_K2 + e.z*_K1 + e_n.z*_K2 ) + e*_K0;
float4 delta_a = 0.0f;
float4 delta_b = 0.0f;
float4 delta_c = 0.0f;
float4 delta_d = 0.0f;
float4 delta_e = 0.0f;
float4 D_a = 0.000150f;
float4 D_b = 0.000150f;
float4 D_c = 0.009600f;
float4 k3 = 8.500000f;
float4 lambda = 2.000000f;
float4 k1 = -6.920000f;
float4 theta = 1.000000f;
float4 tau = 48.000000f;
float4 dx = 0.009000f;
float4 timestep = 0.002000f;
// chemicals a, b, and c produce gliders that are thresholded against to generate points for the distance field
delta_a = D_a * laplacian_a / (dx*dx) - b - k3*c + lambda*a - a*a*a + k1;
delta_b = ( D_b * laplacian_b / (dx*dx) + a - b ) / tau;
delta_c = ( D_c * laplacian_c / (dx*dx) + a - c ) / theta;
// threshold value
const float4 th = 0.2f;
const float4 zero = 0.0f;
const float4 one = 1.0f;
const float4 two = 2.0f;
const float4 sqrt_2 = sqrt(2.0f);
// scaling factor for distance increments
const float4 scale = 0.005f;
// separately compute min for cardinal and diagonal directions
// note this is vectorized, 4 points in a row are computed
float2 min0c0 = min((float2)(d_n.x, d.y ), (float2)(d_s.x, d_w.w));
float2 min1c0 = min((float2)(d_n.y, d.z ), (float2)(d_s.y, d.x ));
float2 min2c0 = min((float2)(d_n.z, d.w ), (float2)(d_s.z, d.y ));
float2 min3c0 = min((float2)(d_n.w, d_e.x), (float2)(d_s.w, d.z ));
float2 min0d0 = min((float2)(d_n.y , d_s.y ), (float2)(d_sw.w, d_nw.w));
float2 min1d0 = min((float2)(d_n.z , d_s.z ), (float2)(d_s.x , d_n.x ));
float2 min2d0 = min((float2)(d_n.w , d_s.w ), (float2)(d_s.y , d_n.y ));
float2 min3d0 = min((float2)(d_ne.x, d_se.x), (float2)(d_s.z , d_n.z ));
float min0c1 = min(min0c0.x, min0c0.y);
float min1c1 = min(min1c0.x, min1c0.y);
float min2c1 = min(min2c0.x, min2c0.y);
float min3c1 = min(min3c0.x, min3c0.y);
float min0d1 = min(min0d0.x, min0d0.y);
float min1d1 = min(min1d0.x, min1d0.y);
float min2d1 = min(min2d0.x, min2d0.y);
float min3d1 = min(min3d0.x, min3d0.y);
float4 d_min_c = (float4)(min0c1, min1c1, min2c1, min3c1);
float4 d_min_d = (float4)(min0d1, min1d1, min2d1, min3d1);
// difference between cardinal min and diagonal min input to a sigmoid
float4 sd = -one + two /(one + exp(-4.0f * (d_min_c - d_min_d)/scale));
float4 sdd = 1.0f + (sqrt_2 - 1.0f) * sd;
float4 sdc = 1.0f + (sqrt_2 - 1.0f) * (1.0f - sd);
// compute minimum distance using scaling factors
float4 d_min = min(d_min_c + scale * sdc, d_min_d + scale * sdd);
// if above the threshold, distance is zero, otherwise distance is d_min
d = clamp(select(d_min, zero, isgreater(a, th)), 0.0f, 1.0f);
// visualize the radial position estimate
e = sd;
a_out[index_here] = a + timestep * delta_a;
b_out[index_here] = b + timestep * delta_b;
c_out[index_here] = c + timestep * delta_c;
d_out[index_here] = d + timestep * delta_d;
e_out[index_here] = e + timestep * delta_e;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment