Last active
December 4, 2015 00:17
-
-
Save CornuAmmonis/696ad676b59114241df9 to your computer and use it in GitHub Desktop.
gliders and a propagating distance field
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__kernel void rd_compute(__global float4 *a_in,__global float4 *b_in,__global float4 *c_in,__global float4 *d_in,__global float4 *e_in,__global float4 *a_out,__global float4 *b_out,__global float4 *c_out,__global float4 *d_out,__global float4 *e_out) | |
{ | |
const int index_x = get_global_id(0); | |
const int index_y = get_global_id(1); | |
const int index_z = get_global_id(2); | |
const int X = get_global_size(0); | |
const int Y = get_global_size(1); | |
const int Z = get_global_size(2); | |
const int index_here = X*(Y*index_z + index_y) + index_x; | |
float4 a = a_in[index_here]; | |
float4 b = b_in[index_here]; | |
float4 c = c_in[index_here]; | |
float4 d = d_in[index_here]; | |
float4 e = e_in[index_here]; | |
// compute the Laplacians of each chemical | |
// 2D standard 9-point stencil: [ 1,4,1; 4,-20,4; 1,4,1 ] / 6 | |
const int xm1 = (index_x-1+X) & (X-1); // wrap (assumes X is a power of 2) | |
const int xp1 = (index_x+1) & (X-1); | |
const int ym1 = (index_y-1+Y) & (Y-1); | |
const int yp1 = (index_y+1) & (Y-1); | |
const int index_n = X*(Y*index_z + ym1) + index_x; | |
const int index_ne = X*(Y*index_z + ym1) + xp1; | |
const int index_e = X*(Y*index_z + index_y) + xp1; | |
const int index_se = X*(Y*index_z + yp1) + xp1; | |
const int index_s = X*(Y*index_z + yp1) + index_x; | |
const int index_sw = X*(Y*index_z + yp1) + xm1; | |
const int index_w = X*(Y*index_z + index_y) + xm1; | |
const int index_nw = X*(Y*index_z + ym1) + xm1; | |
float4 a_n = a_in[index_n]; | |
float4 a_ne = a_in[index_ne]; | |
float4 a_e = a_in[index_e]; | |
float4 a_se = a_in[index_se]; | |
float4 a_s = a_in[index_s]; | |
float4 a_sw = a_in[index_sw]; | |
float4 a_w = a_in[index_w]; | |
float4 a_nw = a_in[index_nw]; | |
float4 b_n = b_in[index_n]; | |
float4 b_ne = b_in[index_ne]; | |
float4 b_e = b_in[index_e]; | |
float4 b_se = b_in[index_se]; | |
float4 b_s = b_in[index_s]; | |
float4 b_sw = b_in[index_sw]; | |
float4 b_w = b_in[index_w]; | |
float4 b_nw = b_in[index_nw]; | |
float4 c_n = c_in[index_n]; | |
float4 c_ne = c_in[index_ne]; | |
float4 c_e = c_in[index_e]; | |
float4 c_se = c_in[index_se]; | |
float4 c_s = c_in[index_s]; | |
float4 c_sw = c_in[index_sw]; | |
float4 c_w = c_in[index_w]; | |
float4 c_nw = c_in[index_nw]; | |
float4 d_n = d_in[index_n]; | |
float4 d_ne = d_in[index_ne]; | |
float4 d_e = d_in[index_e]; | |
float4 d_se = d_in[index_se]; | |
float4 d_s = d_in[index_s]; | |
float4 d_sw = d_in[index_sw]; | |
float4 d_w = d_in[index_w]; | |
float4 d_nw = d_in[index_nw]; | |
float4 e_n = e_in[index_n]; | |
float4 e_ne = e_in[index_ne]; | |
float4 e_e = e_in[index_e]; | |
float4 e_se = e_in[index_se]; | |
float4 e_s = e_in[index_s]; | |
float4 e_sw = e_in[index_sw]; | |
float4 e_w = e_in[index_w]; | |
float4 e_nw = e_in[index_nw]; | |
const float4 _K0 = -20.0f/6.0f; // center weight | |
const float _K1 = 4.0f/6.0f; // edge-neighbors | |
const float _K2 = 1.0f/6.0f; // vertex-neighbors | |
float4 laplacian_a = (float4)(a_n.x*_K1 + a_n.y*_K2 + a.y*_K1 + a_s.y*_K2 + a_s.x*_K1 + a_sw.w*_K2 + a_w.w*_K1 + a_nw.w*_K2, | |
a_n.y*_K1 + a_n.z*_K2 + a.z*_K1 + a_s.z*_K2 + a_s.y*_K1 + a_s.x*_K2 + a.x*_K1 + a_n.x*_K2, | |
a_n.z*_K1 + a_n.w*_K2 + a.w*_K1 + a_s.w*_K2 + a_s.z*_K1 + a_s.y*_K2 + a.y*_K1 + a_n.y*_K2, | |
a_n.w*_K1 + a_ne.x*_K2 + a_e.x*_K1 + a_se.x*_K2 + a_s.w*_K1 + a_s.z*_K2 + a.z*_K1 + a_n.z*_K2 ) + a*_K0; | |
float4 laplacian_b = (float4)(b_n.x*_K1 + b_n.y*_K2 + b.y*_K1 + b_s.y*_K2 + b_s.x*_K1 + b_sw.w*_K2 + b_w.w*_K1 + b_nw.w*_K2, | |
b_n.y*_K1 + b_n.z*_K2 + b.z*_K1 + b_s.z*_K2 + b_s.y*_K1 + b_s.x*_K2 + b.x*_K1 + b_n.x*_K2, | |
b_n.z*_K1 + b_n.w*_K2 + b.w*_K1 + b_s.w*_K2 + b_s.z*_K1 + b_s.y*_K2 + b.y*_K1 + b_n.y*_K2, | |
b_n.w*_K1 + b_ne.x*_K2 + b_e.x*_K1 + b_se.x*_K2 + b_s.w*_K1 + b_s.z*_K2 + b.z*_K1 + b_n.z*_K2 ) + b*_K0; | |
float4 laplacian_c = (float4)(c_n.x*_K1 + c_n.y*_K2 + c.y*_K1 + c_s.y*_K2 + c_s.x*_K1 + c_sw.w*_K2 + c_w.w*_K1 + c_nw.w*_K2, | |
c_n.y*_K1 + c_n.z*_K2 + c.z*_K1 + c_s.z*_K2 + c_s.y*_K1 + c_s.x*_K2 + c.x*_K1 + c_n.x*_K2, | |
c_n.z*_K1 + c_n.w*_K2 + c.w*_K1 + c_s.w*_K2 + c_s.z*_K1 + c_s.y*_K2 + c.y*_K1 + c_n.y*_K2, | |
c_n.w*_K1 + c_ne.x*_K2 + c_e.x*_K1 + c_se.x*_K2 + c_s.w*_K1 + c_s.z*_K2 + c.z*_K1 + c_n.z*_K2 ) + c*_K0; | |
float4 laplacian_d = (float4)(d_n.x*_K1 + d_n.y*_K2 + d.y*_K1 + d_s.y*_K2 + d_s.x*_K1 + d_sw.w*_K2 + d_w.w*_K1 + d_nw.w*_K2, | |
d_n.y*_K1 + d_n.z*_K2 + d.z*_K1 + d_s.z*_K2 + d_s.y*_K1 + d_s.x*_K2 + d.x*_K1 + d_n.x*_K2, | |
d_n.z*_K1 + d_n.w*_K2 + d.w*_K1 + d_s.w*_K2 + d_s.z*_K1 + d_s.y*_K2 + d.y*_K1 + d_n.y*_K2, | |
d_n.w*_K1 + d_ne.x*_K2 + d_e.x*_K1 + d_se.x*_K2 + d_s.w*_K1 + d_s.z*_K2 + d.z*_K1 + d_n.z*_K2 ) + d*_K0; | |
float4 laplacian_e = (float4)(e_n.x*_K1 + e_n.y*_K2 + e.y*_K1 + e_s.y*_K2 + e_s.x*_K1 + e_sw.w*_K2 + e_w.w*_K1 + e_nw.w*_K2, | |
e_n.y*_K1 + e_n.z*_K2 + e.z*_K1 + e_s.z*_K2 + e_s.y*_K1 + e_s.x*_K2 + e.x*_K1 + e_n.x*_K2, | |
e_n.z*_K1 + e_n.w*_K2 + e.w*_K1 + e_s.w*_K2 + e_s.z*_K1 + e_s.y*_K2 + e.y*_K1 + e_n.y*_K2, | |
e_n.w*_K1 + e_ne.x*_K2 + e_e.x*_K1 + e_se.x*_K2 + e_s.w*_K1 + e_s.z*_K2 + e.z*_K1 + e_n.z*_K2 ) + e*_K0; | |
float4 delta_a = 0.0f; | |
float4 delta_b = 0.0f; | |
float4 delta_c = 0.0f; | |
float4 delta_d = 0.0f; | |
float4 delta_e = 0.0f; | |
float4 D_a = 0.000150f; | |
float4 D_b = 0.000150f; | |
float4 D_c = 0.009600f; | |
float4 k3 = 8.500000f; | |
float4 lambda = 2.000000f; | |
float4 k1 = -6.920000f; | |
float4 theta = 1.000000f; | |
float4 tau = 48.000000f; | |
float4 dx = 0.009000f; | |
float4 timestep = 0.002000f; | |
// chemicals a, b, and c produce gliders that are thresholded against to generate points for the distance field | |
delta_a = D_a * laplacian_a / (dx*dx) - b - k3*c + lambda*a - a*a*a + k1; | |
delta_b = ( D_b * laplacian_b / (dx*dx) + a - b ) / tau; | |
delta_c = ( D_c * laplacian_c / (dx*dx) + a - c ) / theta; | |
// threshold value | |
const float4 th = 0.2f; | |
const float4 zero = 0.0f; | |
const float4 one = 1.0f; | |
const float4 two = 2.0f; | |
const float4 sqrt_2 = sqrt(2.0f); | |
// scaling factor for distance increments | |
const float4 scale = 0.005f; | |
// separately compute min for cardinal and diagonal directions | |
// note this is vectorized, 4 points in a row are computed | |
float2 min0c0 = min((float2)(d_n.x, d.y ), (float2)(d_s.x, d_w.w)); | |
float2 min1c0 = min((float2)(d_n.y, d.z ), (float2)(d_s.y, d.x )); | |
float2 min2c0 = min((float2)(d_n.z, d.w ), (float2)(d_s.z, d.y )); | |
float2 min3c0 = min((float2)(d_n.w, d_e.x), (float2)(d_s.w, d.z )); | |
float2 min0d0 = min((float2)(d_n.y , d_s.y ), (float2)(d_sw.w, d_nw.w)); | |
float2 min1d0 = min((float2)(d_n.z , d_s.z ), (float2)(d_s.x , d_n.x )); | |
float2 min2d0 = min((float2)(d_n.w , d_s.w ), (float2)(d_s.y , d_n.y )); | |
float2 min3d0 = min((float2)(d_ne.x, d_se.x), (float2)(d_s.z , d_n.z )); | |
float min0c1 = min(min0c0.x, min0c0.y); | |
float min1c1 = min(min1c0.x, min1c0.y); | |
float min2c1 = min(min2c0.x, min2c0.y); | |
float min3c1 = min(min3c0.x, min3c0.y); | |
float min0d1 = min(min0d0.x, min0d0.y); | |
float min1d1 = min(min1d0.x, min1d0.y); | |
float min2d1 = min(min2d0.x, min2d0.y); | |
float min3d1 = min(min3d0.x, min3d0.y); | |
float4 d_min_c = (float4)(min0c1, min1c1, min2c1, min3c1); | |
float4 d_min_d = (float4)(min0d1, min1d1, min2d1, min3d1); | |
// difference between cardinal min and diagonal min input to a sigmoid | |
float4 sd = -one + two /(one + exp(-4.0f * (d_min_c - d_min_d)/scale)); | |
float4 sdd = 1.0f + (sqrt_2 - 1.0f) * sd; | |
float4 sdc = 1.0f + (sqrt_2 - 1.0f) * (1.0f - sd); | |
// compute minimum distance using scaling factors | |
float4 d_min = min(d_min_c + scale * sdc, d_min_d + scale * sdd); | |
// if above the threshold, distance is zero, otherwise distance is d_min | |
d = clamp(select(d_min, zero, isgreater(a, th)), 0.0f, 1.0f); | |
// visualize the radial position estimate | |
e = sd; | |
a_out[index_here] = a + timestep * delta_a; | |
b_out[index_here] = b + timestep * delta_b; | |
c_out[index_here] = c + timestep * delta_c; | |
d_out[index_here] = d + timestep * delta_d; | |
e_out[index_here] = e + timestep * delta_e; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment