Skip to content

Instantly share code, notes, and snippets.

@dadeba
Created March 23, 2012 02:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dadeba/2166341 to your computer and use it in GitHub Desktop.
Save dadeba/2166341 to your computer and use it in GitHub Desktop.
OpenCL: A vectorized kernel for gravity interaction
/* Declarator suffix for input buffers that are only read and not aliased:
 * `__global float4 READONLY_P p` expands to
 * `__global float4 const * restrict p`. */
#define READONLY_P const * restrict
/*
 * grav1 -- softened pairwise gravity interaction, four targets per work-item.
 *
 * x, y, z, m : read-only inputs packed four scalars per float4 (presumably
 *              body coordinates and masses -- confirm against the host code).
 * ax, ay, az : outputs; element i receives, per lane, the sum over sources of
 *              m_j * d / (|d|^2 + eps2)^(3/2) with d = source - target.
 * pt         : output; element i receives, per lane, the sum over sources of
 *              -m_j / sqrt(|d|^2 + eps2).
 * n          : number of scalar sources. Only the first n/4 float4 elements
 *              are visited, so a remainder of n % 4 scalars is ignored.
 *              A non-positive n yields all-zero outputs.
 * eps2       : added to every squared distance before the reciprocal
 *              square root.
 *
 * Notes:
 *  - The flat work-item index is not range-checked: the launch must not cover
 *    indices beyond the end of the buffers.
 *  - The source loop does not skip the target itself. When the target is among
 *    the visited sources, that pair has d == 0: it adds nothing to ax/ay/az
 *    but adds -m/sqrt(eps2) to pt, and it requires eps2 > 0 (otherwise the
 *    reciprocal square root of zero is taken).
 *  - native_rsqrt is used, so accuracy is implementation-defined.
 */
__kernel
void
grav1(
      __global float4 READONLY_P x,
      __global float4 READONLY_P y,
      __global float4 READONLY_P z,
      __global float4 READONLY_P m,
      __global float4 *ax,
      __global float4 *ay,
      __global float4 *az,
      __global float4 *pt,
      const int n,
      const float eps2
      )
{
    /* Flatten the 2-D global id into one index, row-major over dimension 0. */
    const unsigned int col   = get_global_id(0);
    const unsigned int row   = get_global_id(1);
    const unsigned int width = get_global_size(0);
    const unsigned int i     = row * width + col;

    /* Number of float4 source elements to visit. Clamp explicitly: comparing
     * an unsigned counter against a negative n/4 would convert the bound to a
     * huge unsigned value and run the loop far past the buffers. */
    const unsigned int nj = (n > 0) ? ((unsigned int)n / 4u) : 0u;

    /* Rotates the lanes by one position: (x, y, z, w) -> (w, x, y, z). */
    const uint4 rotate = (uint4)(3, 0, 1, 2);

    const float4 xi = x[i];
    const float4 yi = y[i];
    const float4 zi = z[i];
    const float4 e2 = (float4)(eps2);

    float4 a_x = (float4)(0.0f);
    float4 a_y = (float4)(0.0f);
    float4 a_z = (float4)(0.0f);
    float4 p_t = (float4)(0.0f);

    for (unsigned int j = 0; j < nj; j++) {
        float4 xj = x[j];
        float4 yj = y[j];
        float4 zj = z[j];
        float4 mj = m[j];

        /* Four rotations of the source vector pair every target lane with
         * every source lane exactly once. */
        for (unsigned int k = 0; k < 4; k++) {
            const float4 dx = xj - xi;
            const float4 dy = yj - yi;
            const float4 dz = zj - zi;

            const float4 r2   = dx*dx + dy*dy + dz*dz + e2;
            const float4 r1i  = native_rsqrt(r2);   /* 1 / r     */
            const float4 r2i  = r1i*r1i;            /* 1 / r^2   */
            const float4 r1im = mj*r1i;             /* m / r     */
            const float4 r3im = r1im*r2i;           /* m / r^3   */

            a_x += dx*r3im;
            a_y += dy*r3im;
            a_z += dz*r3im;
            p_t += -r1im;

            xj = shuffle(xj, rotate);
            yj = shuffle(yj, rotate);
            zj = shuffle(zj, rotate);
            mj = shuffle(mj, rotate);
        }
    }

    ax[i] = a_x;
    ay[i] = a_y;
    az[i] = a_z;
    pt[i] = p_t;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment