Created
March 23, 2012 02:43
-
-
Save dadeba/2166341 to your computer and use it in GitHub Desktop.
OpenCL: A vectorized kernel for gravity interaction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Qualifier suffix for pointer arguments that are only read and not aliased. */
#define READONLY_P const * restrict

/*
 * grav1 - all-pairs softened gravity sum, four bodies per work-item.
 *
 * Coordinates and masses are stored as separate float4 arrays, so element
 * i of x/y/z/m packs four bodies.  Each work-item loads one such group of
 * four "i" bodies and loops over every float4 group of "j" bodies.  For
 * each pair it accumulates, with d = r_j - r_i and r2 = |d|^2 + eps2:
 *
 *     a   += m_j * d / r2^(3/2)
 *     pot -= m_j / sqrt(r2)
 *
 * x, y, z, m     input coordinates and masses (float4-packed)
 * ax, ay, az     output per-body sums of m_j * d / r2^(3/2), per component
 * pt             output per-body sum of -m_j / sqrt(r2)
 * n              number of bodies; n/4 float4 groups are read as sources
 * eps2           value added to every squared distance (softening)
 *
 * The work-item index is linearised from a 2-D NDRange as
 * gid = id(1) * size(0) + id(0); work-items with gid >= n/4 do nothing.
 *
 * NOTE(review): n/4 truncates, so the kernel presumably expects n to be a
 * multiple of 4 (or the host to pad); trailing bodies are otherwise ignored
 * as sources -- confirm against the host code.
 * NOTE(review): the pair i == j is not skipped.  Its d is zero, so with
 * eps2 > 0 it adds nothing to the acceleration sums but does add
 * -m_i / sqrt(eps2) to pt.  With eps2 == 0, r2 is zero for that pair and
 * the reciprocal square root is presumably not finite -- confirm that the
 * host always passes eps2 > 0.
 */
__kernel
void
grav1(
      __global float4 READONLY_P x,
      __global float4 READONLY_P y,
      __global float4 READONLY_P z,
      __global float4 READONLY_P m,
      __global float4 *ax,
      __global float4 *ay,
      __global float4 *az,
      __global float4 *pt,
      const int n,
      const float eps2
      )
{
    const unsigned int g_xid = get_global_id(0);
    const unsigned int g_yid = get_global_id(1);
    const unsigned int g_w   = get_global_size(0);
    const unsigned int i     = g_yid * g_w + g_xid;

    /* Number of float4 groups; a non-positive n yields zero instead of
     * being converted to a huge unsigned loop bound. */
    const unsigned int nvec = (n > 0) ? (unsigned int)n / 4u : 0u;

    /* The global size may be rounded up past the data; surplus work-items
     * must not touch memory beyond the n/4 groups. */
    if (i >= nvec) {
        return;
    }

    const float4 xi = x[i];
    const float4 yi = y[i];
    const float4 zi = z[i];
    const float4 e2 = (float4)(eps2);

    float4 a_x = (float4)(0.0f);
    float4 a_y = (float4)(0.0f);
    float4 a_z = (float4)(0.0f);
    float4 p_t = (float4)(0.0f);

    /* Rotates lanes by one: (s0, s1, s2, s3) -> (s3, s0, s1, s2). */
    const uint4 rotate = (uint4)(3, 0, 1, 2);

    for (unsigned int j = 0; j < nvec; j++) {
        float4 xj = x[j];
        float4 yj = y[j];
        float4 zj = z[j];
        float4 mj = m[j];

        /* Four rotations pair every lane of the i group with every lane
         * of the j group (4 x 4 interactions per outer iteration). */
        for (unsigned int k = 0; k < 4; k++) {
            const float4 dx = xj - xi;
            const float4 dy = yj - yi;
            const float4 dz = zj - zi;

            const float4 r2   = dx*dx + dy*dy + dz*dz + e2;
            const float4 r1i  = native_rsqrt(r2);   /* 1 / r     */
            const float4 r2i  = r1i * r1i;          /* 1 / r^2   */
            const float4 r1im = mj * r1i;           /* m / r     */
            const float4 r3im = r1im * r2i;         /* m / r^3   */

            a_x += dx * r3im;
            a_y += dy * r3im;
            a_z += dz * r3im;
            p_t -= r1im;

            xj = shuffle(xj, rotate);
            yj = shuffle(yj, rotate);
            zj = shuffle(zj, rotate);
            mj = shuffle(mj, rotate);
        }
    }

    ax[i] = a_x;
    ay[i] = a_y;
    az[i] = a_z;
    pt[i] = p_t;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment