Skip to content

Instantly share code, notes, and snippets.

@zeux
Created February 12, 2016 08:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zeux/218be90b7ce38c81777e to your computer and use it in GitHub Desktop.
Save zeux/218be90b7ce38c81777e to your computer and use it in GitHub Desktop.
View frustum culling optimization: Vectorize me
#include <stdbool.h>
#include <spu_intrinsics.h>
/*
 * Affine transform stored as four rows of four floats each.
 *
 * transform_point() evaluates x*row0 + y*row1 + z*row2 + row3, so rows 0-2
 * weight the input coordinates and row3 is the additive term. The fourth
 * lane of every row has no effect on that function's output, because it
 * overwrites the w lane of its result.
 */
struct matrix43_t
{
    vec_float4 row0, row1, row2, row3;
};
/*
 * Axis-aligned bounding box described by its two extreme corners.
 *
 * is_visible() derives all eight corners by mixing the x, y and z lanes of
 * min and max. The fourth lane is carried along but is replaced once the
 * corners are transformed.
 */
struct aabb_t
{
    vec_float4 min, max;
};
// Set of six planes, each packed into one vector of four floats.
// is_visible() takes the four-component dot product of a plane with points
// whose fourth component is 1 and treats a result > 0 as "in front of" it.
struct frustum_t
{
vec_float4 planes[6];
};
/*
 * Transforms the point stored in the x, y and z lanes of p by *mat.
 *
 * The result is x*row0 + (y*row1 + (z*row2 + row3)), evaluated with si_fma
 * multiply-adds in exactly that nesting, after which the w lane is forced
 * to 1. The incoming w lane of p is never read.
 */
static inline qword transform_point(qword p, const struct matrix43_t* mat)
{
    // Shuffle patterns: each replicates one 32-bit lane of p into all lanes.
    const qword splat_x = (qword)(vec_uint4)(0x00010203);
    const qword splat_y = (qword)(vec_uint4)(0x04050607);
    const qword splat_z = (qword)(vec_uint4)(0x08090a0b);

    // Select mask covering only the w lane, and the value written there.
    const qword w_lane_mask = (qword)(vec_uint4){0, 0, 0, ~0};
    const qword w_lane_one = (qword)(vec_float4){0, 0, 0, 1};

    const qword xxxx = si_shufb(p, p, splat_x);
    const qword yyyy = si_shufb(p, p, splat_y);
    const qword zzzz = si_shufb(p, p, splat_z);

    // Start from the additive row, then fold in z, y and x in that order.
    qword acc = si_fma(zzzz, (qword)mat->row2, (qword)mat->row3);
    acc = si_fma(yyyy, (qword)mat->row1, acc);
    acc = si_fma(xxxx, (qword)mat->row0, acc);

    // Keep x, y, z from the accumulator; take w from the constant.
    return si_selb(acc, w_lane_one, w_lane_mask);
}
/*
 * Four-component dot product of lhs and rhs, returned as a scalar.
 *
 * The lane products are reduced in two rotate-and-add steps, so the value
 * returned from the first lane is (x + z) + (y + w) of the products.
 */
static inline float dot(qword lhs, qword rhs)
{
    const qword products = si_fm(lhs, rhs);

    // Rotate by 8 bytes (two lanes) and add: every lane now holds a pair sum.
    const qword pair_sums = si_fa(products, si_rotqbyi(products, 8));

    // Rotate by 4 bytes (one lane) and add: every lane now holds the total.
    const qword total = si_fa(pair_sums, si_rotqbyi(pair_sums, 4));

    // Extract the scalar from the vector.
    return si_to_float(total);
}
__attribute__((noinline)) bool is_visible(struct matrix43_t* transform, struct aabb_t* aabb, struct frustum_t* frustum)
{
qword min = (qword)aabb->min;
qword max = (qword)aabb->max;
// get aabb points
qword points[] =
{
min, // x y z
si_selb(min, max, ((qword)(vec_uint4){~0, 0, 0, 0})), // X y z
si_selb(min, max, ((qword)(vec_uint4){~0, ~0, 0, 0})), // X Y z
si_selb(min, max, ((qword)(vec_uint4){0, ~0, 0, 0})), // x Y z
si_selb(min, max, ((qword)(vec_uint4){0, 0, ~0, 0})), // x y Z
si_selb(min, max, ((qword)(vec_uint4){~0, 0, ~0, 0})), // X y Z
max, // X Y Z
si_selb(min, max, ((qword)(vec_uint4){0, ~0, ~0, 0})), // x Y Z
};
// transform points to world space
for (int i = 0; i < 8; ++i)
{
points[i] = transform_point(points[i], transform);
}
// for each plane...
for (int i = 0; i < 6; ++i)
{
bool inside = false;
qword plane = (qword)frustum->planes[i];
for (int j = 0; j < 8; ++j)
{
if (dot(points[j], plane) > 0)
{
inside = true;
break;
}
}
if (!inside)
{
return false;
}
}
return true;
}
// simple ortho frustum
struct frustum_t frustum =
{
{
{ 1, 0, 0, 10 },
{ -1, 0, 0, 10 },
{ 0, 1, 0, 10 },
{ 0, -1, 0, 10 },
{ 0, 0, 1, 10 },
{ 0, 0, -1, 10 }
}
};
// small box
struct aabb_t aabb =
{
{ -1, -2, -3 },
{ 1, 2, 3 }
};
// and some weird matrix
struct matrix43_t transform =
{
{ 0.123f, 0.456f, 0.789f },
{ 0.456f, 0.123f, 0.789f },
{ 0.789f, 0.123f, 0.456f },
{ 1.f, -1.f, 1.f }
};
// Program entry point: runs the visibility test once on the global test
// data, then stops.
void _start()
{
    // The visibility result is discarded; only the call itself is wanted.
    (void)is_visible(&transform, &aabb, &frustum);

    // Halt via the stop intrinsic with code 0.
    si_stop(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment