Created
February 12, 2016 08:01
-
-
Save zeux/218be90b7ce38c81777e to your computer and use it in GitHub Desktop.
View frustum culling optimization: Vectorize me
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdbool.h> | |
#include <spu_intrinsics.h> | |
struct matrix43_t | |
{ | |
vec_float4 row0; | |
vec_float4 row1; | |
vec_float4 row2; | |
vec_float4 row3; | |
}; | |
struct aabb_t | |
{ | |
vec_float4 min; | |
vec_float4 max; | |
}; | |
struct frustum_t | |
{ | |
vec_float4 planes[6]; | |
}; | |
/*
 * Transforms the point stored in the x/y/z lanes of p by *mat.
 *
 * The result is  row3 + z * row2 + y * row1 + x * row0  (accumulated in that
 * order), with the last lane then forced to 1.0f so the value can be fed
 * directly into a 4-lane dot product against a plane equation.
 * The last lane of p is not read.
 */
static inline qword transform_point(qword p, const struct matrix43_t* mat)
{
    // shuffle patterns: every 32-bit word of the pattern names the same four
    // source bytes, so the selected lane of p is replicated into all lanes
    const qword splat_x = (qword)(vec_uint4)(0x00010203);
    const qword splat_y = (qword)(vec_uint4)(0x04050607);
    const qword splat_z = (qword)(vec_uint4)(0x08090a0b);

    const qword xxxx = si_shufb(p, p, splat_x);
    const qword yyyy = si_shufb(p, p, splat_y);
    const qword zzzz = si_shufb(p, p, splat_z);

    // start from row3 and fold in one scaled row per multiply-add
    qword acc = si_fma(zzzz, (qword)mat->row2, (qword)mat->row3);
    acc = si_fma(yyyy, (qword)mat->row1, acc);
    acc = si_fma(xxxx, (qword)mat->row0, acc);

    // keep x/y/z from the accumulator, take 1.0f for the last lane
    const qword unit_w = (qword)(vec_float4){0, 0, 0, 1};
    const qword w_lane = (qword)(vec_uint4){0, 0, 0, ~0};

    return si_selb(acc, unit_w, w_lane);
}
/*
 * Four-lane dot product of lhs and rhs, returned as a scalar float.
 *
 * The lane-wise products are reduced in two steps: first the quadword is
 * rotated by 8 bytes and added to itself (two pair sums), then the result is
 * rotated by 4 bytes and added to itself, which leaves the full sum in the
 * lane that si_to_float() extracts.
 */
static inline float dot(qword lhs, qword rhs)
{
    const qword products = si_fm(lhs, rhs);

    // products + products rotated by two lanes
    const qword pair_sums = si_fa(products, si_rotqbyi(products, 8));

    // pair sums + pair sums rotated by one lane
    const qword total = si_fa(pair_sums, si_rotqbyi(pair_sums, 4));

    return si_to_float(total);
}
__attribute__((noinline)) bool is_visible(struct matrix43_t* transform, struct aabb_t* aabb, struct frustum_t* frustum) | |
{ | |
qword min = (qword)aabb->min; | |
qword max = (qword)aabb->max; | |
// get aabb points | |
qword points[] = | |
{ | |
min, // x y z | |
si_selb(min, max, ((qword)(vec_uint4){~0, 0, 0, 0})), // X y z | |
si_selb(min, max, ((qword)(vec_uint4){~0, ~0, 0, 0})), // X Y z | |
si_selb(min, max, ((qword)(vec_uint4){0, ~0, 0, 0})), // x Y z | |
si_selb(min, max, ((qword)(vec_uint4){0, 0, ~0, 0})), // x y Z | |
si_selb(min, max, ((qword)(vec_uint4){~0, 0, ~0, 0})), // X y Z | |
max, // X Y Z | |
si_selb(min, max, ((qword)(vec_uint4){0, ~0, ~0, 0})), // x Y Z | |
}; | |
// transform points to world space | |
for (int i = 0; i < 8; ++i) | |
{ | |
points[i] = transform_point(points[i], transform); | |
} | |
// for each plane... | |
for (int i = 0; i < 6; ++i) | |
{ | |
bool inside = false; | |
qword plane = (qword)frustum->planes[i]; | |
for (int j = 0; j < 8; ++j) | |
{ | |
if (dot(points[j], plane) > 0) | |
{ | |
inside = true; | |
break; | |
} | |
} | |
if (!inside) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
// simple ortho frustum | |
struct frustum_t frustum = | |
{ | |
{ | |
{ 1, 0, 0, 10 }, | |
{ -1, 0, 0, 10 }, | |
{ 0, 1, 0, 10 }, | |
{ 0, -1, 0, 10 }, | |
{ 0, 0, 1, 10 }, | |
{ 0, 0, -1, 10 } | |
} | |
}; | |
// small box | |
struct aabb_t aabb = | |
{ | |
{ -1, -2, -3 }, | |
{ 1, 2, 3 } | |
}; | |
// and some weird matrix | |
struct matrix43_t transform = | |
{ | |
{ 0.123f, 0.456f, 0.789f }, | |
{ 0.456f, 0.123f, 0.789f }, | |
{ 0.789f, 0.123f, 0.456f }, | |
{ 1.f, -1.f, 1.f } | |
}; | |
void _start() | |
{ | |
is_visible(&transform, &aabb, &frustum); | |
si_stop(0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment