Last active
May 7, 2020 23:58
-
-
Save NiallHornFX/5c890a416229a584007d4938cf5150af to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SSE Cross Product Test Implementations Test - | |
temp_vec3<float> a(1.0f, 2.0f, 3.0f); temp_vec3<float> b(4.0f, 5.0f, 6.0f); | |
for (std::size_t j = 0; j < 5; ++j) | |
{ | |
std::cout << "\nITER = " << j << "\n"; | |
// Version A) \\ | |
auto start = std::chrono::system_clock::now(); | |
__m128 a_0 = _mm_shuffle_ps(a.sv, a.sv, _MM_SHUFFLE(3, 0, 2, 1)); // = a.y,a.z,a.x,0f | |
__m128 a_1 = _mm_shuffle_ps(a.sv, a.sv, _MM_SHUFFLE(3, 1, 0, 2)); // = a.z,a.x,a.y,0f | |
__m128 b_0 = _mm_shuffle_ps(b.sv, b.sv, _MM_SHUFFLE(3, 1, 0, 2)); // = b.z,b.x,b.y,0f | |
__m128 b_1 = _mm_shuffle_ps(b.sv, b.sv, _MM_SHUFFLE(3, 0, 2, 1)); // = b.y,b.z,b.x,0f | |
// Actual CP Math - ( a_0 * b_0 ) - ( a_1 * b_1 ) | |
__m128 res_A = _mm_sub_ps(_mm_mul_ps(a_0, b_0), _mm_mul_ps(a_1, b_1)); // (a.y,a.z,a.x,0f * b.z,b.x,b.y,0f) - (a.z,a.x,a.y,0f * b.y,b.z,b.x,0f) | |
auto end = std::chrono::system_clock::now(); | |
//for (std::size_t i = 0; i < 3; ++i) std::cout << res_A.m128_f32[i] << " | "; | |
std::cout << "\n ============== SSE Cross Product - Version A =============\n"; | |
std::cout << "Calc Time = " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << "ns\n"; | |
std::cout << "Calc Time = " << std::fixed << std::setprecision(8) << double((std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)).count()) / double(1e+09) << "s\n"; | |
// Version B) \\ | |
start = std::chrono::system_clock::now(); | |
__m128 tmp0 = _mm_shuffle_ps(b.sv, b.sv, _MM_SHUFFLE(3, 0, 2, 1)); | |
// B --> tmp0 = (by,bz,bx,0) | |
__m128 tmp1 = _mm_shuffle_ps(a.sv, a.sv, _MM_SHUFFLE(3, 0, 2, 1)); | |
// A -- > tmp1 = (ay,az,ax,0) | |
tmp0 = _mm_mul_ps(tmp0, a.sv); | |
// tmp0 = (by * ax, bz * ay, bx * az, 0) | |
tmp1 = _mm_mul_ps(tmp1, b.sv); | |
// tmp1 = (ay * bx, az * by, ax * bz, 0) | |
__m128 tmp2 = _mm_sub_ps(tmp0, tmp1); | |
// tmp2 = ( ((by * ax) - (ay * bx)) , ((bz * ay) - (az * by)) , ((bx * az) - ax * bz), (0-0)) | |
__m128 res_B = _mm_shuffle_ps(tmp2, tmp2, _MM_SHUFFLE(3, 0, 2, 1)); // Have to shuffle to correct x,yz return order. | |
// res = ( ((bz * ay) - (az * by)), ((bx * az) - (ax * bz)), ((by * ax) - (ay * bx)), (0-0)) | |
end = std::chrono::system_clock::now(); | |
// Debug __m128s. | |
//for (std::size_t j = 0; j < 3; ++j) std::cout << res_B.m128_f32[j] << " | "; | |
std::cout << "\n ============== SSE Cross Product - Version B =============\n"; | |
std::cout << "Calc Time = " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << "ns\n"; | |
std::cout << "Calc Time = " << std::fixed << std::setprecision(8) << double((std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)).count()) / double(1e+09) << "s\n"; | |
} | |
// temp_vec3<T>
// Minimal 3-component vector whose storage is shared, through an anonymous
// union, between named components (x, y, z), an array view (v[3]) and a
// 4-lane SSE register (sv).
//
// NOTE(review): reading one union member after writing another, and the
// anonymous struct itself, rely on compiler extensions rather than standard
// C++. The sv view only lines up with x/y/z lane-for-lane when T is float.
template <class T>
struct temp_vec3
{
    temp_vec3() = delete;

    // Builds the vector from three scalars. sv is zeroed first so that the
    // fourth SIMD lane holds 0 instead of an indeterminate value.
    temp_vec3(T xx, T yy, T zz) : sv(_mm_setzero_ps())
    {
        x = xx;
        y = yy;
        z = zz;
    }

    // Builds the vector from the first three elements of `a`.
    // `a` must point to at least three readable elements.
    explicit temp_vec3(const T *a) : sv(_mm_setzero_ps())
    {
        x = a[0];
        y = a[1];
        z = a[2];
    }

    // Adopts an existing SSE register as-is (all four lanes are kept).
    explicit temp_vec3(__m128 ssv) : sv(ssv) {}

    union
    {
        struct { T x, y, z; };
        T v[3];
        __m128 sv;
    };
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment