This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scalar trilinear interpolation lambda using SSE. | |
auto trilerp_v = [&](int i0, int j0, int k0, float z, float y, float x) -> auto | |
{ | |
vec4 aa(this->getdata(i0, j0, k0), this->getdata(i0, j0 + 1, k0), this->getdata(i0 + 1, j0, k0), this->getdata(i0 + 1, j0 + 1, k0)); | |
vec4 bb(this->getdata(i0, j0, k0 + 1), this->getdata(i0, j0 + 1, k0 + 1), this->getdata(i0 + 1, j0, k0 + 1), this->getdata(i0 + 1, j0 + 1, k0 + 1)); | |
vec4 coeff_z(z); vec4 ccoeff_z(1.0f - z); | |
__m128 L_a = _mm_fmadd_ps(ccoeff_z.sa, aa.sa, _mm_mul_ps(coeff_z.sa, bb.sa)); | |
__m128 cc = _mm_shuffle_ps(L_a, L_a, _MM_SHUFFLE(1, 0, 1, 0)); // (z0,z1,|z0,z1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Test of the SSE cross product implementations - | |
temp_vec3<float> a(1.0f, 2.0f, 3.0f); temp_vec3<float> b(4.0f, 5.0f, 6.0f); | |
for (std::size_t j = 0; j < 5; ++j) | |
{ | |
std::cout << "\nITER = " << j << "\n"; | |
// Version A) | |
auto start = std::chrono::system_clock::now(); |