Created Jun 5, 2019

A quick test of small Eigen vector classes. Tested on Visual Studio 15.9.2, release 64 bit.
 #include "stdafx.h"
// A quick test of small Eigen vector classes. Tested on Visual Studio 15.9.2, release 64 bit.
//
// The program runs the same short arithmetic sequence on 3-component and
// 4-component float vectors so the machine code generated for each can be compared.

// Eigen vectorization switches. They are defined before Eigen/Dense is included so the
// header sees them.
#define EIGEN_VECTORIZE_SSE
#define EIGEN_VECTORIZE_SSE2
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSE4_1
#include "Eigen/Dense"
// NOTE(review): the header names of the next two includes were missing from this copy of the
// file. They are reconstructed from what the code uses (DirectX::XMFLOAT3/XMFLOAT4 and printf);
// confirm against the original.
#include <DirectXMath.h>
#include <stdio.h>

using Eigen::Vector3f;
using Eigen::Vector4f;
using Float3 = DirectX::XMFLOAT3;
using Float4 = DirectX::XMFLOAT4;

// Copies the x, y, z fields of a Float3 into an Eigen Vector3f.
__forceinline Vector3f load( const Float3& v )
{
	return Vector3f{ v.x, v.y, v.z };
}

// Copies the x, y, z, w fields of a Float4 into an Eigen Vector4f.
__forceinline Vector4f load( const Float4& v )
{
	return Vector4f{ v.x, v.y, v.z, v.w };
}

// 3-component variant of the test.
// With c = a * b (component-wise), d = a + b + c and e = c * d (component-wise),
// returns dot( d, e ).
// Kept out-of-line (noinline) and exported so it exists as a standalone function.
float __declspec( noinline, dllexport ) testVectorMath3( const Float3& fa, const Float3& fb )
{
	const Vector3f a = load( fa );
	const Vector3f b = load( fb );
	// Deliberate breakpoint placed right before the math; presumably there so the
	// disassembly of the following statements can be inspected in the debugger.
	__debugbreak();
	// This code is not vectorized. Compiles into stuff like mulss, addss, subss, even with /fp:fast, produces 26 instructions.
	// Either there's problem in Eigen or VC++, or it's just not complex enough, hard to tell.
	// NOTE(review): Eigen's documentation describes explicit vectorization for fixed-size types
	// whose size is a multiple of 16 bytes; three floats are 12 bytes, which likely explains
	// the scalar code here - confirm.
	const Vector3f c = a.array() * b.array();
	const Vector3f d = a + b + c;
	const Vector3f e = c.array() * d.array();
	return d.dot( e );
}

// 4-component variant of the test; same arithmetic as testVectorMath3.
// With c = a * b (component-wise), d = a + b + c and e = c * d (component-wise),
// returns dot( d, e ).
float __declspec( noinline, dllexport ) testVectorMath4( const Float4& fa, const Float4& fb )
{
	const Vector4f a = load( fa );
	const Vector4f b = load( fb );
	// Deliberate breakpoint placed right before the math (see testVectorMath3).
	__debugbreak();
	// This code is vectorized fine, compiles into stuff like mulps, addps, shufps, only emits 14 instructions.
	const Vector4f c = a.array() * b.array();
	const Vector4f d = a + b + c;
	const Vector4f e = c.array() * d.array();
	return d.dot( e );
}

// Runs the 3-component test on (1,2,3) and (4,5,6) and prints the result.
// For these inputs: c = (4,10,18), d = (9,17,27), e = (36,170,486), so the result is 16336.
void test3()
{
	const Float3 a{ 1, 2, 3 }, b{ 4, 5, 6 };
	const float res = testVectorMath3( a, b );
	printf( "result 3: %f\n", res );
}

// Runs the 4-component test on (1,2,3,4) and (5,6,7,8) and prints the result.
// For these inputs: c = (5,12,21,32), d = (11,20,31,44), e = (55,240,651,1408), so the result is 87538.
void test4()
{
	const Float4 a{ 1, 2, 3, 4 }, b{ 5, 6, 7, 8 };
	const float res = testVectorMath4( a, b );
	printf( "result 4: %f\n", res );
}

// Entry point: runs both variants in order. Each one hits its __debugbreak() first.
int main()
{
	test3();
	test4();
	return 0;
}
