Skip to content

Instantly share code, notes, and snippets.

@hi2p-perim
Created July 12, 2013 12:52
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hi2p-perim/5984231 to your computer and use it in GitHub Desktop.
Save hi2p-perim/5984231 to your computer and use it in GitHub Desktop.
Simple benchmark of 3D math libraries
//
// 3D math library benchmark
// Entries
// - glm 0.9.4.4
// -- http://glm.g-truc.net/
// - Eigen 3.1.3
// -- http://eigen.tuxfamily.org
// - DirectXMath
// -- http://msdn.microsoft.com/en-us/library/ee415571(v=vs.85).aspx
// -- http://code.msdn.microsoft.com/windowsdesktop/Direct3D-Tutorial-Win32-829979ef
// - vectormath (part of the Bullet library) 2.81
// -- http://www.bulletphysics.com/
//
#include <iostream>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include <immintrin.h>
#include <glm.hpp>
#include <gtx/simd_vec4.hpp>
#include <gtx/simd_mat4.hpp>
#include <gtc/type_ptr.hpp>
#include <DirectXMath.h>
#include <vmInclude.h>
#include <Eigen/Core>
// Bring the DirectXMath names (XMVectorReplicate, XMMatrixIdentity, ...) used by
// RunBench_DirectXMath into the global namespace.
using namespace DirectX;
// Short alias for std::chrono, used by the timing code in main().
namespace ch = std::chrono;
// Number of loop iterations every benchmark performs (2^28).
const int Iter = 1<<28;
void RunBench_GLM()
{
glm::vec4 v(1.0f);
glm::vec4 v2;
glm::mat4 m(1.0f);
for (int i = 0; i < Iter; i++)
{
v2 += m * v;
}
auto t = v2;
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
void RunBench_Eigen()
{
Eigen::Vector4f v(1.0f, 1.0f, 1.0f, 1.0f);
Eigen::Vector4f v2;
auto m = Eigen::Matrix4f::Identity();
for (int i = 0; i < Iter; i++)
{
v2 += m * v;
}
auto t = v2;
std::cout << t.x() << " " << t.y() << " " << t.z() << " " << t.w() << std::endl;
}
void RunBench_GLM_SIMD()
{
glm::detail::fvec4SIMD v(1.0f);
glm::detail::fvec4SIMD v2(0.0f);
glm::detail::fmat4x4SIMD m(1.0f);
for (int i = 0; i < Iter; i++)
{
v2 += v * m;
}
auto t = glm::vec4_cast(v2);
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
void RunBench_DirectXMath()
{
auto v = XMVectorReplicate(1.0f);
auto v2 = XMVectorReplicate(0.0f);
auto m = XMMatrixIdentity();
for (int i = 0; i < Iter; i++)
{
v2 += XMVector4Transform(v, m);
}
XMFLOAT4A t;
XMStoreFloat4A(&t, v2);
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
void RunBench_Bullet_VectorMath()
{
namespace vm = Vectormath::Aos;
vm::Vector4 v(1.0f);
vm::Vector4 v2(0.0f);
auto m = vm::Matrix4::identity();
for (int i = 0; i < Iter; i++)
{
v2 += m * v;
}
std::cout << v2[0] << " " << v2[1] << " " << v2[2] << " " << v2[3] << std::endl;
}
void RunBench_Double_GLM()
{
glm::dvec4 v(1.0);
glm::dvec4 v2;
glm::dmat4 m(1.0);
for (int i = 0; i < Iter; i++)
{
v2 += v * m;
}
auto t = v2;
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
// Benchmark: hand-written AVX version of the double-precision case. Each
// iteration computes matrix * vector as a linear combination of the matrix
// columns (m[i] scaled by component i of v) and adds it to the running sum s,
// which is printed at the end. Uses AVX (not AVX2) intrinsics only.
void RunBench_Double_AVX()
{
    const __m256d v = _mm256_set1_pd(1.0);
    __m256d s = _mm256_setzero_pd();
    // Columns of the identity matrix. _mm256_setr_pd takes its arguments in
    // element order (element 0 first); _mm256_set_pd takes them highest element
    // first and would produce an anti-diagonal matrix here.
    const __m256d m[4] =
    {
        _mm256_setr_pd(1, 0, 0, 0),
        _mm256_setr_pd(0, 1, 0, 0),
        _mm256_setr_pd(0, 0, 1, 0),
        _mm256_setr_pd(0, 0, 0, 1)
    };
    for (int i = 0; i < Iter; i++)
    {
        // Broadcast each component of v to all four elements.
        // _mm256_shuffle_pd cannot do this: it only selects within each 128-bit
        // half and takes a 4-bit immediate. So first duplicate the wanted half
        // into both halves, then pick the low/high element inside each half.
        const __m256d lo = _mm256_permute2f128_pd(v, v, 0x00); // [v0 v1 | v0 v1]
        const __m256d hi = _mm256_permute2f128_pd(v, v, 0x11); // [v2 v3 | v2 v3]
        const __m256d v0 = _mm256_permute_pd(lo, 0x0);         // v0 in every element
        const __m256d v1 = _mm256_permute_pd(lo, 0xF);         // v1 in every element
        const __m256d v2 = _mm256_permute_pd(hi, 0x0);         // v2 in every element
        const __m256d v3 = _mm256_permute_pd(hi, 0xF);         // v3 in every element
        const __m256d m0 = _mm256_mul_pd(m[0], v0);
        const __m256d m1 = _mm256_mul_pd(m[1], v1);
        const __m256d m2 = _mm256_mul_pd(m[2], v2);
        const __m256d m3 = _mm256_mul_pd(m[3], v3);
        const __m256d a0 = _mm256_add_pd(m0, m1);
        const __m256d a1 = _mm256_add_pd(m2, m3);
        const __m256d a2 = _mm256_add_pd(a0, a1);
        s = _mm256_add_pd(s, a2);
    }
    // _mm256_store_pd requires a 32-byte aligned destination.
    alignas(32) double t[4];
    _mm256_store_pd(t, s);
    std::cout << t[0] << " " << t[1] << " " << t[2] << " " << t[3] << std::endl;
}
int main()
{
std::vector<std::pair<std::string, std::function<void ()>>> benches;
benches.push_back(std::make_pair("GLM", RunBench_GLM));
benches.push_back(std::make_pair("Eigen", RunBench_Eigen));
benches.push_back(std::make_pair("GLM_SIMD", RunBench_GLM_SIMD));
benches.push_back(std::make_pair("DirectXMath", RunBench_DirectXMath));
benches.push_back(std::make_pair("Bullet_VectorMath", RunBench_Bullet_VectorMath));
benches.push_back(std::make_pair("Double_GLM", RunBench_Double_GLM));
benches.push_back(std::make_pair("Double_AVX", RunBench_Double_AVX));
for (auto& bench : benches)
{
std::cout << "Begin [ " << bench.first << " ]" << std::endl;
auto start = ch::high_resolution_clock::now();
bench.second();
auto end = ch::high_resolution_clock::now();
double elapsed = (double)ch::duration_cast<ch::milliseconds>(end - start).count() / 1000.0;
std::cout << "End [ " << bench.first << " ] : " << elapsed << " seconds" << std::endl;
}
std::cin.get();
return 0;
}
@neuro-sys
Copy link

Would be nice if you pasted the results as a comment or something!

@mathijs727
Copy link

mathijs727 commented May 19, 2020

Your AVX2 code is incorrect, the compiler warns (MSVC) or gives an error (clang) about this:

__m256d v0 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(0, 0, 0, 0));
__m256d v1 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(1, 1, 1, 1));
__m256d v2 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(2, 2, 2, 2));
__m256d v3 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(3, 3, 3, 3));

shuffle_pd can only shuffle within the top 128 bits and within the bottom 128 bits, not across them. Hence the immediate that you pass should also be only 4 bits (_MM_SHUFFLE creates an 8-bit immediate).
In this specific case, because all values of v are the same, it will produce the same answer, but this code will produce incorrect results if the values in v are not all the same.

The correct code is:

__m256d v0 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(0, 0, 0, 0));
__m256d v1 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(1, 1, 1, 1));
__m256d v2 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(2, 2, 2, 2));
__m256d v3 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(3, 3, 3, 3));

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment