Skip to content

Instantly share code, notes, and snippets.

@dwilliamson
Created December 11, 2012 19:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dwilliamson/4261550 to your computer and use it in GitHub Desktop.
Save dwilliamson/4261550 to your computer and use it in GitHub Desktop.
SSE Shuffle helpers
// Compile-time-checked wrappers around the SSE shuffle and blend intrinsics.
// Lanes are picked with VectorSelect template arguments rather than hand-built
// immediate masks: every wrapper first validates its selectors with
// STATIC_ASSERT and then folds them into the immediate operand expected by the
// underlying intrinsic.
namespace simd
{
    // Short aliases for the 128-bit SSE vector types.
    typedef __m128i v128i;
    typedef __m128d v128d;
    typedef __m128  v128f;

    // Lane selectors. The helpers below use Ax for lane x of their first vector
    // argument and Bx for lane x of their second. The numeric values matter:
    // A0..A7 are 0..7 and B0..B7 are 8..15, and the mask arithmetic in the
    // helpers relies on that layout. A4..A7 and B4..B7 are rejected by every
    // helper in this namespace.
    enum VectorSelect
    {
        A0 = 0, A1, A2, A3, A4, A5, A6, A7,
        B0 = 8, B1, B2, B3, B4, B5, B6, B7
    };

    // Wraps _mm_shuffle_ps(x, y, imm).
    //   S0, S1 - selectors for result lanes 0 and 1; must be A0..A3 (lanes of x)
    //   S2, S3 - selectors for result lanes 2 and 3; must be B0..B3 (lanes of y)
    template <VectorSelect S0, VectorSelect S1, VectorSelect S2, VectorSelect S3>
    inline v128f shuffle_ps(v128f x, v128f y)
    {
        STATIC_ASSERT(S0 >= A0 && S0 <= A3);
        STATIC_ASSERT(S1 >= A0 && S1 <= A3);
        STATIC_ASSERT(S2 >= B0 && S2 <= B3);
        STATIC_ASSERT(S3 >= B0 && S3 <= B3);

        // Two bits per result lane, lowest lane first. The B selectors are
        // rebased to 0..3 before being packed. A local enum keeps the value a
        // compile-time constant, as the intrinsic requires.
        enum
        {
            lane0 = S0,
            lane1 = S1 * 4,
            lane2 = (S2 - B0) * 16,
            lane3 = (S3 - B0) * 64,
            imm   = lane0 + lane1 + lane2 + lane3
        };
        return _mm_shuffle_ps(x, y, imm);
    }

    // Wraps _mm_blend_ps(x, y, imm) with a selector for all four lanes.
    // Selector Si must be either Ai (keep lane i of x) or Bi (take lane i of y);
    // lanes cannot be moved, only chosen.
    template <VectorSelect S0, VectorSelect S1, VectorSelect S2, VectorSelect S3>
    inline v128f blend_ps(v128f x, v128f y)
    {
        STATIC_ASSERT(S0 == A0 || S0 == B0);
        STATIC_ASSERT(S1 == A1 || S1 == B1);
        STATIC_ASSERT(S2 == A2 || S2 == B2);
        STATIC_ASSERT(S3 == A3 || S3 == B3);

        // One mask bit per lane, set when that lane's selector is a B selector.
        enum
        {
            bit0 = (S0 >= B0) ? 1 : 0,
            bit1 = (S1 >= B1) ? 2 : 0,
            bit2 = (S2 >= B2) ? 4 : 0,
            bit3 = (S3 >= B3) ? 8 : 0,
            imm  = bit0 + bit1 + bit2 + bit3
        };
        return _mm_blend_ps(x, y, imm);
    }

    // Three-selector overload of blend_ps: lanes 0..2 are chosen as in the
    // four-selector version, while the mask bit for lane 3 is always left clear.
    template <VectorSelect S0, VectorSelect S1, VectorSelect S2>
    inline v128f blend_ps(v128f x, v128f y)
    {
        STATIC_ASSERT(S0 == A0 || S0 == B0);
        STATIC_ASSERT(S1 == A1 || S1 == B1);
        STATIC_ASSERT(S2 == A2 || S2 == B2);

        // One mask bit per lane, set when that lane's selector is a B selector.
        enum
        {
            bit0 = (S0 >= B0) ? 1 : 0,
            bit1 = (S1 >= B1) ? 2 : 0,
            bit2 = (S2 >= B2) ? 4 : 0,
            imm  = bit0 + bit1 + bit2
        };
        return _mm_blend_ps(x, y, imm);
    }

    // Wraps _mm_shuffle_epi32(x, imm).
    //   S0..S3 - selectors for result lanes 0..3; each must be A0..A3, i.e. a
    //            32-bit lane of the single input vector x
    template <VectorSelect S0, VectorSelect S1, VectorSelect S2, VectorSelect S3>
    inline v128i shuffle_epi32(v128i x)
    {
        STATIC_ASSERT(S0 >= A0 && S0 <= A3);
        STATIC_ASSERT(S1 >= A0 && S1 <= A3);
        STATIC_ASSERT(S2 >= A0 && S2 <= A3);
        STATIC_ASSERT(S3 >= A0 && S3 <= A3);

        // Two bits per result lane, lowest lane first.
        enum
        {
            lane0 = S0,
            lane1 = S1 * 4,
            lane2 = S2 * 16,
            lane3 = S3 * 64,
            imm   = lane0 + lane1 + lane2 + lane3
        };
        return _mm_shuffle_epi32(x, imm);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment