Skip to content

Instantly share code, notes, and snippets.

@mntone
Last active December 26, 2019 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mntone/f04afa0cb26c2bf7bb0aaffd36065a6a to your computer and use it in GitHub Desktop.
Save mntone/f04afa0cb26c2bf7bb0aaffd36065a6a to your computer and use it in GitHub Desktop.
ゴリ押しやった記念。もっといい書き方あったので本番コードはそれを採用。でもこれだけ並んでるのも楽しいよね。 // MIT License
typedef __m128i uint16x8_t;
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return a; }
static inline uint16x8_t _mm_alignr_epi8_sse2(uint16x8_t a, uint16x8_t b, int align) {
return _mm_or_si128(_mm_slli_si128(a, 16 - 2 * align), _mm_srli_si128(b, 2 * align));
}
#if defined(_SIMD_X86_SSSE3)
template<> inline uint16x8_t uint16x8_swizzle<7, 0, 1, 2, 3, 4, 5, 6>(uint16x8_t a) { return _mm_alignr_epi8(a, a, 2); }
template<> inline uint16x8_t uint16x8_swizzle<5, 6, 7, 0, 1, 2, 3, 4>(uint16x8_t a) { return _mm_alignr_epi8(a, a, 6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 4, 5, 6, 7, 0, 1, 2>(uint16x8_t a) { return _mm_alignr_epi8(a, a, 10); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 4, 5, 6, 7, 0>(uint16x8_t a) { return _mm_alignr_epi8(a, a, 14); }
#elif defined(_SIMD_X86_SSE2)
template<> inline uint16x8_t uint16x8_swizzle<7, 0, 1, 2, 3, 4, 5, 6>(uint16x8_t a) { return _mm_alignr_epi8_sse2(a, a, 2); }
template<> inline uint16x8_t uint16x8_swizzle<5, 6, 7, 0, 1, 2, 3, 4>(uint16x8_t a) { return _mm_alignr_epi8_sse2(a, a, 6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 4, 5, 6, 7, 0, 1, 2>(uint16x8_t a) { return _mm_alignr_epi8_sse2(a, a, 10); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 4, 5, 6, 7, 0>(uint16x8_t a) { return _mm_alignr_epi8_sse2(a, a, 14); }
#endif
#if defined(_SIMD_X86_SSE2)
// ---
// 32-bit Unpack
// ---
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 2, 3, 2, 3>(uint16x8_t a) { return _mm_unpacklo_epi32(a, a); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 6, 7, 6, 7>(uint16x8_t a) { return _mm_unpackhi_epi32(a, a); }
// ---
// 64-bit Unpack
// ---
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 0, 1, 2, 3>(uint16x8_t a) { return _mm_unpacklo_epi64(a, a); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 4, 5, 6, 7>(uint16x8_t a) { return _mm_unpackhi_epi64(a, a); }
// ---
// 16-bit Shuffle Low
// ---
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x00); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x01); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x02); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x03); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x04); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x05); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x06); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x07); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x08); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x09); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 0, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x0F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x10); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x11); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x12); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x13); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x14); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x15); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x16); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x17); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x18); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x19); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1A); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x1F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x20); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x21); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x22); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x23); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x24); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x25); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x26); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x27); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x28); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x29); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 2, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x2F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x30); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x31); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x32); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x33); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x34); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x35); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x36); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x37); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x38); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x39); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 3, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x3F); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x40); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x41); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x42); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x43); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x44); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x45); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x46); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x47); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x48); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x49); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4A); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4B); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4C); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4D); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4E); }
template<> inline uint16x8_t uint16x8_swizzle<1, 0, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x4F); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x50); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x51); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x52); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x53); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x54); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x55); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x56); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x57); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x58); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x59); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5A); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5B); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5C); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5D); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5E); }
template<> inline uint16x8_t uint16x8_swizzle<1, 1, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x5F); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x60); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x61); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x62); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x63); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x64); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x65); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x66); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x67); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x68); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x69); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6A); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6B); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6C); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6D); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6E); }
template<> inline uint16x8_t uint16x8_swizzle<1, 2, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x6F); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x70); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x71); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x72); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x73); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x74); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x75); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x76); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x77); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x78); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x79); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7A); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7B); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7C); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7D); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7E); }
template<> inline uint16x8_t uint16x8_swizzle<1, 3, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x7F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x80); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x81); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x82); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x83); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x84); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x85); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x86); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x87); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x88); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x89); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 0, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x8F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x90); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x91); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x92); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x93); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x94); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x95); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x96); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x97); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x98); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x99); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 1, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0x9F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA0); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA1); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA2); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA3); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA4); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA5); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA6); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA7); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA8); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xA9); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAA); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAB); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAC); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAD); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAE); }
template<> inline uint16x8_t uint16x8_swizzle<2, 2, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xAF); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB0); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB1); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB2); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB3); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB4); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB5); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB6); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB7); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB8); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xB9); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBA); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBB); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBC); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBD); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBE); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xBF); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC0); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC1); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC2); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC3); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC4); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC5); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC7); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC8); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xC9); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCA); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCB); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCC); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCD); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCE); }
template<> inline uint16x8_t uint16x8_swizzle<3, 0, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xCF); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD0); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD1); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD2); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD3); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD4); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD5); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD7); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD8); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xD9); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDA); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDB); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDC); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDD); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDE); }
template<> inline uint16x8_t uint16x8_swizzle<3, 1, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xDF); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE0); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE1); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE2); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE3); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE4); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE5); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE7); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE8); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xE9); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xEA); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xEB); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xEC); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xED); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xEE); }
template<> inline uint16x8_t uint16x8_swizzle<3, 2, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xEF); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 0, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF0); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF1); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 0, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF2); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 0, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF3); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 1, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF4); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 1, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF5); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 1, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF6); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 1, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF7); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 2, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF8); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 2, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xF9); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 2, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFA); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFB); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 3, 0, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFC); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 3, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFD); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 3, 2, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFE); }
template<> inline uint16x8_t uint16x8_swizzle<3, 3, 3, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflelo_epi16(a, 0xFF); }
// ---
// 16-bit Shuffle High
// ---
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x00); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x01); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x02); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x03); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x04); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x05); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x06); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x07); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x08); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x09); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 4, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x0F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x10); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x11); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x12); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x13); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x15); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x16); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x17); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x18); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x18); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x19); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1A); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x1F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x20); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x21); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x22); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x23); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x24); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x25); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x26); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x27); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x28); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x29); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 6, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x2F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x30); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x31); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x32); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x33); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x34); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x35); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x36); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x37); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x38); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x39); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 7, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x3F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x40); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x41); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x42); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x43); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x44); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x45); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x46); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x47); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x48); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x49); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 4, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x4F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x50); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x51); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x52); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x53); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x54); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x55); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x56); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x57); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x58); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x59); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 5, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x5F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x60); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x61); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x62); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x63); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x64); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x65); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x66); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x67); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x68); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x69); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 6, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x6F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x70); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x71); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x72); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x73); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x74); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x75); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x76); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x77); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x78); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x79); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 5, 7, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x7F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x80); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x81); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x82); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x83); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x84); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x85); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x86); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x87); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x88); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x89); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 4, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x8F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x90); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x91); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x92); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x93); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x94); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x95); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x96); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x97); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x98); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x99); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 5, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0x9F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xA9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAD); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 6, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xAF); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xB9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBD); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xBF); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xC9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCD); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 4, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xCF); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xD9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDD); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 5, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xDF); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xE9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xEA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xEB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xEC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xED); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xEE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 6, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xEF); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 4, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF0); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 4, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF1); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 4, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF2); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 4, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF3); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 5, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF4); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 5, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF5); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 5, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF6); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 5, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF7); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 6, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF8); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 6, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xF9); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 6, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFA); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 6, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFB); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 7, 4>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFC); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 7, 5>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFD); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 7, 6>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFE); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 7, 7, 7, 7>(uint16x8_t a) { return _mm_shufflehi_epi16(a, 0xFF); }
// ---
// 32-bit Shuffle
// ---
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x00); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x01); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x02); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x03); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x04); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x05); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x06); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x07); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x08); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x09); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 0, 1, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x0F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x10); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x11); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x12); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x13); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x14); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x15); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x16); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x17); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x18); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x19); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1A); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1D); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1E); }
//template<> inline uint16x8_t uint16x8_swizzle<0, 1, 2, 3, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x1F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x20); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x21); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x22); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x23); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x24); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x25); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x26); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x27); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x28); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x29); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 4, 5, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x2F); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x30); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x31); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x32); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x33); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x34); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x35); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x36); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x37); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x38); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x39); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3A); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3B); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3C); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3D); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3E); }
template<> inline uint16x8_t uint16x8_swizzle<0, 1, 6, 7, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x3F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x40); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x41); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x42); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x43); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x44); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x45); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x46); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x47); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x48); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x49); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 0, 1, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x4F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x50); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x51); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x52); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x53); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x54); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x55); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x56); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x57); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x58); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x59); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 4, 5, 5, 6>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 2, 3, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x5F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x60); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x61); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x62); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x63); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x64); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x65); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x66); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x67); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x68); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x69); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 4, 5, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x6F); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x70); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x71); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x72); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x73); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x74); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x75); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x76); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x77); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x78); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x79); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7A); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7B); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7C); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7D); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7E); }
template<> inline uint16x8_t uint16x8_swizzle<2, 3, 6, 7, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x7F); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x80); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x81); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x82); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x83); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x84); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x85); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x86); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x87); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x88); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x89); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8A); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8B); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8C); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8D); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8E); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 0, 1, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x8F); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x90); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x91); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x92); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x93); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x94); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x95); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x96); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x97); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x98); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x99); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9A); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 4, 5, 5, 6>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9B); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9C); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9D); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9E); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 2, 3, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0x9F); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA0); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA1); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA2); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA3); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA4); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA5); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA6); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA7); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA8); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xA9); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAA); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAB); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAC); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAD); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAE); }
//template<> inline uint16x8_t uint16x8_swizzle<4, 5, 4, 5, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xAF); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB0); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB1); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB2); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB3); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB4); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB5); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB6); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB7); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB8); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xB9); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBA); }
//template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBB); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBC); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBD); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBE); }
template<> inline uint16x8_t uint16x8_swizzle<4, 5, 6, 7, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xBF); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC0); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC1); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC2); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC3); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC4); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC5); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC6); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC7); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC8); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xC9); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCA); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCB); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCC); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCD); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCE); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 0, 1, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xCF); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD0); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD1); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD2); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD3); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD4); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD5); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD6); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD7); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD8); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xD9); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDA); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 4, 5, 5, 6>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDB); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDC); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDD); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDE); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 2, 3, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xDF); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE0); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE1); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE2); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE3); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE4); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE5); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE6); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE7); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE8); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xE9); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xEA); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xEB); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xEC); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xED); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xEE); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 4, 5, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xEF); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 0, 1, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF0); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 0, 1, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF1); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 0, 1, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF2); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 0, 1, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF3); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 2, 3, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF4); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 2, 3, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF5); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 2, 3, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF6); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 2, 3, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF7); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 4, 5, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF8); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 4, 5, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xF9); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 4, 5, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFA); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 4, 5, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFB); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 6, 7, 0, 1>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFC); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 6, 7, 2, 3>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFD); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 6, 7, 4, 5>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFE); }
template<> inline uint16x8_t uint16x8_swizzle<6, 7, 6, 7, 6, 7, 6, 7>(uint16x8_t a) { return _mm_shuffle_epi32(a, 0xFF); }
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment