Skip to content

Instantly share code, notes, and snippets.

@dmikushin
Last active March 28, 2016 22:15
Show Gist options
  • Save dmikushin/f2d56c5d6efcd4b0bea1 to your computer and use it in GitHub Desktop.
Save dmikushin/f2d56c5d6efcd4b0bea1 to your computer and use it in GitHub Desktop.
AVX-512 swizzle in native assembly for k1om (Intel Xeon Phi Knights Corner)
// AVX-512 swizzle in native assembly for k1om (Intel Xeon Phi Knights Corner)
//
// (c) 2016 Dmitry Mikushin dmitry@parallel-compute.org
//
// $ icc -no-gcc -mmic -O3 -std=c99 test_swizzle.c -o test_swizzle
// $ micnativeloadex ./test_swizzle
// 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 8.000000
// 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000
#include <immintrin.h>
#include <stdio.h>
#ifdef __GNUC__
/*
 * Selectors for the register swizzle primitives.
 *
 * These are only declared when __GNUC__ is defined; presumably the
 * toolchain's <immintrin.h> supplies them otherwise -- TODO confirm.
 *
 * The letter pattern next to each enumerator describes how the elements
 * of every group of four lanes (d c b a, and likewise h g f e) are
 * rearranged.
 */

/* Alternative spelling of the identity (no-op) swizzle. */
#define _MM_SWIZ_REG_DCBA _MM_SWIZ_REG_NONE

typedef enum {
    _MM_SWIZ_REG_NONE = 0, /* hgfe dcba - identity, nothing is moved */
    _MM_SWIZ_REG_CDAB = 1, /* ghef cdab - swap neighbouring pairs */
    _MM_SWIZ_REG_BADC = 2, /* fehg badc - swap with the element two away */
    _MM_SWIZ_REG_AAAA = 3, /* eeee aaaa - broadcast element a */
    _MM_SWIZ_REG_BBBB = 4, /* ffff bbbb - broadcast element b */
    _MM_SWIZ_REG_CCCC = 5, /* gggg cccc - broadcast element c */
    _MM_SWIZ_REG_DDDD = 6, /* hhhh dddd - broadcast element d */
    _MM_SWIZ_REG_DACB = 7  /* hegf dacb - cross-product ordering */
} _MM_SWIZZLE_ENUM;
#endif
/*
 * Apply a register swizzle to a 512-bit vector of doubles through inline
 * assembly instead of the compiler intrinsic.
 *
 * val - vector to be swizzled.
 * imm - swizzle selector, one of the _MM_SWIZ_REG_* constants.
 *
 * Returns val after a `vmovapd` carrying the requested swizzle modifier has
 * been applied to it. For _MM_SWIZ_REG_DCBA (the identity pattern), and for
 * any value that is not a known selector, val is returned untouched.
 *
 * The function is `static inline` rather than plain `inline`: under C99
 * semantics a plain `inline` definition does not emit an external
 * definition, so a call the compiler decides not to inline (for example
 * at -O0) would be left unresolved at link time.
 */
static inline __m512d mm512_swizzle_pd(__m512d val, _MM_SWIZZLE_ENUM imm)
{
#if 1
    /*
     * One asm statement per pattern, because the swizzle modifier is part of
     * the instruction text and cannot be passed as a run-time operand here.
     * "+x" makes val both the source and the destination of the move.
     * NOTE(review): the doubled braces are presumably how a literal
     * `{...}` is passed through to the assembler -- confirm for the
     * compiler in use.
     */
    switch (imm)
    {
    case _MM_SWIZ_REG_DCBA :
        /* Identity: nothing to do. */
        break;
    case _MM_SWIZ_REG_CDAB :
        __asm__ ("vmovapd %0{{cdab}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_BADC :
        __asm__ ("vmovapd %0{{badc}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_AAAA :
        __asm__ ("vmovapd %0{{aaaa}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_BBBB :
        __asm__ ("vmovapd %0{{bbbb}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_CCCC :
        __asm__ ("vmovapd %0{{cccc}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_DDDD :
        __asm__ ("vmovapd %0{{dddd}}, %0" : "+x"(val));
        break;
    case _MM_SWIZ_REG_DACB :
        __asm__ ("vmovapd %0{{dacb}}, %0" : "+x"(val));
        break;
    default :
        /* Unknown selector: leave the vector as it is. */
        break;
    }
    return val;
#else
    /*
     * Intrinsic-based reference path (disabled).
     * NOTE(review): this branch hard-codes _MM_SWIZ_REG_CDAB and ignores
     * imm, so it only matches the assembly path for that one selector.
     */
    return _mm512_swizzle_pd(val, _MM_SWIZ_REG_CDAB);
#endif
}
/*
 * Print the eight double-precision elements of a 512-bit vector on one
 * line of stdout, each formatted with "%f" and followed by a space, then
 * terminate the line.
 *
 * val - vector whose elements are printed, lowest array index first.
 */
static void _mm512_print_pd(__m512d val)
{
    /* _mm512_store_pd is given a 64-byte aligned destination buffer. */
    double lanes[8] __attribute__((aligned(64)));
    const size_t lane_count = sizeof lanes / sizeof lanes[0];
    size_t lane = 0;

    _mm512_store_pd(lanes, val);

    while (lane < lane_count) {
        printf("%f ", lanes[lane]);
        ++lane;
    }
    printf("\n");
}
/*
 * Demo driver: load the values 1..8 into a vector, print it, apply the
 * CDAB swizzle through mm512_swizzle_pd, and print the result.
 *
 * Always returns 0.
 */
int main()
{
    /* 64-byte aligned source buffer for the aligned vector load. */
    double input[8] __attribute__((aligned(64))) = { 1, 2, 3, 4, 5, 6, 7, 8 };

    /* Show the vector as loaded. */
    __m512d vec = _mm512_load_pd(input);
    _mm512_print_pd(vec);

    /* Show the vector after the CDAB swizzle. */
    __m512d swizzled = mm512_swizzle_pd(vec, _MM_SWIZ_REG_CDAB);
    _mm512_print_pd(swizzled);

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment