Last active
March 28, 2016 22:15
-
-
Save dmikushin/f2d56c5d6efcd4b0bea1 to your computer and use it in GitHub Desktop.
AVX-512 swizzle in native assembly for k1om (Intel Xeon Phi Knights Corner)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// AVX-512 swizzle in native assembly for k1om (Intel Xeon Phi Knights Corner)
//
// (c) 2016 Dmitry Mikushin dmitry@parallel-compute.org
//
// $ icc -no-gcc -mmic -O3 -std=c99 test_swizzle.c -o test_swizzle
// $ micnativeloadex ./test_swizzle
// 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 8.000000
// 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000 8.000000
#include <immintrin.h> | |
#include <stdio.h> | |
/*
 * Fallback definition of the register swizzle selectors for GNU-style
 * compilers.
 *
 * The Intel compiler also predefines __GNUC__ unless it is invoked with
 * -no-gcc, so it is excluded explicitly here; otherwise this typedef could
 * clash with the one presumably shipped in its own intrinsic headers
 * (NOTE(review): confirm against the headers of the toolchain in use).
 */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
/* Constants for register swizzle primitives. */
typedef enum {
    _MM_SWIZ_REG_NONE, /* hgfe dcba - Nop */
/* DCBA is an alias for the no-op selector. */
#define _MM_SWIZ_REG_DCBA _MM_SWIZ_REG_NONE
    _MM_SWIZ_REG_CDAB, /* ghef cdab - Swap pairs */
    _MM_SWIZ_REG_BADC, /* fehg badc - Swap with two-away */
    _MM_SWIZ_REG_AAAA, /* eeee aaaa - broadcast a element */
    _MM_SWIZ_REG_BBBB, /* ffff bbbb - broadcast b element */
    _MM_SWIZ_REG_CCCC, /* gggg cccc - broadcast c element */
    _MM_SWIZ_REG_DDDD, /* hhhh dddd - broadcast d element */
    _MM_SWIZ_REG_DACB  /* hegf dacb - cross-product */
} _MM_SWIZZLE_ENUM;
#endif
/*
 * Apply a register swizzle to a vector of eight doubles using inline
 * assembly.
 *
 * val  vector to permute; it is used as both source and destination of the
 *      emitted instruction (read-write "+x" operand).
 * imm  swizzle selector; each enumerator maps to the identically named
 *      swizzle modifier on a vmovapd instruction.
 *
 * Returns the swizzled vector. _MM_SWIZ_REG_DCBA (the no-op selector) and
 * any value outside the enumeration return val unchanged.
 *
 * Declared static inline: under C99 a plain `inline` definition provides no
 * external definition, so a call the compiler chose not to inline would
 * fail to link.
 *
 * NOTE(review): the doubled braces in the asm templates are presumably the
 * compiler's escape for literal braces reaching the assembler - confirm for
 * the toolchain in use.
 */
static inline __m512d mm512_swizzle_pd(__m512d val, _MM_SWIZZLE_ENUM imm)
{
#if 1
    switch (imm)
    {
    case _MM_SWIZ_REG_DCBA : break; /* identity: no instruction emitted */
    case _MM_SWIZ_REG_CDAB : __asm__ ("vmovapd %0{{cdab}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_BADC : __asm__ ("vmovapd %0{{badc}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_AAAA : __asm__ ("vmovapd %0{{aaaa}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_BBBB : __asm__ ("vmovapd %0{{bbbb}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_CCCC : __asm__ ("vmovapd %0{{cccc}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_DDDD : __asm__ ("vmovapd %0{{dddd}}, %0" : "+x"(val)); break;
    case _MM_SWIZ_REG_DACB : __asm__ ("vmovapd %0{{dacb}}, %0" : "+x"(val)); break;
    default : break; /* unknown selector: leave val untouched */
    }
    return val;
#else
    /*
     * Disabled reference path built on the compiler intrinsic.
     * NOTE: it ignores imm and always applies the CDAB swizzle.
     */
    return _mm512_swizzle_pd(val, _MM_SWIZ_REG_CDAB);
#endif
}
/*
 * Print the eight double lanes of val on a single line of stdout, lowest
 * array index first, each followed by a space, then a newline.
 */
static void _mm512_print_pd(__m512d val)
{
    /* 64-byte aligned scratch buffer receiving the stored vector. */
    double lanes[8] __attribute__((aligned(64)));
    int lane = 0;

    _mm512_store_pd(lanes, val);
    while (lane < 8)
    {
        printf("%f ", lanes[lane]);
        lane++;
    }
    printf("\n");
}
int main() | |
{ | |
double values[8] __attribute__((aligned(64))) = { 1, 2, 3, 4, 5, 6, 7, 8 }; | |
__m512d val = _mm512_load_pd(values); | |
_mm512_print_pd(val); | |
val = mm512_swizzle_pd(val, _MM_SWIZ_REG_CDAB); | |
_mm512_print_pd(val); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment