Skip to content

Instantly share code, notes, and snippets.

@armintoepfer
Last active October 27, 2015 09:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save armintoepfer/00c3ee53cd4ff48204b7 to your computer and use it in GitHub Desktop.
Save armintoepfer/00c3ee53cd4ff48204b7 to your computer and use it in GitHub Desktop.
exp() for 512-bit float vectors (__m512), implemented with Intel KNC (Knights Corner) intrinsics
#include <immintrin.h>
/* Range-reduction factor, broadcast to all eight double lanes.
 * exp512_ps multiplies its argument by this value (exactly 1/256) before
 * evaluating the polynomial below. */
static const __m512d c256 = _mm512_set1_pd(0.00390625);

/* Polynomial coefficients used by exp512_ps in Horner form:
 *   c0 + t*(c1 + t*(c2 + t*(c3 + t*(c4 + t*c5))))
 * The values are close to 1/k! (1, 1, 1/2, 1/6, 1/24, 1/120) but not equal
 * to them -- presumably a fitted approximation; origin not shown here. */
static const __m512d c0 = _mm512_set1_pd(1.0);
static const __m512d c1 = _mm512_set1_pd(1.00000000006177459);
static const __m512d c2 = _mm512_set1_pd(0.49999988007542528);
static const __m512d c3 = _mm512_set1_pd(0.16666663108604157);
static const __m512d c4 = _mm512_set1_pd(0.041694294620381676);
static const __m512d c5 = _mm512_set1_pd(0.0083383426505236529);
/**
 * Approximate exp(x) for every single-precision lane of a 512-bit vector.
 *
 * Method: the input is split into two halves that are widened to double
 * precision. Each half is scaled by c256 (1/256), run through a degree-5
 * polynomial with coefficients c5..c0 (Horner form), and the result is
 * squared eight times -- i.e. raised to the 256th power -- which undoes the
 * scaling. Both halves are then narrowed back to float and merged.
 *
 * No explicit handling of NaN, infinities, overflow or underflow is
 * performed here.
 *
 * @param x  Packed single-precision inputs.
 * @return   Packed single-precision results, approximately exp(x[i]).
 */
__m512 exp512_ps(__m512 x)
{
    /* 2^8 == 256 matches the 1/256 scaling applied through c256. */
    const int squarings = 8;

    /* Widen the low half of x to double; for the high half, first swap the
     * 256-bit halves (_MM_PERM_BADC) so it sits in the low position. */
    __m512d t0 = _mm512_cvtpslo_pd(x);
    __m512d t1 = _mm512_cvtpslo_pd(_mm512_permute4f128_ps(x, _MM_PERM_BADC));
    __m512d y0, y1;

    /* Range reduction: t = x / 256. */
    t0 = _mm512_mul_pd(t0, c256);
    t1 = _mm512_mul_pd(t1, c256);

    /* Horner evaluation; the two halves are kept as independent chains. */
    y0 = _mm512_fmadd_pd(c5, t0, c4);
    y1 = _mm512_fmadd_pd(c5, t1, c4);
    y0 = _mm512_fmadd_pd(y0, t0, c3);
    y1 = _mm512_fmadd_pd(y1, t1, c3);
    y0 = _mm512_fmadd_pd(y0, t0, c2);
    y1 = _mm512_fmadd_pd(y1, t1, c2);
    y0 = _mm512_fmadd_pd(y0, t0, c1);
    y1 = _mm512_fmadd_pd(y1, t1, c1);
    y0 = _mm512_fmadd_pd(y0, t0, c0);
    y1 = _mm512_fmadd_pd(y1, t1, c0);

    /* Undo the range reduction: y = y^256 via repeated squaring. */
    for (int i = 0; i < squarings; ++i) {
        y0 = _mm512_mul_pd(y0, y0);
        y1 = _mm512_mul_pd(y1, y1);
    }

    /* Narrow each half back to float (results land in the low half). */
    __m512 out = _mm512_cvtpd_pslo(y0);
    __m512 out2 = _mm512_cvtpd_pslo(y1);

    /* Mask 0xff00 selects lanes 8..15: those take out2 after its halves are
     * swapped; lanes 0..7 keep the values already in out. */
    out = _mm512_mask_permute4f128_ps(out, 0xff00, out2, _MM_PERM_BADC);

    return out;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment