Last active
March 24, 2024 09:20
-
-
Save bellbind/38d7ae4c32d6babda916 to your computer and use it in GitHub Desktop.
[c][avx][simd]example for intel SSE/AVX intrinsic api
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// clang -std=c11 -mavx avxvec.c -o avxvec | |
#include <x86intrin.h> | |
#include <stdio.h> | |
// see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX | |
int main() { | |
double mem[4] = {1.0, 2.0, 3.0, 4.0}; | |
//__m256d v = _mm256_load_pd(mem); | |
__m256d v = _mm256_set_pd(1, 2, 3, 4); // (e3, e2, e1, e1) | |
__m256d sq = _mm256_mul_pd(v, v); | |
_mm256_store_pd(mem, sq); | |
printf("%f, %f , %f, %f\n", mem[0], mem[1], mem[2], mem[3]); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// clang -std=c11 -msse3 mandel.c -o mandel | |
#include <stdio.h> | |
#include <complex.h> | |
#include <x86intrin.h> | |
// basic arith for complex functions with SIMD | |
typedef __m128d float64x2; | |
static inline double cnorm(const float64x2 z) { | |
const float64x2 sq = _mm_mul_pd(z, z); | |
const float64x2 norm = _mm_hadd_pd(sq, sq); | |
double mem[2]; | |
_mm_store_pd(mem, norm); | |
return mem[0]; | |
} | |
static inline float64x2 cadd(const float64x2 a, const float64x2 b) { | |
return _mm_add_pd(a, b); | |
} | |
static inline float64x2 cmul(const float64x2 a, const float64x2 b) { | |
// r = [a0*b0 - a1*b1, a0*b1 + a1*b0] | |
// r = addsub(c=[a0*b0, a0*b1], d=[a1*b1, a1*b0]) | |
// c = [a0, a0] * [b0, b1], d = [a1, a1] * [b1, b0] | |
//return _mm_addsub_pd(_mm_mul_pd(_mm_unpacklo_pd(a, a), b), | |
// _mm_mul_pd(_mm_unpackhi_pd(a, a), | |
// _mm_shuffle_pd(b, b, 0b01))); | |
const float64x2 a0 = _mm_unpacklo_pd(a, a); | |
const float64x2 c = _mm_mul_pd(a0, b); // as a0 | |
const float64x2 a1 = _mm_unpackhi_pd(a, a); // as a | |
//e.g.: shuffle(a, b, 0b01) => [a1, b0], shuffle(a, b, 0b10) => [a0, b1] | |
const float64x2 b_ = _mm_shuffle_pd(b, b, 0b01);// as b | |
const float64x2 d = _mm_mul_pd(a1, b_); // as a | |
return _mm_addsub_pd(c, d); | |
} | |
// from standard complex type in C99 | |
static inline float64x2 c2m(const double complex z) { | |
// set_pd(e1, e0) => [e0, e1] | |
return _mm_set_pd(cimag(z), creal(z)); | |
} | |
static inline double complex m2c(const float64x2 m) { | |
double mem[2]; | |
_mm_store_pd(mem, m); | |
return mem[0] + mem[1]*I; | |
} | |
static inline float64x2 d2m(const double re, const double im) { | |
return _mm_set_pd(im, re); | |
} | |
// mandelbrot | |
static int mandel(const float64x2 c) { | |
float64x2 z = c; | |
for (int i = 0; i < 50; i++) { | |
if (cnorm(z) > 4.0) return i; | |
z = cadd(cmul(z, z), c); | |
} | |
return -1; | |
} | |
static char plot(int n) { | |
if (n == -1) return '@'; | |
if (n > 10) return '*'; | |
if (n > 5) return '+'; | |
if (n > 3) return '-'; | |
if (n > 2) return '.'; | |
return ' '; | |
} | |
extern int main() { | |
const int w = 80, h = 24; | |
for (int y = 0; y < h; y++) { | |
for (int x = 0; x < w; x++) { | |
const double re = 2.0 * (2.0 * x / w - 1.0), im = (2.0 * y / h - 1.0); | |
putchar(plot(mandel(d2m(re, im)))); | |
} | |
putchar('\n'); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment