Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active March 24, 2024 09:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bellbind/38d7ae4c32d6babda916 to your computer and use it in GitHub Desktop.
Save bellbind/38d7ae4c32d6babda916 to your computer and use it in GitHub Desktop.
[c][avx][simd]example for intel SSE/AVX intrinsic api
// clang -std=c11 -mavx avxvec.c -o avxvec
#include <x86intrin.h>
#include <stdio.h>
// see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX
int main() {
double mem[4] = {1.0, 2.0, 3.0, 4.0};
//__m256d v = _mm256_load_pd(mem);
__m256d v = _mm256_set_pd(1, 2, 3, 4); // (e3, e2, e1, e1)
__m256d sq = _mm256_mul_pd(v, v);
_mm256_store_pd(mem, sq);
printf("%f, %f , %f, %f\n", mem[0], mem[1], mem[2], mem[3]);
return 0;
}
// clang -std=c11 -msse3 mandel.c -o mandel
#include <stdio.h>
#include <complex.h>
#include <x86intrin.h>
// basic arith for complex functions with SIMD
typedef __m128d float64x2;
static inline double cnorm(const float64x2 z) {
const float64x2 sq = _mm_mul_pd(z, z);
const float64x2 norm = _mm_hadd_pd(sq, sq);
double mem[2];
_mm_store_pd(mem, norm);
return mem[0];
}
static inline float64x2 cadd(const float64x2 a, const float64x2 b) {
return _mm_add_pd(a, b);
}
static inline float64x2 cmul(const float64x2 a, const float64x2 b) {
// r = [a0*b0 - a1*b1, a0*b1 + a1*b0]
// r = addsub(c=[a0*b0, a0*b1], d=[a1*b1, a1*b0])
// c = [a0, a0] * [b0, b1], d = [a1, a1] * [b1, b0]
//return _mm_addsub_pd(_mm_mul_pd(_mm_unpacklo_pd(a, a), b),
// _mm_mul_pd(_mm_unpackhi_pd(a, a),
// _mm_shuffle_pd(b, b, 0b01)));
const float64x2 a0 = _mm_unpacklo_pd(a, a);
const float64x2 c = _mm_mul_pd(a0, b); // as a0
const float64x2 a1 = _mm_unpackhi_pd(a, a); // as a
//e.g.: shuffle(a, b, 0b01) => [a1, b0], shuffle(a, b, 0b10) => [a0, b1]
const float64x2 b_ = _mm_shuffle_pd(b, b, 0b01);// as b
const float64x2 d = _mm_mul_pd(a1, b_); // as a
return _mm_addsub_pd(c, d);
}
// from standard complex type in C99
static inline float64x2 c2m(const double complex z) {
// set_pd(e1, e0) => [e0, e1]
return _mm_set_pd(cimag(z), creal(z));
}
static inline double complex m2c(const float64x2 m) {
double mem[2];
_mm_store_pd(mem, m);
return mem[0] + mem[1]*I;
}
static inline float64x2 d2m(const double re, const double im) {
return _mm_set_pd(im, re);
}
// mandelbrot
static int mandel(const float64x2 c) {
float64x2 z = c;
for (int i = 0; i < 50; i++) {
if (cnorm(z) > 4.0) return i;
z = cadd(cmul(z, z), c);
}
return -1;
}
static char plot(int n) {
if (n == -1) return '@';
if (n > 10) return '*';
if (n > 5) return '+';
if (n > 3) return '-';
if (n > 2) return '.';
return ' ';
}
extern int main() {
const int w = 80, h = 24;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
const double re = 2.0 * (2.0 * x / w - 1.0), im = (2.0 * y / h - 1.0);
putchar(plot(mandel(d2m(re, im))));
}
putchar('\n');
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment