bellbind/avxvec.c

## avxvec.c
// clang -std=c11 -mavx avxvec.c -o avxvec
#include <x86intrin.h>
#include <stdio.h>

// see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX
int main() {
  double mem[4] = {1.0, 2.0, 3.0, 4.0};
  //__m256d v = _mm256_load_pd(mem);
  __m256d v = _mm256_set_pd(1, 2, 3, 4); // (e3, e2, e1, e1)
  __m256d sq = _mm256_mul_pd(v, v);
  _mm256_store_pd(mem, sq);

  printf("%f, %f , %f, %f\n", mem[0], mem[1], mem[2], mem[3]);
  return 0;
}

## mandel.c
// clang -std=c11 -msse3 mandel.c -o mandel
#include <stdio.h>
#include <complex.h>
#include <x86intrin.h>

// basic arith for complex functions with SIMD
typedef __m128d float64x2;

static inline double cnorm(const float64x2 z) {
  const float64x2 sq = _mm_mul_pd(z, z);
  const float64x2 norm = _mm_hadd_pd(sq, sq);
  double mem[2];
  _mm_store_pd(mem, norm);
  return mem[0];
}
static inline float64x2 cadd(const float64x2 a, const float64x2 b) {
  return _mm_add_pd(a, b);
}
static inline float64x2 cmul(const float64x2 a, const float64x2 b) {
  // r = [a0*b0 - a1*b1, a0*b1 + a1*b0]
  // r = addsub(c=[a0*b0, a0*b1], d=[a1*b1, a1*b0])
  // c = [a0, a0] * [b0, b1], d = [a1, a1] * [b1, b0]
  //return _mm_addsub_pd(_mm_mul_pd(_mm_unpacklo_pd(a, a), b),
  //                     _mm_mul_pd(_mm_unpackhi_pd(a, a),
  //                                _mm_shuffle_pd(b, b, 0b01)));
  const float64x2 a0 = _mm_unpacklo_pd(a, a);
  const float64x2 c = _mm_mul_pd(a0, b); // as a0
  const float64x2 a1 = _mm_unpackhi_pd(a, a); // as a
  //e.g.: shuffle(a, b, 0b01) => [a1, b0], shuffle(a, b, 0b10) => [a0, b1]
  const float64x2 b_ = _mm_shuffle_pd(b, b, 0b01);// as b
  const float64x2 d = _mm_mul_pd(a1, b_); // as a
  return _mm_addsub_pd(c, d);
}

// from standard complex type in C99
static inline float64x2 c2m(const double complex z) {
  // set_pd(e1, e0) => [e0, e1]
  return _mm_set_pd(cimag(z), creal(z));
}
static inline double complex m2c(const float64x2 m) {
  double mem[2];
  _mm_store_pd(mem, m);
  return mem[0] + mem[1]*I;
}
static inline float64x2 d2m(const double re, const double im) {
  return _mm_set_pd(im, re);
}

// mandelbrot
static int mandel(const float64x2 c) {
  float64x2 z = c;
  for (int i = 0; i < 50; i++) {
    if (cnorm(z) > 4.0) return i;
    z = cadd(cmul(z, z), c);
  }
  return -1;
}


static char plot(int n) {
  if (n == -1) return '@';
  if (n > 10) return '*';
  if (n > 5) return '+';
  if (n > 3) return '-';
  if (n > 2) return '.';
  return ' ';
}

extern int main() {
  const int w = 80, h = 24;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      const double re = 2.0 * (2.0 * x / w - 1.0), im = (2.0 * y / h - 1.0);
      putchar(plot(mandel(d2m(re, im))));
    }
    putchar('\n');
  }
  return 0;
}
	// clang -std=c11 -mavx avxvec.c -o avxvec
	#include <x86intrin.h>
	#include <stdio.h>

	// see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX
	int main() {
	double mem[4] = {1.0, 2.0, 3.0, 4.0};
	//__m256d v = _mm256_load_pd(mem);
	__m256d v = _mm256_set_pd(1, 2, 3, 4); // (e3, e2, e1, e1)
	__m256d sq = _mm256_mul_pd(v, v);
	_mm256_store_pd(mem, sq);

	printf("%f, %f , %f, %f\n", mem[0], mem[1], mem[2], mem[3]);
	return 0;
	}
	// clang -std=c11 -msse3 mandel.c -o mandel
	#include <stdio.h>
	#include <complex.h>
	#include <x86intrin.h>

	// basic arith for complex functions with SIMD
	typedef __m128d float64x2;

	static inline double cnorm(const float64x2 z) {
	const float64x2 sq = _mm_mul_pd(z, z);
	const float64x2 norm = _mm_hadd_pd(sq, sq);
	double mem[2];
	_mm_store_pd(mem, norm);
	return mem[0];
	}
	static inline float64x2 cadd(const float64x2 a, const float64x2 b) {
	return _mm_add_pd(a, b);
	}
	static inline float64x2 cmul(const float64x2 a, const float64x2 b) {
	// r = [a0b0 - a1b1, a0b1 + a1b0]
	// r = addsub(c=[a0b0, a0b1], d=[a1b1, a1b0])
	// c = [a0, a0] * [b0, b1], d = [a1, a1] * [b1, b0]
	//return _mm_addsub_pd(_mm_mul_pd(_mm_unpacklo_pd(a, a), b),
	// _mm_mul_pd(_mm_unpackhi_pd(a, a),
	// _mm_shuffle_pd(b, b, 0b01)));
	const float64x2 a0 = _mm_unpacklo_pd(a, a);
	const float64x2 c = _mm_mul_pd(a0, b); // as a0
	const float64x2 a1 = _mm_unpackhi_pd(a, a); // as a
	//e.g.: shuffle(a, b, 0b01) => [a1, b0], shuffle(a, b, 0b10) => [a0, b1]
	const float64x2 b_ = _mm_shuffle_pd(b, b, 0b01);// as b
	const float64x2 d = _mm_mul_pd(a1, b_); // as a
	return _mm_addsub_pd(c, d);
	}

	// from standard complex type in C99
	static inline float64x2 c2m(const double complex z) {
	// set_pd(e1, e0) => [e0, e1]
	return _mm_set_pd(cimag(z), creal(z));
	}
	static inline double complex m2c(const float64x2 m) {
	double mem[2];
	_mm_store_pd(mem, m);
	return mem[0] + mem[1]*I;
	}
	static inline float64x2 d2m(const double re, const double im) {
	return _mm_set_pd(im, re);
	}

	// mandelbrot
	static int mandel(const float64x2 c) {
	float64x2 z = c;
	for (int i = 0; i < 50; i++) {
	if (cnorm(z) > 4.0) return i;
	z = cadd(cmul(z, z), c);
	}
	return -1;
	}


	static char plot(int n) {
	if (n == -1) return '@';
	if (n > 10) return '*';
	if (n > 5) return '+';
	if (n > 3) return '-';
	if (n > 2) return '.';
	return ' ';
	}

	extern int main() {
	const int w = 80, h = 24;
	for (int y = 0; y < h; y++) {
	for (int x = 0; x < w; x++) {
	const double re = 2.0 * (2.0 * x / w - 1.0), im = (2.0 * y / h - 1.0);
	putchar(plot(mandel(d2m(re, im))));
	}
	putchar('\n');
	}
	return 0;
	}