garrettsickles/avx2_optimized_cross_product.h

## avx2_optimized_cross_product.h
// --------------------------------------------------------------- //
// Need To Know
//    _MSC_VER: Microsoft C/C++ Compiler
//    __AVX2__: AVX2 Instruction Set Flag
//    __FMA__: Fused Multiply Add Flag
// --------------------------------------------------------------- //

// On Windows (MSVC), __AVX2__ is defined but __FMA__ is not, so define it
// here. It is given the value 1, matching what GCC/Clang predefine, so that
// both `#ifdef __FMA__` and `#if __FMA__` behave the same on every compiler.
#if defined(_MSC_VER) && defined(__AVX2__) && !defined(__FMA__)
	#define __FMA__ 1
#endif

// AVX2 and FMA instruction set
#if defined(__AVX2__) && defined(__FMA__)

	// Definition of AVX2/FMA intrinsics.
	// Note: a preprocessor directive takes no terminating ';' -- any extra
	// token after the header name is diagnosed by the compiler.
	#include <immintrin.h>

	// --------------------------------------------------------------- //
	// CROSS_PRODUCT(a, b): cross product a x b of two 3-vectors        //
	//                                                                   //
	// Arguments:                                                        //
	//    - a and b should be of type __m256d, with the x, y and z      //
	//      components stored in lanes 0, 1 and 2.                       //
	//    - Each argument appears twice in the expansion, so it is      //
	//      evaluated twice: pass expressions without side effects.     //
	//                                                                   //
	// How it works (v0..v3 denote the lanes of a vector v):            //
	//    1. _MM_SHUFFLE(3, 0, 2, 1) rearranges v into (v1, v2, v0, v3). //
	//    2. a * shuffled(b) - b * shuffled(a) gives                     //
	//         (a0*b1 - a1*b0, a1*b2 - a2*b1, a2*b0 - a0*b2, lane 3).    //
	//    3. The same shuffle applied to that difference moves the      //
	//       components into place, producing                            //
	//         (a1*b2 - a2*b1, a2*b0 - a0*b2, a0*b1 - a1*b0, lane 3).    //
	//                                                                   //
	// Result:                                                           //
	//    An __m256d whose lanes 0..2 hold the cross product. Lane 3    //
	//    holds a3*b3 - b3*a3 (nominally zero) and should be treated    //
	//    as padding.                                                    //
	// --------------------------------------------------------------- //
	#define CROSS_PRODUCT(a,b) _mm256_permute4x64_pd(\
		_mm256_sub_pd(\
			_mm256_mul_pd(a, _mm256_permute4x64_pd(b, _MM_SHUFFLE(3, 0, 2, 1))),\
			_mm256_mul_pd(b, _mm256_permute4x64_pd(a, _MM_SHUFFLE(3, 0, 2, 1)))\
		), _MM_SHUFFLE(3, 0, 2, 1)\
	)
#endif
	// --------------------------------------------------------------- //
	// Need To Know
	// _MSC_VER: Microsoft C/C++ Compiler
	// __AVX2__: AVX2 Instruction Set Flag
	// __FMA__: Fused Multiply Add Flag
	// --------------------------------------------------------------- //

	// On Windows (MSVC), __AVX2__ is defined but __FMA__ is not, so define it
	// here. It is given the value 1, matching what GCC/Clang predefine, so that
	// both `#ifdef __FMA__` and `#if __FMA__` behave the same on every compiler.
	#if defined(_MSC_VER) && defined(__AVX2__) && !defined(__FMA__)
	#define __FMA__ 1
	#endif

	// AVX2 and FMA instruction set
	#if defined(__AVX2__) && defined(__FMA__)

	// Definition of AVX2/FMA intrinsics.
	// Note: a preprocessor directive takes no terminating ';' -- any extra
	// token after the header name is diagnosed by the compiler.
	#include <immintrin.h>

	// --------------------------------------------------------------- //
	// CROSS_PRODUCT(a, b): cross product a x b of two 3-vectors //
	// Arguments: //
	// - a and b should be of type __m256d, with the x, y and z //
	//   components stored in lanes 0, 1 and 2. //
	// - Each argument appears twice in the expansion, so it is //
	//   evaluated twice: pass expressions without side effects. //
	// How it works (v0..v3 denote the lanes of a vector v): //
	// 1. _MM_SHUFFLE(3, 0, 2, 1) rearranges v into (v1, v2, v0, v3). //
	// 2. a * shuffled(b) - b * shuffled(a) gives //
	//    (a0*b1 - a1*b0, a1*b2 - a2*b1, a2*b0 - a0*b2, lane 3). //
	// 3. The same shuffle applied to that difference moves the //
	//    components into place, producing //
	//    (a1*b2 - a2*b1, a2*b0 - a0*b2, a0*b1 - a1*b0, lane 3). //
	// Result: //
	// An __m256d whose lanes 0..2 hold the cross product. Lane 3 //
	// holds a3*b3 - b3*a3 (nominally zero) and should be treated //
	// as padding. //
	// --------------------------------------------------------------- //
	#define CROSS_PRODUCT(a,b) _mm256_permute4x64_pd(\
	_mm256_sub_pd(\
	_mm256_mul_pd(a, _mm256_permute4x64_pd(b, _MM_SHUFFLE(3, 0, 2, 1))),\
	_mm256_mul_pd(b, _mm256_permute4x64_pd(a, _MM_SHUFFLE(3, 0, 2, 1)))\
	), _MM_SHUFFLE(3, 0, 2, 1)\
	)
	#endif