Last active
May 6, 2019 18:52
-
-
Save amadio/6395811 to your computer and use it in GitHub Desktop.
Multiplication of quaternions with minimal instructions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <pmmintrin.h> /* SSE3 intrinsics */ | |
/* multiplication of two quaternions (x, y, z, w) x (a, b, c, d) */ | |
__m128 _mm_cross4_ps(__m128 xyzw, __m128 abcd) | |
{ | |
/* the product of two quaternions is: */ | |
/* (X,Y,Z,W) = (xd+yc-zb+wa, -xc+yd+za+wb, xb-ya+zd+wc, -xa-yb-zc+wd) */ | |
__m128 wzyx = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(0,1,2,3)); | |
__m128 baba = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(0,1,0,1)); | |
__m128 dcdc = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(2,3,2,3)); | |
/* variable names below are for parts of componens of result (X,Y,Z,W) */ | |
/* nX stands for -X and similarly for the other components */ | |
/* znxwy = (xb - ya, zb - wa, wd - zc, yd - xc) */ | |
__m128 ZnXWY = _mm_hsub_ps(_mm_mul_ps(xyzw, baba), _mm_mul_ps(wzyx, dcdc)); | |
/* xzynw = (xd + yc, zd + wc, wb + za, yb + xa) */ | |
__m128 XZYnW = _mm_hadd_ps(_mm_mul_ps(xyzw, dcdc), _mm_mul_ps(wzyx, baba)); | |
/* _mm_shuffle_ps(XZYnW, ZnXWY, _MM_SHUFFLE(3,2,1,0)) */ | |
/* = (xd + yc, zd + wc, wd - zc, yd - xc) */ | |
/* _mm_shuffle_ps(ZnXWY, XZYnW, _MM_SHUFFLE(2,3,0,1)) */ | |
/* = (zb - wa, xb - ya, yb + xa, wb + za) */ | |
/* _mm_addsub_ps adds elements 1 and 3 and subtracts elements 0 and 2, so we get: */ | |
/* _mm_addsub_ps(*, *) = (xd+yc-zb+wa, xb-ya+zd+wc, wd-zc+yb+xa, yd-xc+wb+za) */ | |
__m128 XZWY = _mm_addsub_ps(_mm_shuffle_ps(XZYnW, ZnXWY, _MM_SHUFFLE(3,2,1,0)), | |
_mm_shuffle_ps(ZnXWY, XZYnW, _MM_SHUFFLE(2,3,0,1))); | |
/* now we only need to shuffle the components in place and return the result */ | |
return _mm_shuffle_ps(XZWY, XZWY, _MM_SHUFFLE(2,1,3,0)); | |
/* total operations: 6 shuffles, 4 multiplications, 3 compound additions/subtractions */ | |
/* in assembly, this should compile to 18 instructions in total with optimizations */ | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
helpful