Skip to content

Instantly share code, notes, and snippets.

@ess7
Created October 16, 2018 04:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ess7/81b69e48ec55752d8497cae642e4ec12 to your computer and use it in GitHub Desktop.
Save ess7/81b69e48ec55752d8497cae642e4ec12 to your computer and use it in GitHub Desktop.
JSFX extension: dot product
/*
 * dotprod2 - dot product of one coefficient vector against a two-channel
 * interleaved signal, producing both channel sums in a single pass.
 *
 * Script-visible parameters (each parms[i] points at an EEL_F):
 *   parms[0]  y0    out: sum over i of x[2*i]     * c[i]
 *   parms[1]  y1    out: sum over i of x[2*i + 1] * c[i]
 *   parms[2]  xofs  in:  RAM offset of the interleaved input (2*n items)
 *   parms[3]  cofs  in:  RAM offset of the coefficients (n items)
 *   parms[4]  n     in:  number of coefficients
 *
 * opaque is cast to the EEL_F** block table handed to __NSEEL_RAMAlloc.
 * np is not inspected; all five parms[] entries are dereferenced.
 *
 * xofs, cofs and n are truncated toward zero. The call is rejected (y0 and y1
 * left at 0.0) when n < 1, when either offset lies outside
 * [0, NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK), when either region would run
 * past the end of the RAM block it starts in, or when __NSEEL_RAMAlloc fails.
 *
 * Always returns 0.0; the results are delivered through parms[0] and parms[1].
 */
static EEL_F NSEEL_CGEN_CALL dotprod2(void *opaque, INT_PTR np, EEL_F **parms) {
  EEL_F **blocks = (EEL_F **)opaque;

  /* Inputs are read before the outputs are zeroed, so an output that aliases
     an input does not clobber it. */
  const EEL_F xofs_f = *parms[2];
  const EEL_F cofs_f = *parms[3];
  const EEL_F n_f = *parms[4];
  *parms[0] = 0.0;
  *parms[1] = 0.0;

  /* Range-check in the floating domain BEFORE converting: casting NaN or an
     out-of-range value to int is undefined behaviour. The test is written as
     a negated conjunction so that NaN (every comparison false) is rejected.
     "> -1.0" rather than ">= 0.0" because values in (-1, 0) truncate to 0. */
  const EEL_F ram_items = (EEL_F)NSEEL_RAM_BLOCKS * (EEL_F)NSEEL_RAM_ITEMSPERBLOCK;
  if (unlikely(!(xofs_f > -1.0 && xofs_f < ram_items &&
                 cofs_f > -1.0 && cofs_f < ram_items &&
                 n_f >= 1.0 && n_f <= (EEL_F)NSEEL_RAM_ITEMSPERBLOCK))) {
    return 0.0;
  }

  const int xofs = (int)xofs_f;
  const int cofs = (int)cofs_f;
  int n = (int)n_f;

  /* Both regions must fit inside the block they start in. The mask assumes
     NSEEL_RAM_ITEMSPERBLOCK is a power of two. n is already bounded by
     NSEEL_RAM_ITEMSPERBLOCK, so 2*n cannot overflow here. */
  const int in_block_mask = NSEEL_RAM_ITEMSPERBLOCK - 1;
  if (unlikely((xofs & in_block_mask) + 2 * n > NSEEL_RAM_ITEMSPERBLOCK ||
               (cofs & in_block_mask) + n > NSEEL_RAM_ITEMSPERBLOCK)) {
    return 0.0;
  }

  /* NOTE(review): upstream WDL/EEL2 code compares against the ADDRESS
     &nseel_ramalloc_onfail; the comparison below is kept as found. TODO
     confirm how nseel_ramalloc_onfail is declared in this tree. */
  EEL_F *xptr = __NSEEL_RAMAlloc(blocks, xofs);
  if (unlikely(!xptr || xptr == nseel_ramalloc_onfail)) {
    return 0.0;
  }
  EEL_F *cptr = __NSEEL_RAMAlloc(blocks, cofs);
  if (unlikely(!cptr || cptr == nseel_ramalloc_onfail)) {
    return 0.0;
  }

  /* Main loop handles multiples of four coefficients; the tail is below. */
  const int remaining = n % 4;
  n -= remaining;

  /* Four independent accumulators; each holds {channel 0, channel 1} partial
     sums. Unaligned loads are used for x; each coefficient is broadcast to
     both lanes. */
  __m128d y1 = _mm_setzero_pd();
  __m128d y2 = _mm_setzero_pd();
  __m128d y3 = _mm_setzero_pd();
  __m128d y4 = _mm_setzero_pd();
  while (n > 0) {
    y1 = _mm_add_pd(y1, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    y2 = _mm_add_pd(y2, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    y3 = _mm_add_pd(y3, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    y4 = _mm_add_pd(y4, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    n -= 4;
  }

  /* Tail: entering at case k processes exactly k leftover coefficients by
     falling through the cases beneath it. */
  switch (remaining) {
  case 3:
    y1 = _mm_add_pd(y1, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    /* fallthrough */
  case 2:
    y2 = _mm_add_pd(y2, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr++)));
    xptr += 2;
    /* fallthrough */
  case 1:
    y3 = _mm_add_pd(y3, _mm_mul_pd(_mm_loadu_pd(xptr), _mm_load1_pd(cptr)));
    break;
  default:
    break;
  }

  /* Reduce the four accumulators pairwise and hand back both lanes. */
  EEL_F y[2];
  _mm_storeu_pd(y, _mm_add_pd(_mm_add_pd(y1, y2), _mm_add_pd(y3, y4)));
  *parms[0] = y[0];
  *parms[1] = y[1];
  return 0.0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment