Skip to content

Instantly share code, notes, and snippets.

@engie
Created April 21, 2012 19:59
Show Gist options
  • Save engie/2439337 to your computer and use it in GitHub Desktop.
Save engie/2439337 to your computer and use it in GitHub Desktop.
Hand-written SSE matrix * vector product
// Computes myX[i] = dot(400 doubles read through b_p, 400 doubles of W) for every row.
// b_p is advanced by the asm and never rewound, so consecutive rows read
// consecutive 400-double stretches of B; W is re-read from its start each row.
// NOTE(review): movapd and mulpd-with-memory-operand fault on addresses that
// are not 16-byte aligned -- assumes B.data() and W.data() are; confirm.
const double* b_p = &(B.data()[0]);
double* w_p = &(W.data()[0]);
for( uint32 i = 0; i < rows; i++ ) //For each row
{
    double* w_p_row = w_p;   // rewind the W cursor for this row
    double dp = 0;
    uint32 pairs_left = 200; // 400 doubles per row, consumed two per SSE register

    /* Dot product of one row.
     * xmm1 holds two running partial sums (even and odd elements); they are
     * folded together after the loop.
     * Both pointers are left advanced by 400 doubles: intended for b_p (next
     * row), harmless for w_p_row (reset at the top of the loop).
     * Numeric local labels (1:/2:) are used so the compiler may duplicate
     * this asm statement without producing duplicate-symbol errors.
     */
    asm (
        "xorpd %%xmm1, %%xmm1\n\t"          // accumulator = {0.0, 0.0}
        "jmp 2f\n\t"                        // test the counter before the first pass
        "1:\n\t"
        "movapd (%[va]), %%xmm0\n\t"        // xmm0 = two doubles at va
        "mulpd (%[vb]), %%xmm0\n\t"         // xmm0 *= two doubles at vb (lane-wise)
        "addpd %%xmm0, %%xmm1\n\t"          // accumulate both lanes
        "lea 16( %[va] ), %[va]\n\t"        // advance both cursors by two doubles
        "lea 16( %[vb] ), %[vb]\n\t"
        "dec %[cnt]\n\t"
        "2:\n\t"
        "test %[cnt], %[cnt]\n\t"           // loop while pairs remain
        "jnz 1b\n\t"
        // Horizontal add: fold the high lane of xmm1 into its low lane
        "movapd %%xmm1, %%xmm0\n\t"
        "shufpd $1, %%xmm0, %%xmm0\n\t"     // swap the two lanes of xmm0
        "addpd %%xmm0, %%xmm1\n\t"          // low lane of xmm1 = full dot product
        "movq %%xmm1, %[sum]\n\t"           // hand the low lane to the compiler's register
        : [sum] "=x" (dp), [va] "+r" (b_p), [vb] "+r" (w_p_row), [cnt] "+r" (pairs_left)
        :
        : "xmm0", "xmm1",                   // scratch vector registers
          "cc",                             // dec/test modify the flags
          "memory"                          // the asm reads *va / *vb behind the compiler's back
    );
    myX[i] = dp;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment