Skip to content

Instantly share code, notes, and snippets.

@unnonouno
Last active August 29, 2015 14:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unnonouno/f92dac1959599db28620 to your computer and use it in GitHub Desktop.
Save unnonouno/f92dac1959599db28620 to your computer and use it in GitHub Desktop.
SSE inner product
vmovss (%rax), %xmm1
vmulss (%rdx), %xmm1, %xmm1
vaddss %xmm1, %xmm0, %xmm0
addq $4, %rdx
addq $4, %rax
decq %rcx
jne LBB0_2
// Scalar inner product of x and y.
//
// Walks indices 0 .. x.size()-1 in order and accumulates the products in a
// single float. Only x.size() is consulted, so y must hold at least as many
// elements as x. Returns 0 when x is empty.
float dot(const vector<float>& x,
          const vector<float>& y) {
  const size_t count = x.size();
  float acc = 0;
  size_t k = 0;
  while (k != count) {
    acc += x[k] * y[k];
    ++k;
  }
  return acc;
}
// Inner product of x and y computed with four independent partial sums.
//
// Elements are processed in groups of four, lane j accumulating the products
// at indices congruent to j modulo 4; the lanes are combined once at the end.
// Keeping the sums independent is meant to let the compiler use packed
// multiply/add instructions for the main loop.
//
// Any size is accepted: the 0-3 elements left after the last full group are
// added to lanes 1..3 individually instead of being read as part of a full
// group, which would index past the end of both vectors.
//
// Only x.size() is consulted, so y must hold at least as many elements as x.
// Because the summation order differs from a plain left-to-right loop, the
// result may differ from it by floating-point rounding.
float dot_sse(const vector<float>& x,
              const vector<float>& y) {
  const size_t size = x.size();
  const size_t full = size - size % 4;  // largest multiple of 4 <= size

  float s1 = 0, s2 = 0, s3 = 0, s4 = 0;
  size_t i = 0;
  for (; i < full; i += 4) {
    s1 += x[i] * y[i];
    s2 += x[i + 1] * y[i + 1];
    s3 += x[i + 2] * y[i + 2];
    s4 += x[i + 3] * y[i + 3];
  }

  // Tail: here i == full and at most three elements remain.
  const size_t rest = size - full;
  if (rest > 0) {
    s1 += x[i] * y[i];
  }
  if (rest > 1) {
    s2 += x[i + 1] * y[i + 1];
  }
  if (rest > 2) {
    s3 += x[i + 2] * y[i + 2];
  }

  return s1 + s2 + s3 + s4;
}
vmovss (%rax,%rsi,4), %xmm1
vinsertps $16, 12(%rax,%rsi,4), %xmm1, %xmm1
vinsertps $32, 8(%rax,%rsi,4), %xmm1, %xmm1
vinsertps $48, 4(%rax,%rsi,4), %xmm1, %xmm1
vmovss (%rdx,%rsi,4), %xmm2
vinsertps $16, 12(%rdx,%rsi,4), %xmm2, %xmm2
vinsertps $32, 8(%rdx,%rsi,4), %xmm2, %xmm2
vinsertps $48, 4(%rdx,%rsi,4), %xmm2, %xmm2
vmulps %xmm2, %xmm1, %xmm1
vaddps %xmm1, %xmm0, %xmm0
addq $4, %rsi
cmpq %rcx, %rsi
jb LBB2_2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment