Skip to content

Instantly share code, notes, and snippets.

@bzoli22
Created January 14, 2009 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bzoli22/47092 to your computer and use it in GitHub Desktop.
Save bzoli22/47092 to your computer and use it in GitHub Desktop.
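Automatic vectorization that depends on the alignment of the input: the SSE path below runs only when the destination pointer is 16-byte aligned and lies more than 16 bytes away from both source pointers; otherwise the scalar loop is used.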
/*
 * _add -- element-wise sum of two arrays of 1024 single-precision floats.
 *
 * Roughly:  for (i = 0; i < 1024; i++) dst[i] = src1[i] + src2[i];
 * The C prototype is not visible here; presumably
 *     void add(float *dst, float *src1, float *src2)   -- TODO confirm.
 *
 * Syntax: AT&T, 32-bit x86, SSE.
 * In:     8(%ebp) = dst, 12(%ebp) = src1, 16(%ebp) = src2 (stack arguments)
 * Out:    nothing is set up as a return value; %eax is left holding the
 *         final loop counter
 * Saved:  %ebp, %esi, %ebx (pushed on entry, restored before each ret)
 * Clobb:  %eax, %ecx, %edx, %xmm0, %xmm1 (vector path only), flags
 *
 * Register roles after the prologue:
 *     %ebx = dst      %ecx = src1      %edx = src2
 *     %esi = dst + 16 (end of the first vector-sized chunk of dst)
 *     %eax = scratch / loop counter
 *
 * Path selection: the 4-wide loop stores with movaps, so it is taken only
 * when dst is 16-byte aligned, and only when for each source pointer
 * either dst > src + 16 or dst + 16 < src (unsigned compares).  Every
 * other case runs the one-float-at-a-time loop.
 */
        .equ    ADD_ELEM_COUNT, 1024                    /* floats per array      */
        .equ    ADD_VEC_BYTES, 16                       /* one XMM register      */
        .equ    ADD_ALIGN_MASK, ADD_VEC_BYTES - 1       /* low address bits      */
        .equ    ADD_TOTAL_BYTES, ADD_ELEM_COUNT * 4     /* bytes per array       */

_add:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %ebx
        movl    8(%ebp), %ebx                   # ebx = dst
        movl    12(%ebp), %ecx                  # ecx = src1
        movl    16(%ebp), %edx                  # edx = src2
        leal    ADD_VEC_BYTES(%ebx), %esi       # esi = dst + 16
        testb   $ADD_ALIGN_MASK, %bl            # low 4 address bits of dst
        je      Ladd_check_src1                 # aligned: vector path is a candidate

/* Scalar path: eax = element index, one float per iteration. */
Ladd_scalar:
        xorl    %eax, %eax
        .p2align 2,,3
Ladd_scalar_loop:
        movss   (%ecx,%eax,4), %xmm0            # xmm0 = src1[i]
        addss   (%edx,%eax,4), %xmm0            # xmm0 += src2[i]
        movss   %xmm0, (%ebx,%eax,4)            # dst[i] = xmm0
        incl    %eax
        cmpl    $ADD_ELEM_COUNT, %eax
        jne     Ladd_scalar_loop
        popl    %ebx
        popl    %esi
        leave
        ret

/* dst is 16-byte aligned here; now rule out dst lying close to a source. */
Ladd_check_src1:
        leal    ADD_VEC_BYTES(%ecx), %eax       # eax = src1 + 16
        cmpl    %eax, %ebx
        jbe     Ladd_src1_near                  # dst <= src1 + 16
        leal    ADD_VEC_BYTES(%edx), %eax       # eax = src2 + 16
        cmpl    %eax, %ebx
        jbe     Ladd_src2_near                  # dst <= src2 + 16

/* Vector path: eax = byte offset, four floats per iteration. */
Ladd_vector:
        xorl    %eax, %eax
        .p2align 2,,3
Ladd_vector_loop:
        movlps  (%ecx,%eax), %xmm1              # sources are loaded as two 8-byte
        movhps  8(%ecx,%eax), %xmm1             # halves: no alignment needed
        movlps  (%edx,%eax), %xmm0
        movhps  8(%edx,%eax), %xmm0
        addps   %xmm0, %xmm1                    # xmm1 = src1[i..i+3] + src2[i..i+3]
        movaps  %xmm1, (%ebx,%eax)              # aligned store: dst and eax are multiples of 16
        addl    $ADD_VEC_BYTES, %eax
        cmpl    $ADD_TOTAL_BYTES, %eax
        jne     Ladd_vector_loop
        popl    %ebx
        popl    %esi
        leave
        ret

/* Reached with dst <= src1 + 16. */
Ladd_src1_near:
        cmpl    %ecx, %esi
        jae     Ladd_scalar                     # dst + 16 >= src1: too close to src1
        leal    ADD_VEC_BYTES(%edx), %eax       # src1 is clear; eax = src2 + 16
        cmpl    %eax, %ebx
        ja      Ladd_vector                     # dst > src2 + 16: src2 is clear too

/* Reached with dst <= src2 + 16 and src1 already known to be clear. */
Ladd_src2_near:
        cmpl    %esi, %edx
        jbe     Ladd_scalar                     # src2 <= dst + 16: too close to src2
        jmp     Ladd_vector
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment