bzoli22 (owner)

Revisions

gist: 47092 Download_button fork
public
Description:
automatic vectorization depending on alignment of the input
Public Clone URL: git://gist.github.com/47092.git
Embed All Files: show embed
vectorization-4.s #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/*
 * _add -- element-wise sum of two arrays of 1024 single-precision floats.
 *
 * Presumed C prototype (inferred from the stack-argument layout; confirm
 * against the caller):
 *     void add(float *dst, const float *a, const float *b);
 *
 * Syntax: AT&T, 32-bit x86, arguments read from the stack frame.
 *     8(%ebp)   dst  -> held in %ebx
 *     12(%ebp)  a    -> held in %ecx
 *     16(%ebp)  b    -> held in %edx
 *
 * Effect: dst[i] = a[i] + b[i] for i = 0 .. 1023.
 *
 * Two code paths produce that result:
 *   - packed path: four floats per iteration, taken only when dst is
 *     16-byte aligned and dst is more than 16 bytes away from both a and b
 *     (in either direction);
 *   - scalar path: one float per iteration, used in every other case.
 *
 * Preserved: %ebx, %esi, %ebp (saved and restored here); %ecx and %edx are
 *            only read.
 * Modified:  %eax (left holding the final loop counter, no return value is
 *            set up), %xmm0, %xmm1 (packed path only), flags.
 */
_add:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %ebx
        movl    8(%ebp), %ebx                   # ebx = dst
        movl    12(%ebp), %ecx                  # ecx = a
        movl    16(%ebp), %edx                  # edx = b
        leal    16(%ebx), %esi                  # esi = dst + 16, used by the overlap tests

        testb   $15, %bl                        # packed store is movaps: dst must be 16-byte aligned
        jne     L_add_scalar

        /* Overlap test against a: passes when dst > a + 16 or dst + 16 < a. */
        leal    16(%ecx), %eax
        cmpl    %eax, %ebx
        ja      L_add_check_b                   # dst > a + 16 (unsigned)
        cmpl    %ecx, %esi
        jae     L_add_scalar                    # dst + 16 >= a: too close, go scalar

L_add_check_b:
        /* Overlap test against b: passes when dst > b + 16 or b > dst + 16. */
        leal    16(%edx), %eax
        cmpl    %eax, %ebx
        ja      L_add_packed                    # dst > b + 16 (unsigned)
        cmpl    %esi, %edx
        jbe     L_add_scalar                    # b <= dst + 16: too close, go scalar

L_add_packed:
        xorl    %eax, %eax                      # eax = byte offset into all three arrays
        .p2align 2,,3
L_add_packed_loop:
        movlps  (%ecx,%eax), %xmm1              # xmm1 = four floats of a, as two 8-byte loads
        movhps  8(%ecx,%eax), %xmm1
        movlps  (%edx,%eax), %xmm0              # xmm0 = four floats of b, same split load
        movhps  8(%edx,%eax), %xmm0
        addps   %xmm0, %xmm1                    # xmm1 = a + b, four lanes
        movaps  %xmm1, (%ebx,%eax)              # aligned store to dst
        addl    $16, %eax
        cmpl    $4096, %eax                     # 4096 bytes = 1024 floats * 4
        jne     L_add_packed_loop

L_add_done:
        popl    %ebx                            # reverse of the pushes in the prologue
        popl    %esi
        leave
        ret

L_add_scalar:
        xorl    %eax, %eax                      # eax = element index
        .p2align 2,,3
L_add_scalar_loop:
        movss   (%ecx,%eax,4), %xmm0            # xmm0 = a[i]
        addss   (%edx,%eax,4), %xmm0            # xmm0 += b[i]
        movss   %xmm0, (%ebx,%eax,4)            # dst[i] = xmm0
        incl    %eax
        cmpl    $1024, %eax                     # element count
        jne     L_add_scalar_loop
        jmp     L_add_done