Created
June 27, 2013 04:19
-
-
Save tetsu-koba/5873910 to your computer and use it in GitHub Desktop.
Auto-vectorization of ARM NEON integer operations in GCC
arm-linux-gnueabihf-gcc -mfpu=neon -O2 -S int_average.c
arm-linux-gnueabihf-gcc -mfpu=neon -O3 -S int_average.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Return the integer average of the first `size` elements of `array`.
 *
 * The elements are summed into a 64-bit accumulator, so the running total
 * cannot overflow the way an `int` sum could; the quotient of that total
 * and `size` is then returned as an int (integer division, so any
 * fractional part is discarded).
 *
 * array: pointer to at least `size` readable ints (not checked for NULL).
 * size:  number of elements to average.
 *
 * Returns 0 when size <= 0, which also guards the division below.
 */
int int_average(int* array, int size)
{
    int i;
    long long total = 0;

    if (size <= 0) {
        return 0;
    }
    for (i = 0; i < size; i++) {
        total += array[i];
    }
    return total / size;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ -----------------------------------------------------------------------
@ int int_average(int *array, int size)
@
@ GCC-generated listing for int_average.c: GNU as, unified syntax,
@ ARMv7-A, Thumb instruction set.  This version is purely scalar; no NEON
@ instruction appears in it even though ".fpu neon" is selected.
@
@ In:    r0 = array, r1 = size
@ Out:   r0 = total / size, or 0 when size <= 0
@ Calls: __aeabi_ldivmod (64-bit signed divide helper; here the dividend
@        is passed in r1:r0, the divisor in r3:r2, and the low word of
@        the result is returned from r0)
@ Saved: r4, r5, lr (r3 is pushed as well - presumably only to keep the
@        stack 8-byte aligned at the call; confirm against the AAPCS)
@ -----------------------------------------------------------------------
        .syntax unified
        .arch armv7-a
        @ Build attributes emitted by the compiler.  The tag numbers are
        @ defined by the ARM EABI build-attributes addendum; they are not
        @ decoded here.
        .eabi_attribute 27, 3
        .eabi_attribute 28, 1
        .fpu neon
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .thumb
        .file "int_average.c"
        .global __aeabi_ldivmod
        .text
        .align 2
        .global int_average
        .thumb
        .thumb_func
        .type int_average, %function
int_average:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push {r3, r4, r5, lr}
        subs r3, r1, #0                 @ r3 = size; flags = size compared with 0
        ble .L4                         @ size <= 0 -> return 0
        subs r2, r0, #4                 @ r2 = array - 4 bytes: bias for the pre-indexed load
        movs r1, #0                     @ r1:r0 = 64-bit total = 0
        movs r0, #0
        movs r4, #0                     @ r4 = i = 0

        @ Loop invariant: r4 = elements summed so far, r2 points at the
        @ element loaded last (one slot before the array on entry).
.L3:
        ldr r5, [r2, #4]!               @ r2 += 4; r5 = array[i]
        adds r4, r4, #1                 @ i++
        adds r0, r0, r5                 @ total += (long long)array[i]:
        adc r1, r1, r5, asr #31         @   high word gets the sign extension plus carry
        cmp r4, r3
        bne .L3                         @ until i == size

        mov r2, r4                      @ r3:r2 = (long long)size (r4 == size here)
        asrs r3, r4, #31
        bl __aeabi_ldivmod              @ r0 = low word of total / size
        pop {r3, r4, r5, pc}

.L4:
        movs r0, #0                     @ size <= 0: result is 0
        pop {r3, r4, r5, pc}
        .size int_average, .-int_average
        .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
        .section .note.GNU-stack,"",%progbits
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ -----------------------------------------------------------------------
@ int int_average(int *array, int size)
@
@ GCC-generated listing for int_average.c: GNU as, unified syntax,
@ ARMv7-A, Thumb instruction set.  This version is auto-vectorized with
@ NEON.  The summation is split into three phases:
@   1. scalar peel loop (.L4): 0-3 elements, the count being derived
@      from address bits [3:2] of the array pointer;
@   2. NEON loop (.L7): 4 ints per iteration, widened to 64 bits and
@      accumulated in the two lanes of q9;
@   3. scalar tail loop (.L9): whatever is left (fewer than 4 elements).
@
@ In:    r0 = array, r1 = size
@ Out:   r0 = total / size, or 0 when size <= 0
@ Calls: __aeabi_ldivmod (64-bit signed divide helper; here the dividend
@        is passed in r1:r0, the divisor in r3:r2, and the low word of
@        the result is returned from r0)
@ Saved: r4-r7, lr (r3 is pushed as well - presumably only to keep the
@        stack 8-byte aligned at the call; confirm against the AAPCS)
@
@ Register roles inside the function:
@   r5:r4  64-bit running total
@   r6     number of elements consumed so far
@   r2     peel count (phase 1), later the vector-loop load pointer
@   lr     elements remaining after the peel loop
@   ip     number of NEON iterations (lr / 4)
@   r7     elements covered by the NEON loop (ip * 4)
@   q9     two 64-bit partial sums (d18, d19)
@ -----------------------------------------------------------------------
        .syntax unified
        .arch armv7-a
        @ Build attributes emitted by the compiler.  The tag numbers are
        @ defined by the ARM EABI build-attributes addendum; they are not
        @ decoded here.
        .eabi_attribute 27, 3
        .eabi_attribute 28, 1
        .fpu neon
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .thumb
        .file "int_average.c"
        .global __aeabi_ldivmod
        .text
        .align 2
        .global int_average
        .thumb
        .thumb_func
        .type int_average, %function
int_average:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        cmp r1, #0
        push {r3, r4, r5, r6, r7, lr}   @ push leaves the flags from the cmp intact
        ble .L10                        @ size <= 0 -> return 0

        @ Peel count: r2 = (-(address bits [3:2])) & 3, i.e. the number of
        @ ints before the pointer reaches a 16-byte boundary (for a
        @ word-aligned pointer), clamped to size.
        ubfx r2, r0, #2, #2
        negs r2, r2
        and r2, r2, #3
        cmp r2, r1
        it cs
        movcs r2, r1                    @ r2 = min(r2, size), unsigned compare
        cmp r2, #0
        beq .L11                        @ nothing to peel
        subs r3, r0, #4                 @ r3 = array - 4 bytes: bias for the pre-indexed load
        movs r4, #0                     @ r5:r4 = total = 0
        movs r5, #0
        movs r6, #0                     @ r6 = elements consumed = 0

        @ Phase 1: scalar peel loop, r2 iterations.
.L4:
        ldr r7, [r3, #4]!               @ r3 += 4; r7 = next element
        adds r6, r6, #1
        adds r4, r4, r7                 @ total += sign-extended element:
        adc r5, r5, r7, asr #31         @   high word gets the sign extension plus carry
        cmp r6, r2
        bcc .L4                         @ while consumed < peel count
        cmp r1, r2
        beq .L5                         @ peel consumed the whole array

        @ Decide whether at least one 4-element vector iteration remains.
.L3:
        rsb lr, r2, r1                  @ lr = size - peel count
        lsr ip, lr, #2                  @ ip = lr / 4 = vector iterations
        lsl r7, ip, #2                  @ r7 = elements the vector loop will cover
        cmp ip, #0
        beq .L6                         @ fewer than 4 left: scalar tail only
        vmov.i32 q9, #0 @ v2di
        add r2, r0, r2, lsl #2          @ r2 = &array[peel count]
        movs r3, #0                     @ r3 = vector iteration counter

        @ Phase 2: NEON loop.  Each pass loads 4 ints, sign-extends them to
        @ four 64-bit values and adds them pairwise into q9.
.L7:
        vldmia r2!, {d16-d17}           @ q8 = 4 ints; r2 += 16
        vmovl.s32 q10, d16              @ q10 = low two ints widened to 64 bits
        adds r3, r3, #1
        vmovl.s32 q8, d17               @ q8 = high two ints widened (d16 already consumed)
        vadd.i64 q9, q10, q9
        cmp r3, ip
        vadd.i64 q9, q8, q9             @ sits between the cmp and bcc; bcc still tests the cmp
        bcc .L7                         @ while counter < ip

        vadd.i64 d18, d18, d19          @ reduce the two partial sums to one
        adds r6, r6, r7                 @ consumed += elements handled by the vector loop
        vmov r2, r3, d18 @ v2di
        adds r4, r4, r2                 @ total += vector sum (r3:r2)
        adc r5, r5, r3
        cmp lr, r7
        beq .L5                         @ no leftover elements

        @ Phase 3: scalar tail loop from array[r6] up to size.
.L6:
        add r0, r0, r6, lsl #2          @ r0 = &array[consumed]
.L9:
        ldr r3, [r0], #4                @ r3 = *r0; r0 += 4
        adds r6, r6, #1
        adds r4, r4, r3
        adc r5, r5, r3, asr #31
        cmp r1, r6
        bgt .L9                         @ while size > consumed

        @ Divide: r1:r0 = total, r3:r2 = (long long)size.
.L5:
        mov r2, r1
        mov r0, r4
        mov r1, r5
        asrs r3, r2, #31
        bl __aeabi_ldivmod              @ r0 = low word of total / size
        pop {r3, r4, r5, r6, r7, pc}

.L10:
        movs r0, #0                     @ size <= 0: result is 0
        pop {r3, r4, r5, r6, r7, pc}

        @ No peel needed: start with total = 0 and consumed = 0.
.L11:
        movs r4, #0
        movs r5, #0
        mov r6, r2                      @ r2 is 0 on this path
        b .L3
        .size int_average, .-int_average
        .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
        .section .note.GNU-stack,"",%progbits
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment