Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
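Auto-vectorization of an ARM NEON integer operation in GCC. Build commands: `arm-linux-gnueabihf-gcc -mfpu=neon -O2 -S int_average.c` and `arm-linux-gnueabihf-gcc -mfpu=neon -O3 -S int_average.c`. The C source is listed first, followed by the two generated assembly listings.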
/*
 * Compute the integer mean of the first `size` elements of `array`.
 *
 * The elements are summed into a 64-bit accumulator so that the running
 * total cannot overflow a 32-bit int, then the total is divided by `size`
 * (C integer division, i.e. truncated toward zero).
 *
 * array: pointer to at least `size` readable ints; not dereferenced when
 *        size <= 0.
 * size:  number of elements to average.
 *
 * Returns the truncated mean, or 0 when size <= 0.
 */
int int_average(int* array, int size)
{
    long long sum = 0;
    const int* cursor;
    const int* end;

    /* Nothing to average: also avoids a division by zero below. */
    if (size < 1) {
        return 0;
    }

    end = array + size;
    for (cursor = array; cursor != end; ++cursor) {
        sum += *cursor;
    }

    return (int)(sum / size);
}
@ ---------------------------------------------------------------------------
@ int int_average(int* array, int size)  -- scalar compilation (no NEON
@ instructions are used in this listing; presumably the -O2 output, the
@ first of the two commands in the gist description -- confirm).
@
@ In:   r0 = array, r1 = size
@ Out:  r0 = result (0 when size <= 0)
@ Register roles inside the loop:
@   r0:r1 = 64-bit running total (low word : high word)
@   r2    = address of the element most recently loaded (starts at array - 4)
@   r3    = size
@   r4    = number of elements summed so far
@   r5    = element just loaded
@ ---------------------------------------------------------------------------
@ Assembler setup and build attributes as emitted by the compiler.
.syntax unified
.arch armv7-a
.eabi_attribute 27, 3
.eabi_attribute 28, 1
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.thumb
.file "int_average.c"
@ External helper called below for the 64-bit signed division.
.global __aeabi_ldivmod
.text
.align 2
.global int_average
.thumb
.thumb_func
.type int_average, %function
int_average:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ Save r4, r5 and the return address. r3 is also pushed; presumably only as
@ padding to keep the stack 8-byte aligned across the call (its value is
@ overwritten before use) -- confirm.
push {r3, r4, r5, lr}
@ r3 = size, and set flags from (size - 0) so the next branch tests size <= 0.
subs r3, r1, #0
ble .L4
@ Bias the pointer back by one word because the loop load pre-increments.
subs r2, r0, #4
@ total (r0:r1) = 0, element counter r4 = 0.
movs r1, #0
movs r0, #0
movs r4, #0
.L3:
@ r2 += 4, then r5 = *r2 (pre-indexed load with write-back).
ldr r5, [r2, #4]!
adds r4, r4, #1
@ 64-bit add of the sign-extended element: the low-word add produces the
@ carry, and (r5 asr #31) is the element's sign extension for the high word.
@ The adc must directly consume the carry from this adds.
adds r0, r0, r5
adc r1, r1, r5, asr #31
@ Loop until every element has been summed (r4 == size).
cmp r4, r3
bne .L3
@ Divisor r2:r3 = size sign-extended to 64 bits (r4 == size here);
@ the dividend is already in r0:r1.
mov r2, r4
asrs r3, r4, #31
@ total / size; the value left in r0 after the call is returned as the int
@ result.
bl __aeabi_ldivmod
pop {r3, r4, r5, pc}
.L4:
@ size <= 0: return 0 without touching the array.
movs r0, #0
pop {r3, r4, r5, pc}
.size int_average, .-int_average
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",%progbits
@ ---------------------------------------------------------------------------
@ int int_average(int* array, int size)  -- NEON-vectorized compilation
@ (presumably the -O3 output, the second of the two commands in the gist
@ description -- confirm).
@
@ In:   r0 = array, r1 = size
@ Out:  r0 = result (0 when size <= 0)
@
@ Structure:
@   1. peel loop   (.L4): sum 0..3 leading elements one at a time until the
@                         next element's address is a multiple of 16
@   2. vector loop (.L7): sum 4 ints per iteration into two 64-bit lanes
@   3. tail loop   (.L9): sum the remaining (< 4) elements one at a time
@   4. divide      (.L5): 64-bit total / size via __aeabi_ldivmod
@
@ Register roles:
@   r4:r5 = 64-bit running total (low word : high word)
@   r6    = number of elements summed so far
@   r1    = size (preserved until the division is set up)
@   q9    = vector accumulator (two 64-bit lanes, d18 and d19)
@ ---------------------------------------------------------------------------
@ Assembler setup and build attributes as emitted by the compiler.
.syntax unified
.arch armv7-a
.eabi_attribute 27, 3
.eabi_attribute 28, 1
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.thumb
.file "int_average.c"
@ External helper called below for the 64-bit signed division.
.global __aeabi_ldivmod
.text
.align 2
.global int_average
.thumb
.thumb_func
.type int_average, %function
int_average:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ Test size first; the push in between does not change the flags, so the
@ ble still sees the result of this compare.
cmp r1, #0
push {r3, r4, r5, r6, r7, lr}
ble .L10
@ Peel count: r2 = (-((array >> 2) & 3)) & 3, i.e. how many 4-byte elements
@ must be consumed before the address becomes a multiple of 16
@ (assumes array itself is 4-byte aligned -- TODO confirm).
ubfx r2, r0, #2, #2
negs r2, r2
and r2, r2, #3
@ Clamp the peel count to size: r2 = min(r2, size) (unsigned compare; size
@ is known to be > 0 here).
cmp r2, r1
it cs
movcs r2, r1
@ No peeling needed: .L11 zeroes the total and the counter, then joins .L3.
cmp r2, #0
beq .L11
@ Peel loop setup: bias the pointer back one word because the load
@ pre-increments; total (r4:r5) = 0, counter r6 = 0.
subs r3, r0, #4
movs r4, #0
movs r5, #0
movs r6, #0
.L4:
@ r3 += 4, then r7 = *r3.
ldr r7, [r3, #4]!
adds r6, r6, #1
@ 64-bit add of the sign-extended element; adc consumes the carry from the
@ adds immediately above.
adds r4, r4, r7
adc r5, r5, r7, asr #31
@ Continue while fewer than r2 elements have been peeled.
cmp r6, r2
bcc .L4
@ The peel loop consumed every element (size <= peel count): go divide.
cmp r1, r2
beq .L5
.L3:
@ lr = elements left after peeling, ip = number of 4-element vector
@ iterations, r7 = number of elements the vector loop will cover (ip * 4).
rsb lr, r2, r1
lsr ip, lr, #2
lsl r7, ip, #2
@ Fewer than 4 elements left: skip straight to the scalar tail.
cmp ip, #0
beq .L6
@ Zero the vector accumulator, point r2 at array[peel count], and start the
@ vector iteration counter r3 at 0.
vmov.i32 q9, #0 @ v2di
add r2, r0, r2, lsl #2
movs r3, #0
.L7:
@ Load 4 ints (16 bytes) into d16/d17 and advance r2 past them.
vldmia r2!, {d16-d17}
@ Sign-extend each pair of 32-bit ints to 64-bit lanes and add both pairs
@ into q9. The scalar counter update and compare are interleaved with the
@ NEON instructions.
vmovl.s32 q10, d16
adds r3, r3, #1
vmovl.s32 q8, d17
vadd.i64 q9, q10, q9
cmp r3, ip
vadd.i64 q9, q8, q9
bcc .L7
@ Horizontal reduction: d18 = lane 0 + lane 1 of the accumulator.
vadd.i64 d18, d18, d19
@ Account for the elements the vector loop consumed.
adds r6, r6, r7
@ Move the 64-bit vector sum to r2 (low) : r3 (high) and add it to the
@ running total; adc consumes the carry from the adds immediately above.
vmov r2, r3, d18 @ v2di
adds r4, r4, r2
adc r5, r5, r3
@ The vector loop covered everything that was left: no tail, go divide.
cmp lr, r7
beq .L5
.L6:
@ r0 = &array[r6], the first element not yet summed.
add r0, r0, r6, lsl #2
.L9:
@ Scalar tail: r3 = *r0, then r0 += 4 (post-indexed load).
ldr r3, [r0], #4
adds r6, r6, #1
adds r4, r4, r3
adc r5, r5, r3, asr #31
@ Continue while size > elements summed (signed compare).
cmp r1, r6
bgt .L9
.L5:
@ Set up the division: dividend r0:r1 = total, divisor r2:r3 = size
@ sign-extended to 64 bits. r2 must be loaded from r1 before r1 is
@ overwritten with the high word of the total.
mov r2, r1
mov r0, r4
mov r1, r5
asrs r3, r2, #31
@ total / size; the value left in r0 after the call is returned as the int
@ result.
bl __aeabi_ldivmod
pop {r3, r4, r5, r6, r7, pc}
.L10:
@ size <= 0: return 0 without touching the array.
movs r0, #0
pop {r3, r4, r5, r6, r7, pc}
.L11:
@ Peel count is 0 (r2 == 0): total = 0, elements summed = 0, then enter the
@ vector/tail setup.
movs r4, #0
movs r5, #0
mov r6, r2
b .L3
.size int_average, .-int_average
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",%progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.