Created
January 19, 2016 14:43
-
-
Save ssvb/0151f6a64ab1cfcc5377 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Usage: gcc -DCPU_CLOCK_FREQUENCY=1008000000 arm64-neon-throughput-test.S && time ./a.out
 *
 * Target / syntax: AArch64, GNU as, fed through the C preprocessor (.S file).
 *
 * int main(void)
 *
 * Runs CPU_CLOCK_FREQUENCY / LOOP_UNROLL_FACTOR passes of a loop whose body
 * is LOOP_UNROLL_FACTOR copies of eight 128-bit vector byte adds. Each add
 * doubles one of v0..v7 in place, so there are eight separate dependency
 * chains. The total is about 8 * CPU_CLOCK_FREQUENCY vector adds; assuming
 * CPU_CLOCK_FREQUENCY is the core clock in Hz, the wall time printed by
 * "time" is roughly 8 / (vector adds completed per cycle) seconds.
 *
 * In:    nothing is read from the arguments
 * Out:   w0 = 0
 * Clobb: x1 (pass counter), v0-v7 (input values are whatever the registers
 *        held on entry; results are never read), condition flags
 * Stack: not used
 */

#ifndef CPU_CLOCK_FREQUENCY
#error CPU_CLOCK_FREQUENCY must be defined
#endif

#define LOOP_UNROLL_FACTOR 30

        .cpu    cortex-a53+fp+simd
        .text
        .align  2
        .global main
        .type   main, %function
main:
        /* x1 = number of passes through the unrolled loop (integer division). */
        ldr     x1, =(CPU_CLOCK_FREQUENCY / LOOP_UNROLL_FACTOR)

        /*
         * A zero pass count would make the subs below wrap around and spin
         * for about 2^64 passes, so skip the loop entirely in that case.
         */
        cbz     x1, .Ldone

        /* Jump over the alignment padding instead of executing it. */
        b       .Lloop

        .balign 64
.Lloop:
        .rept   LOOP_UNROLL_FACTOR
        add     v0.16b, v0.16b, v0.16b
        add     v1.16b, v1.16b, v1.16b
        add     v2.16b, v2.16b, v2.16b
        add     v3.16b, v3.16b, v3.16b
        add     v4.16b, v4.16b, v4.16b
        add     v5.16b, v5.16b, v5.16b
        add     v6.16b, v6.16b, v6.16b
        add     v7.16b, v7.16b, v7.16b
        .endr
        subs    x1, x1, #1              /* one pass done; sets Z when x1 hits 0 */
        bne     .Lloop

.Ldone:
        mov     w0, #0                  /* exit status 0 */
        ret
        .size   main, .-main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment