Skip to content

Instantly share code, notes, and snippets.

@ssvb
Created January 19, 2016 14:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ssvb/0151f6a64ab1cfcc5377 to your computer and use it in GitHub Desktop.
Save ssvb/0151f6a64ab1cfcc5377 to your computer and use it in GitHub Desktop.
/*
 * AArch64 NEON vector-ADD throughput test (GNU as syntax, preprocessed by cpp).
 *
 * Usage: gcc -DCPU_CLOCK_FREQUENCY=1008000000 arm64-neon-throughput-test.S && time ./a.out
 *
 * CPU_CLOCK_FREQUENCY is expected to be the core clock in Hz (as in the usage
 * example above). The timed loop runs
 *     CPU_CLOCK_FREQUENCY / LOOP_UNROLL_FACTOR
 * iterations, and each iteration issues LOOP_UNROLL_FACTOR * 8 vector ADDs,
 * so the program executes about 8 * CPU_CLOCK_FREQUENCY ADDs in total.
 * If the core sustains N such ADDs per cycle, the time reported by "time"
 * should be roughly 8 / N seconds (loop overhead and frequency scaling are
 * ignored in this estimate).
 */

#ifndef CPU_CLOCK_FREQUENCY
#error CPU_CLOCK_FREQUENCY must be defined
#endif

/* Number of times the 8-instruction ADD group is repeated per loop iteration. */
#define LOOP_UNROLL_FACTOR 30

/*
 * A zero iteration count would make the SUBS below wrap around and the loop
 * would run for about 2^64 iterations, so reject it at build time.
 */
#if (CPU_CLOCK_FREQUENCY / LOOP_UNROLL_FACTOR) < 1
#error CPU_CLOCK_FREQUENCY must be at least LOOP_UNROLL_FACTOR
#endif

        .cpu    cortex-a53+fp+simd
        .file   "arm64-neon-throughput-test.S"
        .text
        .align  2
        .global main
        .type   main, %function

/*
 * int main(void)
 * ABI:    AAPCS64
 * Out:    w0 = 0
 * Clobb:  x1, v0-v7, flags (all caller-saved, so nothing is spilled)
 * Stack:  unused
 *
 * Register roles:
 *   x1      remaining loop iterations, counts down to zero
 *   v0-v7   operands of the timed ADDs; their contents are never read back,
 *           so they are deliberately left uninitialised
 */
main:
        /* Iteration count comes from the literal pool; it may not fit a MOV. */
        ldr     x1, =(CPU_CLOCK_FREQUENCY / LOOP_UNROLL_FACTOR)

        /* Jump over the alignment padding straight into the timed loop. */
        b       .Lneon_add_loop

        .balign 64
.Lneon_add_loop:
        /*
         * Every ADD reads and writes only its own register, which gives eight
         * independent dependency chains (v0 .. v7) interleaved in the stream.
         */
        .rept   LOOP_UNROLL_FACTOR
        add     v0.16b, v0.16b, v0.16b
        add     v1.16b, v1.16b, v1.16b
        add     v2.16b, v2.16b, v2.16b
        add     v3.16b, v3.16b, v3.16b
        add     v4.16b, v4.16b, v4.16b
        add     v5.16b, v5.16b, v5.16b
        add     v6.16b, v6.16b, v6.16b
        add     v7.16b, v7.16b, v7.16b
        .endr

        subs    x1, x1, #1                      /* one unrolled group done */
        bne     .Lneon_add_loop                 /* repeat until x1 == 0 */

        mov     w0, #0                          /* exit status 0 */
        ret
        .size   main, .-main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment