Created
July 26, 2016 19:35
-
-
Save jar/c6166366295fa124fdd14a37628f90d5 to your computer and use it in GitHub Desktop.
An optimized single precision dot product routine for the Epiphany architecture
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Optimized dot product routine; it follows this prototype:
float dot_product(const float* a, const float* b, int nd8m1) {
    int i;
    float c = 0.0f;
    int n = (nd8m1+1)*8;
    for (i=0; i<n; i++) {
        c += a[i] * b[i];
    }
    return c;
} */
/*
 * float dot_product(const float *a, const float *b, int nd8m1)
 *
 * Single-precision dot product over n = (nd8m1 + 1) * 8 elements,
 * software-pipelined around an Epiphany hardware loop (LC/LS/LE).
 *
 * In:   r0 = a, r1 = b, r2 = nd8m1
 * Out:  r0 = sum of a[i] * b[i]
 * Uses: r0-r2, r24-r27, r44-r63 and the LC, LS, LE special registers.
 *       No stack accesses.
 *
 * Register roles inside the routine:
 *   r48:r49, r52:r53, r56:r57, r60:r61   pairs of floats loaded from a
 *   r50:r51, r54:r55, r58:r59, r62:r63   pairs of floats loaded from b
 *   r24-r27   partial sums for elements 0..3 of every group of eight
 *   r44-r47   partial sums for elements 4..7 of every group of eight
 *
 * Structure: the prologue loads the first eight elements of each array and
 * accumulates the first four products.  Each pass of the hardware loop
 * loads eight more elements while accumulating the eight products whose
 * operands were loaded previously.  The epilogue accumulates the last four
 * products and reduces the eight partial sums.  Every load is paired with
 * one FPU instruction -- presumably so the two can issue together.
 *
 * Because the products are accumulated in eight separate partial sums that
 * are only combined at the end, the rounding of the result can differ from
 * the strictly sequential C loop.
 *
 * NOTE(review): a and b are read with ldrd (64-bit loads); presumably both
 *   must be 8-byte aligned -- confirm against the ISA reference.
 * NOTE(review): LS and LE are loaded from %low() only, i.e. just the low
 *   16 bits of the label addresses; this assumes the routine is linked at
 *   an address that fits in 16 bits -- confirm for the target link map.
 * NOTE(review): no gid/gie is issued here.  Check the architecture
 *   reference's hardware-loop restrictions (interrupt state, instruction
 *   width, alignment) and make sure the caller satisfies them.
 * NOTE(review): the loop body lies on the fall-through path, so the
 *   behaviour for nd8m1 == 0 depends on how the core treats LC == 0 --
 *   confirm before calling with fewer than 16 elements.
 */
    .global _dot_product
_dot_product:
    movts.l lc, r2                  /* LC = nd8m1 (loop count) */
    mov     r2, %low(.Lstart)       /* r2 is free again: reuse as scratch */
    movts   ls, r2                  /* LS = first instruction of the loop */
    mov     r2, %low(.Lend-4)       /* last loop instruction: 4 bytes before .Lend */
    movts   le, r2                  /* LE = last instruction of the loop */

    /* Pad to an 8-byte boundary (fill word 0x01a2, presumably the 16-bit
       NOP encoding since the padding is executed).  The 20 instructions
       that follow all name registers above r7 -- assumed to be 4-byte
       encodings, which keeps .Lstart on an 8-byte boundary; do not insert
       or remove instructions without re-checking the alignment. */
    .balignw 8,0x01a2

    /* ---- Prologue: clear accumulators, prime the pipeline ---- */
    mov     r24, #0
    mov     r25, #0
    mov     r26, #0
    mov     r27, #0
    ldrd    r48, [r0], #1           /* a[0], a[1]; post-increment a */
    fsub    r44, r24, r24           /* r44 = 0 - 0: zero second accumulator set */
    ldrd    r50, [r1], #1           /* b[0], b[1]; post-increment b */
    fsub    r45, r24, r24
    ldrd    r52, [r0], #1           /* a[2], a[3] */
    fsub    r46, r24, r24
    ldrd    r54, [r1], #1           /* b[2], b[3] */
    fsub    r47, r24, r24
    ldrd    r56, [r0], #1           /* a[4], a[5] */
    fmadd   r24, r48, r50           /* r24 += a[0] * b[0] */
    ldrd    r58, [r1], #1           /* b[4], b[5] */
    fmadd   r25, r49, r51           /* r25 += a[1] * b[1] */
    ldrd    r60, [r0], #1           /* a[6], a[7] */
    fmadd   r26, r52, r54           /* r26 += a[2] * b[2] */
    ldrd    r62, [r1], #1           /* b[6], b[7] */
    fmadd   r27, r53, r55           /* r27 += a[3] * b[3] */

    /* ---- Hardware loop body: 16 instructions, 8 elements per pass ----
       On entry r56-r63 hold elements 4..7 of the previous group. */
.Lstart:
    ldrd    r48, [r0], #1           /* next group, elements 0,1 of a */
    fmadd   r44, r56, r58           /* previous group, element 4 */
    ldrd    r50, [r1], #1           /* next group, elements 0,1 of b */
    fmadd   r45, r57, r59           /* previous group, element 5 */
    ldrd    r52, [r0], #1           /* next group, elements 2,3 of a */
    fmadd   r46, r60, r62           /* previous group, element 6 */
    ldrd    r54, [r1], #1           /* next group, elements 2,3 of b */
    fmadd   r47, r61, r63           /* previous group, element 7 */
    ldrd    r56, [r0], #1           /* next group, elements 4,5 of a */
    fmadd   r24, r48, r50           /* next group, element 0 */
    ldrd    r58, [r1], #1           /* next group, elements 4,5 of b */
    fmadd   r25, r49, r51           /* next group, element 1 */
    ldrd    r60, [r0], #1           /* next group, elements 6,7 of a */
    fmadd   r26, r52, r54           /* next group, element 2 */
    ldrd    r62, [r1], #1           /* next group, elements 6,7 of b */
    fmadd   r27, r53, r55           /* next group, element 3 (LE points here) */
.Lend:

    /* ---- Epilogue: drain the pipeline, then reduce ---- */
    fmadd   r44, r56, r58           /* final group, elements 4..7 */
    fmadd   r45, r57, r59
    fmadd   r46, r60, r62
    fmadd   r47, r61, r63
    fadd    r24, r24, r25           /* pairwise reduction of the 8 partial sums */
    fadd    r26, r26, r27
    fadd    r44, r44, r45
    fadd    r46, r46, r47
    fadd    r24, r24, r26
    fadd    r44, r44, r46
    fadd    r0, r24, r44            /* return value in r0 */
    rts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment