Skip to content

Instantly share code, notes, and snippets.

@jar jar/dot_product.S
Created Jul 26, 2016

Embed
What would you like to do?
An optimized single precision dot product routine for the Epiphany architecture
/* // Optimized Dot Product routine follows this prototype
float dot_product(const float* a, const float* b, int nd8m1) {
int i;
float c = 0.0f;
int n = (nd8m1+1)*8;
for (i=0; i<n; i++) {
c += a[i] * b[i];
}
return c;
} */
/*
 * _dot_product -- single-precision dot product using an Epiphany hardware loop.
 *
 * Inputs (as used by the code below):
 *   r0 = pointer to the first float array  (read with ldrd, post-incremented)
 *   r1 = pointer to the second float array (read with ldrd, post-incremented)
 *   r2 = value written to the loop-count register lc ("nd8m1" in the C
 *        prototype that accompanies this routine)
 * Output:
 *   r0 = sum of the element-wise products
 *
 * Registers written: r0, r1, r2, r24-r27, r44-r63 and the special registers
 * lc, ls, le.  Nothing is saved or restored and the stack is not touched.
 * NOTE(review): this presumably relies on all of these being caller-saved in
 * the Epiphany ABI -- confirm.
 *
 * Method: eight independent accumulators are kept, one per element index
 * modulo 8:
 *   r24,r25,r26,r27 <- elements 0,1,2,3 of each group of eight
 *   r44,r45,r46,r47 <- elements 4,5,6,7 of each group of eight
 * Loads are issued one group ahead of the multiply-adds that consume them
 * (software pipelining), and every load is written next to a floating-point
 * instruction, presumably so the two can dual-issue.
 *
 * Element accounting: the prologue loads 8 element pairs and consumes 4, each
 * pass through .Lstart...Lend loads 8 and consumes 8, and the epilogue
 * consumes the last 4.  Total = 8 + 8 * (passes through the loop body), so the
 * body has to run exactly nd8m1 times to match n = (nd8m1+1)*8.
 * NOTE(review): the loop body is on the fall-through path, so it is entered at
 * least once whatever lc holds -- confirm the behaviour for nd8m1 == 0 against
 * the hardware-loop specification.
 *
 * Because the partial sums are accumulated in eight separate registers and
 * combined at the end, the additions happen in a different order than in the
 * sequential C loop, so the result may differ from it in rounding.
 *
 * NOTE(review): ldrd moves a register pair per access; this presumably
 * requires both input pointers to be 8-byte aligned -- TODO confirm.
 * NOTE(review): no gid/gie is executed here; the Epiphany documentation is
 * understood to require interrupts to be disabled while a hardware loop runs
 * -- confirm that callers take care of this.
 */
.global _dot_product
_dot_product:
/* Hardware-loop setup: lc = iteration count taken from the third argument. */
movts.l lc, r2
/* ls = address of the first instruction of the loop body.  r2 is reused as
   scratch now that the count has been copied into lc.
   NOTE(review): only %low() of the label is loaded (no movt / %high()), so
   this presumably assumes the code is placed where the upper address half is
   zero, e.g. core-local memory -- confirm. */
mov r2, %low(.Lstart)
movts ls, r2
/* le = address of the last instruction of the loop body: .Lend-4, assuming
   that instruction is a 4-byte encoding. */
mov r2, %low(.Lend-4)
movts le, r2
/* Pad to an 8-byte boundary using the halfword 0x01a2 (presumably the 16-bit
   NOP encoding -- verify).  The 20 instructions from here to .Lstart and the
   16 in the loop body are each expected to assemble to 4 bytes, which would
   put .Lstart and the next-to-last loop instruction on 8-byte boundaries;
   this looks intended to meet the hardware-loop alignment rules -- confirm. */
.balignw 8,0x01a2
/* Clear the first four accumulators with integer zero (bit pattern of +0.0f). */
mov r24, #0
mov r25, #0
mov r26, #0
mov r27, #0
/* Prologue: load the first group of eight element pairs.  Each ldrd fills a
   register pair (e.g. r48 and r49) and post-increments its pointer.  The
   interleaved fsub instructions clear the other four accumulators
   (r24 - r24 with r24 == 0). */
ldrd r48, [r0], #1
fsub r44, r24, r24
ldrd r50, [r1], #1
fsub r45, r24, r24
ldrd r52, [r0], #1
fsub r46, r24, r24
ldrd r54, [r1], #1
fsub r47, r24, r24
/* Still in the prologue: fetch elements 4..7 of the first group while
   accumulating elements 0..3 that were loaded just above. */
ldrd r56, [r0], #1
fmadd r24, r48, r50
ldrd r58, [r1], #1
fmadd r25, r49, r51
ldrd r60, [r0], #1
fmadd r26, r52, r54
ldrd r62, [r1], #1
fmadd r27, r53, r55
/* Hardware-loop body (ls points here).
   First half: load elements 0..3 of the next group into r48-r55 while
   accumulating elements 4..7 of the previous group from r56-r63. */
.Lstart:
ldrd r48, [r0], #1
fmadd r44, r56, r58
ldrd r50, [r1], #1
fmadd r45, r57, r59
ldrd r52, [r0], #1
fmadd r46, r60, r62
ldrd r54, [r1], #1
fmadd r47, r61, r63
/* Second half: load elements 4..7 of the same group into r56-r63 while
   accumulating elements 0..3 that were just loaded. */
ldrd r56, [r0], #1
fmadd r24, r48, r50
ldrd r58, [r1], #1
fmadd r25, r49, r51
ldrd r60, [r0], #1
fmadd r26, r52, r54
ldrd r62, [r1], #1
fmadd r27, r53, r55
/* End of the loop body; le was set to the instruction just above this label. */
.Lend:
/* Epilogue: drain the pipeline by accumulating the last four loaded pairs. */
fmadd r44, r56, r58
fmadd r45, r57, r59
fmadd r46, r60, r62
fmadd r47, r61, r63
/* Pairwise reduction of the eight partial sums: 8 -> 4 -> 2 -> 1. */
fadd r24, r24, r25
fadd r26, r26, r27
fadd r44, r44, r45
fadd r46, r46, r47
fadd r24, r24, r26
fadd r44, r44, r46
/* Final sum goes to r0, which is the return value. */
fadd r0, r24, r44
rts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.