Skip to content

Instantly share code, notes, and snippets.

@samgiles
Last active August 29, 2015 13:58
Show Gist options
  • Save samgiles/9994055 to your computer and use it in GitHub Desktop.
Save samgiles/9994055 to your computer and use it in GitHub Desktop.
auto-vectorisation example in GCC
/*
 * Element-wise in-place addition: a[k] = a[k] + b[k] for the first SIZE
 * elements (SIZE is defined outside this function).
 *
 * `restrict` tells the compiler that a and b do not overlap, and
 * __builtin_assume_aligned tells it that both pointers are 16-byte aligned.
 * Both are promises made by the caller; this function does not check them.
 */
void test_vec(double* restrict a, double* restrict b) {
    double *dst = __builtin_assume_aligned(a, 16);
    double *src = __builtin_assume_aligned(b, 16);

    for (int k = 0; k < SIZE; ++k) {
        dst[k] += src[k];
    }
}
-------------------------------------------------------------------------------------------
$ c99 -S -O2 autovec.c -o output.s (GNU Compiler Collection)
test_vec:
.LFB22:
.cfi_startproc
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
movsd (%rdi,%rax), %xmm0
addsd (%rsi,%rax), %xmm0
movsd %xmm0, (%rdi,%rax)
addq $8, %rax
cmpq $256, %rax
jne .L3
rep ret
.cfi_endproc
-------------------------------------------------------------------------------------------
$ c99 -S -O3 autovec.c -o output.s (GNU Compiler Collection; listing below is shown as a disassembly of test_vec)
4006d0: 31 c0 xor %eax,%eax
4006d2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
4006d8: 66 0f 28 04 07 movapd (%rdi,%rax,1),%xmm0
4006dd: 66 0f 58 04 06 addpd (%rsi,%rax,1),%xmm0
4006e2: 66 0f 29 04 07 movapd %xmm0,(%rdi,%rax,1)
4006e7: 48 83 c0 10 add $0x10,%rax
4006eb: 48 3d 00 00 08 00 cmp $0x80000,%rax
4006f1: 75 e5 jne 4006d8 <test_vec+0x8>
4006f3: f3 c3 repz retq
4006f5: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1)
@samgiles
Copy link
Author

samgiles commented Apr 5, 2014

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment