Example snippets for ISO C aliasing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Compile with the ARM cross compiler and write an annotated disassembly of
# the resulting object to "<object basename>.s" in the current directory.
#
# All arguments are forwarded to the compiler unchanged; the object file is
# taken from the "-o <file>" pair among them.
CC=arm-linux-gnueabihf-gcc
DUMP=(arm-linux-gnueabihf-objdump -M reg-names-gcc)

# Keep the command as an array so arguments containing spaces survive intact.
BUILDCMD=("$CC" "$@")

OBJECT=""
while [[ $# -gt 1 ]]
do
    case "$1" in
        # NOTE(review): "--searchpath" looks like a copy-paste leftover; it is
        # kept so existing invocations are parsed exactly as before.
        -o|--searchpath)
            OBJECT="$2"
            shift
            ;;
        *)
            ;;
    esac
    shift
done

# Without an object name there is nothing to disassemble or to name the output after.
if [[ -z "$OBJECT" ]]; then
    echo "usage: $0 [compiler options] -o OBJECT.o SOURCE.c" >&2
    exit 1
fi

OUTFILE="$(basename -s .o "$OBJECT").s"
GCCVERSION="$("$CC" --version | grep gcc)"

# printf instead of "echo -e": backslashes in the interpolated values stay literal.
printf '# Object built with: \n# %s\n# $ %s\n\n' "$GCCVERSION" "${BUILDCMD[*]}" > "$OUTFILE"

# Disassemble only if the build succeeded; the header above is written either way.
"${BUILDCMD[@]}" && "${DUMP[@]}" -d -S "$OBJECT" >> "$OUTFILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Compile with the host gcc and write an annotated Intel-syntax disassembly of
# the resulting object to "<object basename>.s" in the current directory.
#
# All arguments are forwarded to the compiler unchanged; the object file is
# taken from the "-o <file>" pair among them.
CC=gcc

# Keep the command as an array so arguments containing spaces survive intact.
BUILDCMD=("$CC" "$@")

OBJECT=""
while [[ $# -gt 1 ]]
do
    case "$1" in
        # NOTE(review): "--searchpath" looks like a copy-paste leftover; it is
        # kept so existing invocations are parsed exactly as before.
        -o|--searchpath)
            OBJECT="$2"
            shift
            ;;
        *)
            ;;
    esac
    shift
done

# Without an object name there is nothing to disassemble or to name the output after.
if [[ -z "$OBJECT" ]]; then
    echo "usage: $0 [compiler options] -o OBJECT.o SOURCE.c" >&2
    exit 1
fi

OUTFILE="$(basename -s .o "$OBJECT").s"
GCCVERSION="$("$CC" --version | grep gcc)"

# printf instead of "echo -e": backslashes in the interpolated values stay literal.
printf '# Object built with: \n# %s\n# $ %s\n\n' "$GCCVERSION" "${BUILDCMD[*]}" > "$OUTFILE"

# Disassemble only if the build succeeded; the header above is written either way.
"${BUILDCMD[@]}" && objdump -d -M intel -S "$OBJECT" >> "$OUTFILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copy n ints from q to p, front to back.
 *
 * Both pointers are restrict-qualified, so the caller promises that the n
 * elements written through p and the n elements read through q do not
 * overlap.  A non-positive n copies nothing.
 */
void bar(int * restrict p, int * restrict q, int n) {
    while (n-- > 0)
        *p++ = *q++;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Shows one valid and one invalid use of bar() on a single array.
 *
 * bar() takes (destination, source, count); the arguments are passed in that
 * order.
 */
void callbar(void) {
    extern int d[100];

    bar(d + 50, d, 50); // Valid! d[50..99] and d[0..49] do not overlap
    bar(d + 1, d, 50);  // Undefined behaviour! d[1..50] overlaps d[0..49]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Element-wise sum: p[i] = q[i] + r[i] for i in [0, n).
 *
 * All three pointers are restrict-qualified.  Only p is written through;
 * q and r are read only.  A non-positive n does nothing.
 */
void threebar(int * restrict p, int * restrict q, int * restrict r, int n) {
    for (int i = 0; i < n; ++i)
        p[i] = q[i] + r[i];
}
/*
 * Shows that two restrict-qualified parameters may refer to the same array
 * as long as that array is only read.
 *
 * threebar() takes (p, q, r, n); the arguments are passed in that order.
 */
void callthreebar(int n) {
    if (n <= 0) {
        return; // a variable-length array needs a positive size
    }

    int a[n];
    int b[n];

    for (int i = 0; i < n; ++i) {
        b[i] = i; // give threebar() defined values to read
    }

    threebar(a, b, b, n); // Valid! b is only read, so q and r may both point at it
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fragment (belongs inside a function body): which assignments between
// restrict-qualified pointers are marked valid, depending on the block each
// pointer is declared in.
{
    int * restrict foo;
    int * restrict bar;
    foo = bar; // undefined behaviour (both declared in the same block)
    {
        // Initialising an inner-block restricted pointer from an outer-block
        // one is the only direction marked valid here.
        int * restrict foo_inner = foo; // valid
        int * restrict bar_inner = bar; // valid
        foo = bar_inner; // undefined behaviour (inner assigned to outer)
        bar_inner = foo_inner; // undefined behaviour (both declared in the same block)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ...same as above... | |
float* restrict acceleration_x = &acceleration->x; | |
float* restrict velocity_x = &velocity->x; | |
float* restrict position_x = &position->x; | |
float* restrict acceleration_y = &acceleration->y; | |
float* restrict velocity_y = &velocity->y; | |
float* restrict position_y = &position->y; | |
float* restrict acceleration_z = &acceleration->z; | |
float* restrict velocity_z = &velocity->z; | |
float* restrict position_z = &position->z; | |
for (size_t i=0; i<count*stride; i+=stride) { | |
// ...same as above... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ arm-linux-gnueabihf-gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example-move-01.arm.o example-move-01.c | |
example-move-01.arm.o: file format elf32-littlearm | |
Disassembly of section .text: | |
00000000 <move>: | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
0: b3a3 cbz r3, 6c <move+0x6c> | |
float x, y, z; | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
2: b410 push {r4} | |
4: 240c movs r4, #12 | |
6: fb04 0303 mla r3, r4, r3, r0 | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
a: ed92 7a00 vldr s14, [r2] | |
e: 300c adds r0, #12 | |
10: 320c adds r2, #12 | |
12: 310c adds r1, #12 | |
14: ed50 6a03 vldr s13, [r0, #-12] | |
velocity[i].y += acceleration[i].y * time_step; | |
18: ed50 7a02 vldr s15, [r0, #-8] | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
1c: ee40 6a07 vmla.f32 s13, s0, s14 | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
20: ed10 7a01 vldr s14, [r0, #-4] | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
24: ed40 6a03 vstr s13, [r0, #-12] | |
velocity[i].y += acceleration[i].y * time_step; | |
28: ed12 6a02 vldr s12, [r2, #-8] | |
2c: ee40 7a06 vmla.f32 s15, s0, s12 | |
30: ed40 7a02 vstr s15, [r0, #-8] | |
velocity[i].z += acceleration[i].z * time_step; | |
34: ed12 6a01 vldr s12, [r2, #-4] | |
38: ee00 7a06 vmla.f32 s14, s0, s12 | |
3c: ed00 7a01 vstr s14, [r0, #-4] | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
40: 4283 cmp r3, r0 | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
42: ed11 5a03 vldr s10, [r1, #-12] | |
position[i].y += velocity[i].y * time_step; | |
position[i].z += velocity[i].z * time_step; | |
46: ed51 5a01 vldr s11, [r1, #-4] | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
position[i].y += velocity[i].y * time_step; | |
4a: ed11 6a02 vldr s12, [r1, #-8] | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
4e: ee00 5a26 vmla.f32 s10, s0, s13 | |
position[i].y += velocity[i].y * time_step; | |
position[i].z += velocity[i].z * time_step; | |
52: ee40 5a07 vmla.f32 s11, s0, s14 | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
56: ed01 5a03 vstr s10, [r1, #-12] | |
position[i].y += velocity[i].y * time_step; | |
5a: ee00 6a27 vmla.f32 s12, s0, s15 | |
position[i].z += velocity[i].z * time_step; | |
5e: ed41 5a01 vstr s11, [r1, #-4] | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
position[i].y += velocity[i].y * time_step; | |
62: ed01 6a02 vstr s12, [r1, #-8] | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
66: d1d0 bne.n a <move+0xa> | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
position[i].y += velocity[i].y * time_step; | |
position[i].z += velocity[i].z * time_step; | |
} | |
} | |
68: f85d 4b04 ldr.w r4, [sp], #4 | |
6c: 4770 bx lr | |
6e: bf00 nop |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h>

/* Three-component single-precision vector. */
typedef struct {
    float x, y, z;
} vector3;

/*
 * Advance `count` particles by one time step.
 *
 * For each index i the velocity is first incremented by
 * acceleration * time_step, then the position is incremented by the
 * already-updated velocity * time_step.
 *
 * velocity     - array of `count` elements, read and written
 * position     - array of `count` elements, read and written
 * acceleration - array of `count` elements, only read
 * time_step    - scale factor applied to both increments
 * count        - number of elements to process; 0 does nothing
 *
 * None of the pointers is restrict-qualified, so every access is written
 * out in full and in order; the statement order is deliberate.
 */
void move(vector3* velocity, vector3* position, vector3* acceleration,
          float time_step, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        velocity[i].x += acceleration[i].x * time_step;
        velocity[i].y += acceleration[i].y * time_step;
        velocity[i].z += acceleration[i].z * time_step;
        position[i].x += velocity[i].x * time_step;
        position[i].y += velocity[i].y * time_step;
        position[i].z += velocity[i].z * time_step;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example-move-01.o example-move-01.c | |
example-move-01.o: file format elf64-x86-64 | |
Disassembly of section .text: | |
0000000000000000 <move>: | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
0: 48 85 c9 test rcx,rcx | |
3: 74 7e je 83 <move+0x83> | |
5: 48 8d 04 49 lea rax,[rcx+rcx*2] | |
9: 48 8d 04 87 lea rax,[rdi+rax*4] | |
d: 0f 1f 00 nop DWORD PTR [rax] | |
velocity[i].x += acceleration[i].x * time_step; | |
10: f3 0f 10 1a movss xmm3,DWORD PTR [rdx] | |
14: 48 83 c7 0c add rdi,0xc | |
18: 48 83 c2 0c add rdx,0xc | |
1c: f3 0f 59 d8 mulss xmm3,xmm0 | |
20: 48 83 c6 0c add rsi,0xc | |
24: f3 0f 58 5f f4 addss xmm3,DWORD PTR [rdi-0xc] | |
29: f3 0f 11 5f f4 movss DWORD PTR [rdi-0xc],xmm3 | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
2e: f3 0f 59 d8 mulss xmm3,xmm0 | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
32: f3 0f 10 52 f8 movss xmm2,DWORD PTR [rdx-0x8] | |
37: f3 0f 59 d0 mulss xmm2,xmm0 | |
3b: f3 0f 58 57 f8 addss xmm2,DWORD PTR [rdi-0x8] | |
40: f3 0f 11 57 f8 movss DWORD PTR [rdi-0x8],xmm2 | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
position[i].y += velocity[i].y * time_step; | |
45: f3 0f 59 d0 mulss xmm2,xmm0 | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
49: f3 0f 10 4a fc movss xmm1,DWORD PTR [rdx-0x4] | |
4e: f3 0f 59 c8 mulss xmm1,xmm0 | |
52: f3 0f 58 4f fc addss xmm1,DWORD PTR [rdi-0x4] | |
57: f3 0f 11 4f fc movss DWORD PTR [rdi-0x4],xmm1 | |
position[i].x += velocity[i].x * time_step; | |
position[i].y += velocity[i].y * time_step; | |
position[i].z += velocity[i].z * time_step; | |
5c: f3 0f 59 c8 mulss xmm1,xmm0 | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
60: f3 0f 58 5e f4 addss xmm3,DWORD PTR [rsi-0xc] | |
position[i].y += velocity[i].y * time_step; | |
65: f3 0f 58 56 f8 addss xmm2,DWORD PTR [rsi-0x8] | |
position[i].z += velocity[i].z * time_step; | |
6a: f3 0f 58 4e fc addss xmm1,DWORD PTR [rsi-0x4] | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
velocity[i].x += acceleration[i].x * time_step; | |
velocity[i].y += acceleration[i].y * time_step; | |
velocity[i].z += acceleration[i].z * time_step; | |
position[i].x += velocity[i].x * time_step; | |
6f: f3 0f 11 5e f4 movss DWORD PTR [rsi-0xc],xmm3 | |
position[i].y += velocity[i].y * time_step; | |
74: f3 0f 11 56 f8 movss DWORD PTR [rsi-0x8],xmm2 | |
position[i].z += velocity[i].z * time_step; | |
79: f3 0f 11 4e fc movss DWORD PTR [rsi-0x4],xmm1 | |
} vector3; | |
void move(vector3* velocity, vector3* position, vector3* acceleration, | |
float time_step, size_t count) | |
{ | |
for (size_t i = 0; i < count; ++i) { | |
7e: 48 39 f8 cmp rax,rdi | |
81: 75 8d jne 10 <move+0x10> | |
83: f3 c3 repz ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> /* size_t */

#define PARTICLE_COUNT 1111

/* Three-component single-precision vector. */
typedef struct {
    float x, y, z;
} vector3;

vector3 velocity[PARTICLE_COUNT];     // Window 1
vector3 position[PARTICLE_COUNT];     // Window 2
vector3 acceleration[PARTICLE_COUNT]; // Window 3

/*
 * Advance all PARTICLE_COUNT particles in the file-scope arrays by one step.
 *
 * For each index the velocity is first incremented by
 * acceleration * time_step, then the position is incremented by the
 * already-updated velocity * time_step.
 *
 * The loop bound is PARTICLE_COUNT: this variant has no `count` parameter,
 * and the three arrays are all declared with that size.
 */
void move(float time_step)
{
    for (size_t i = 0; i < PARTICLE_COUNT; ++i) {
        velocity[i].x += acceleration[i].x * time_step;
        velocity[i].y += acceleration[i].y * time_step;
        velocity[i].z += acceleration[i].z * time_step;
        position[i].x += velocity[i].x * time_step;
        position[i].y += velocity[i].y * time_step;
        position[i].z += velocity[i].z * time_step;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.L18: | |
add 12,8,6 | |
lfsx 12,8,6 # Load velocity_x | |
add 10,8,5 | |
lfsx 13,8,5 # Load acceleration_x | |
lfs 8,4(12) # Load velocity_y | |
add 4,8,7 | |
lfs 5,8(10) # Load acceleration_z | |
lfs 6,8(12) # Load velocity_z | |
lfs 7,4(10) # Load acceleration_y | |
fmadds 9,13,1,12 | |
fmadds 10,7,1,8 | |
fmadds 11,5,1,6 | |
lfsx 4,8,7 # Load position_x | |
lfs 3,4(4) # Load position_y | |
lfs 2,8(4) # Load position_z | |
fmadds 0,9,1,4 | |
fmadds 13,10,1,3 | |
fmadds 12,11,1,2 | |
stfsx 9,8,6 # Store velocity_x | |
stfs 11,8(12) # Store velocity_z | |
stfs 10,4(12) # Store velocity_y | |
stfsx 0,8,7 # Store position_x | |
addi 8,8,12 | |
stfs 13,4(4) # Store position_y | |
stfs 12,8(4) # Store position_z | |
bdnz .L18 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h>

/* Three-component single-precision vector. */
typedef struct {
    float x, y, z;
} vector3;

/*
 * Advance `count` particles by one time step, addressing every component
 * through its own restrict-qualified float pointer.
 *
 * Each pointer starts at one component of element 0 and is indexed with
 * i = 0, stride, 2*stride, ... while i < count * stride, so `stride` is the
 * distance between consecutive elements measured in floats.
 *
 * For each visited index the velocity component is first incremented by
 * acceleration * time_step, then the position component is incremented by
 * the already-updated velocity * time_step.
 *
 * A count or stride of 0 performs no iterations.
 */
void move(vector3* velocity, vector3* position, vector3* acceleration,
          float time_step, size_t count, size_t stride)
{
    float* restrict acceleration_x = &acceleration->x;
    float* restrict velocity_x = &velocity->x;
    float* restrict position_x = &position->x;
    float* restrict acceleration_y = &acceleration->y;
    float* restrict velocity_y = &velocity->y;
    float* restrict position_y = &position->y;
    float* restrict acceleration_z = &acceleration->z;
    float* restrict velocity_z = &velocity->z;
    float* restrict position_z = &position->z;

    for (size_t i = 0; i < count * stride; i += stride) {
        velocity_x[i] += acceleration_x[i] * time_step;
        velocity_y[i] += acceleration_y[i] * time_step;
        velocity_z[i] += acceleration_z[i] * time_step;
        position_x[i] += velocity_x[i] * time_step;
        position_y[i] += velocity_y[i] * time_step;
        position_z[i] += velocity_z[i] * time_step;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.L31: | |
slwi 0,9,2 | |
lfsx 13,3,0 # Load velocity_x | |
add 9,9,30 | |
lfsx 8,12,0 # Load velocity_y | |
cmplw 7,31,9 | |
lfsx 6,10,0 # Load velocity_z | |
lfsx 12,5,0 # Load acceleration_x | |
lfsx 7,6,0 # Load acceleration_y | |
lfsx 5,7,0 # Load acceleration_z | |
fmadds 11,12,1,13 | |
fmadds 10,7,1,8 | |
fmadds 9,5,1,6 | |
lfsx 4,4,0 # Load position_x | |
lfsx 3,8,0 # Load position_y | |
lfsx 2,11,0 # Load position_z | |
fmadds 0,11,1,4 | |
fmadds 13,10,1,3 | |
fmadds 12,9,1,2 | |
stfsx 11,3,0 # Store velocity_x | |
stfsx 10,12,0 # Store velocity_y | |
stfsx 9,10,0 # Store velocity_z | |
stfsx 0,4,0 # Store position_x | |
stfsx 13,8,0 # Store position_y | |
stfsx 12,11,0 # Store position_z | |
bgt+ 7,.L31 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ...same as above... | |
float* restrict acceleration_x = &acceleration->x; | |
float* restrict velocity_x = &velocity->x; | |
float* restrict position_x = &position->x; | |
float* restrict acceleration_y = &acceleration->y; | |
float* restrict velocity_y = &velocity->y; | |
float* restrict position_y = &position->y; | |
float* restrict acceleration_z = &acceleration->z; | |
float* restrict velocity_z = &velocity->z; | |
float* restrict position_z = &position->z; | |
for (size_t i=0; i<count*stride; i+=stride) { | |
velocity_x[i] += acceleration_x[i] * time_step; | |
velocity_y[i] += acceleration_y[i] * time_step; | |
velocity_z[i] += acceleration_z[i] * time_step; | |
position_x[i] += velocity_x[i] * time_step; | |
position_y[i] += velocity_y[i] * time_step; | |
position_z[i] += velocity_z[i] * time_step; | |
// ...same as above... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Convert the first n ints of in_vector to float and store each converted
 * value in both output arrays at the same index.
 *
 * in_vector[i] is named twice on purpose: the outputs are float while the
 * input is int, which is the pointer-type combination this example is
 * about.  A non-positive n does nothing.
 */
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
    for (int i = 0; i < n; ++i) {
        out_vector_a[i] = in_vector[i];
        out_vector_b[i] = in_vector[i];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ gcc -std=c11 -O2 -g -c -fno-strict-aliasing -o example_func_01.o example_func.c | |
example_func_01.o: file format elf64-x86-64 | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
0: 31 c0 xor eax,eax | |
2: 85 c9 test ecx,ecx | |
4: 7e 2e jle 34 <foo+0x34> | |
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] | |
d: 00 00 00 | |
out_vector_a[i] = in_vector[i]; | |
10: 66 0f ef c0 pxor xmm0,xmm0 | |
14: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4] | |
19: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0 | |
out_vector_b[i] = in_vector[i]; | |
1e: 66 0f ef c0 pxor xmm0,xmm0 | |
22: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4] | |
27: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0 | |
2c: 48 83 c0 01 add rax,0x1 | |
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
30: 39 c1 cmp ecx,eax | |
32: 7f dc jg 10 <foo+0x10> | |
34: f3 c3 repz ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_02.o example_func.c | |
example_func_02.o: file format elf64-x86-64 | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
0: 31 c0 xor eax,eax | |
2: 85 c9 test ecx,ecx | |
4: 7e 25 jle 2b <foo+0x2b> | |
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] | |
d: 00 00 00 | |
out_vector_a[i] = in_vector[i]; | |
10: 66 0f ef c0 pxor xmm0,xmm0 | |
14: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4] | |
19: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0 | |
out_vector_b[i] = in_vector[i]; | |
1e: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0 | |
23: 48 83 c0 01 add rax,0x1 | |
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
27: 39 c1 cmp ecx,eax | |
29: 7f e5 jg 10 <foo+0x10> | |
2b: f3 c3 repz ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copy the first n floats of in_vector into both output arrays.
 *
 * All three parameters have the same pointer type and none is
 * restrict-qualified; in_vector[i] is named twice on purpose, once per
 * store.  A non-positive n does nothing.
 */
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n)
{
    for (int i = 0; i < n; ++i) {
        out_vector_a[i] = in_vector[i];
        out_vector_b[i] = in_vector[i];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_compatible.o example_func_compatible.c | |
example_func_compatible.o: file format elf64-x86-64 | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
0: 31 c0 xor eax,eax | |
2: 85 c9 test ecx,ecx | |
4: 7e 26 jle 2c <foo+0x2c> | |
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] | |
d: 00 00 00 | |
out_vector_a[i] = in_vector[i]; | |
10: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4] | |
15: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0 | |
out_vector_b[i] = in_vector[i]; | |
1a: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4] | |
1f: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0 | |
24: 48 83 c0 01 add rax,0x1 | |
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
28: 39 c1 cmp ecx,eax | |
2a: 7f e4 jg 10 <foo+0x10> | |
2c: f3 c3 repz ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copy the first n floats of in_vector into both output arrays.
 *
 * All three parameters are restrict-qualified, so the caller promises that
 * the elements written through either output pointer are not accessed
 * through any of the other pointers.  A non-positive n does nothing.
 */
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n)
{
    for (int i = 0; i < n; ++i) {
        out_vector_a[i] = in_vector[i];
        out_vector_b[i] = in_vector[i];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Object built with: | |
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 | |
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_compatible_restrict.o example_func_compatible_restrict.c | |
example_func_compatible_restrict.o: file format elf64-x86-64 | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
0: 31 c0 xor eax,eax | |
2: 85 c9 test ecx,ecx | |
4: 7e 21 jle 27 <foo+0x27> | |
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] | |
d: 00 00 00 | |
out_vector_a[i] = in_vector[i]; | |
10: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4] | |
15: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0 | |
out_vector_b[i] = in_vector[i]; | |
1a: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0 | |
1f: 48 83 c0 01 add rax,0x1 | |
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n) | |
{ | |
for(int i=0; i<n; ++i) { | |
23: 39 c1 cmp ecx,eax | |
25: 7f e9 jg 10 <foo+0x10> | |
27: f3 c3 repz ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment