Skip to content

Instantly share code, notes, and snippets.

@nazavode
Last active Mar 14, 2017
Embed
What would you like to do?
Example snippets for ISO C aliasing
#!/bin/bash
# Cross-compile for ARM with the arguments given on the command line, then
# write an annotated disassembly of the resulting object to <object>.s.
#
# Usage: <script> [compiler options...] -o OBJECT.o SOURCE.c
#
# The object file name is taken from the argument following -o (or
# --searchpath, kept for backward compatibility).
CC=arm-linux-gnueabihf-gcc
DUMP=(arm-linux-gnueabihf-objdump -M reg-names-gcc)

# Keep the compiler arguments as an array so that arguments containing
# whitespace reach the compiler unchanged; BUILDCMD is only used for display.
BUILD_ARGS=("$@")
BUILDCMD="$CC $*"

OBJECT=""
while [[ $# -gt 1 ]]
do
    case "$1" in
        -o|--searchpath)
            OBJECT="$2"
            shift
            ;;
        *)
            ;;
    esac
    shift
done

# Without an object name there is nothing to disassemble and no sensible
# output file name, so stop before running the compiler.
if [[ -z "$OBJECT" ]]; then
    echo "usage: $0 [compiler options...] -o OBJECT.o SOURCE.c" >&2
    exit 1
fi

OUTFILE="$(basename -s .o "$OBJECT").s"
GCCVERSION="$("$CC" --version | grep gcc)"

# printf with %s keeps backslashes in the version string or command line literal.
printf '# Object built with: \n# %s\n# $ %s\n\n' "$GCCVERSION" "$BUILDCMD" > "$OUTFILE"
"$CC" "${BUILD_ARGS[@]}" && "${DUMP[@]}" -d -S "$OBJECT" >> "$OUTFILE"
#!/bin/bash
# Compile with the host gcc using the arguments given on the command line,
# then write an annotated Intel-syntax disassembly of the resulting object
# to <object>.s.
#
# Usage: <script> [compiler options...] -o OBJECT.o SOURCE.c
#
# The object file name is taken from the argument following -o (or
# --searchpath, kept for backward compatibility).

# Keep the compiler arguments as an array so that arguments containing
# whitespace reach the compiler unchanged; BUILDCMD is only used for display.
BUILD_ARGS=("$@")
BUILDCMD="gcc $*"

OBJECT=""
while [[ $# -gt 1 ]]
do
    case "$1" in
        -o|--searchpath)
            OBJECT="$2"
            shift
            ;;
        *)
            ;;
    esac
    shift
done

# Without an object name there is nothing to disassemble and no sensible
# output file name, so stop before running the compiler.
if [[ -z "$OBJECT" ]]; then
    echo "usage: $0 [compiler options...] -o OBJECT.o SOURCE.c" >&2
    exit 1
fi

OUTFILE="$(basename -s .o "$OBJECT").s"
GCCVERSION="$(gcc --version | grep gcc)"

# printf with %s keeps backslashes in the version string or command line literal.
printf '# Object built with: \n# %s\n# $ %s\n\n' "$GCCVERSION" "$BUILDCMD" > "$OUTFILE"
gcc "${BUILD_ARGS[@]}" && objdump -d -M intel -S "$OBJECT" >> "$OUTFILE"
/*
 * Copy n ints from q to p, front to back.
 *
 * Both pointers are restrict-qualified: the caller promises that the n
 * elements written through p and the n elements read through q do not
 * overlap. Nothing is copied when n is zero or negative.
 */
void bar(int * restrict p, int * restrict q, int n) {
    for (int remaining = n; remaining > 0; --remaining) {
        *p = *q;
        ++p;
        ++q;
    }
}
/*
 * Demonstrates a valid and an invalid call of bar(p, q, n) on one array.
 *
 * The arguments follow bar's parameter order (destination, source, count).
 * The second call is kept on purpose as an example of what the restrict
 * contract forbids; it must not be copied into real code.
 */
void callbar(void) {
    extern int d[100];
    bar(d + 50, d, 50); // Valid: writes d[50..99], reads d[0..49] -- disjoint ranges
    bar(d + 1, d, 50);  // Undefined behaviour: d[1..49] is accessed through both p and q
}
/*
 * Element-wise sum: p[i] = q[i] + r[i] for every i in [0, n).
 *
 * All three pointers are restrict-qualified. Only p is written through;
 * q and r are only read. Nothing happens when n is zero or negative.
 */
void threebar(int * restrict p, int * restrict q, int * restrict r, int n) {
    int idx = 0;
    while (idx < n) {
        p[idx] = q[idx] + r[idx];
        idx++;
    }
}
/*
 * Demonstrates that passing the same array for two restrict-qualified
 * parameters is fine as long as that array is only read.
 *
 * n is the number of elements in the two local arrays. Nothing is done
 * for n <= 0, because a variable-length array needs a positive size.
 */
void callthreebar(int n) {
    if (n <= 0)
        return;

    int a[n];
    int b[n];

    // Give b defined contents before threebar reads it.
    for (int i = 0; i < n; ++i)
        b[i] = i;

    // Arguments follow threebar's parameter order (p, q, r, n).
    threebar(a, b, b, n); // Valid: b is aliased by q and r but never modified
}
// Assignments between restrict-qualified pointers declared in nested blocks
// (this mirrors the nested-block example in ISO C, section 6.7.3.1).
// Only "outer-to-inner" initialisation is marked valid below; assignments
// within one block, or from an inner block back to an outer one, are
// marked as undefined behaviour.
{
int * restrict foo;
int * restrict bar;
foo = bar; // undefined behaviour: both restricted pointers belong to the same block
{
int * restrict foo_inner = foo; // valid: inner pointer initialised from an outer one
int * restrict bar_inner = bar; // valid: inner pointer initialised from an outer one
foo = bar_inner; // undefined behaviour: inner-to-outer assignment
bar_inner = foo_inner; // undefined behaviour: both restricted pointers belong to the same (inner) block
}
}
// ...same as above...
// One restrict-qualified float pointer per vector component, each pointing at
// the matching member of the struct that acceleration / velocity / position
// point to.
float* restrict acceleration_x = &acceleration->x;
float* restrict velocity_x = &velocity->x;
float* restrict position_x = &position->x;
float* restrict acceleration_y = &acceleration->y;
float* restrict velocity_y = &velocity->y;
float* restrict position_y = &position->y;
float* restrict acceleration_z = &acceleration->z;
float* restrict velocity_z = &velocity->z;
float* restrict position_z = &position->z;
// i is a float index: it advances by `stride` per iteration and stops at count*stride.
for (size_t i=0; i<count*stride; i+=stride) {
// ...same as above...
# Object built with:
# arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ arm-linux-gnueabihf-gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example-move-01.arm.o example-move-01.c
example-move-01.arm.o: file format elf32-littlearm
Disassembly of section .text:
00000000 <move>:
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
0: b3a3 cbz r3, 6c <move+0x6c>
float x, y, z;
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
2: b410 push {r4}
4: 240c movs r4, #12
6: fb04 0303 mla r3, r4, r3, r0
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
a: ed92 7a00 vldr s14, [r2]
e: 300c adds r0, #12
10: 320c adds r2, #12
12: 310c adds r1, #12
14: ed50 6a03 vldr s13, [r0, #-12]
velocity[i].y += acceleration[i].y * time_step;
18: ed50 7a02 vldr s15, [r0, #-8]
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
1c: ee40 6a07 vmla.f32 s13, s0, s14
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
20: ed10 7a01 vldr s14, [r0, #-4]
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
24: ed40 6a03 vstr s13, [r0, #-12]
velocity[i].y += acceleration[i].y * time_step;
28: ed12 6a02 vldr s12, [r2, #-8]
2c: ee40 7a06 vmla.f32 s15, s0, s12
30: ed40 7a02 vstr s15, [r0, #-8]
velocity[i].z += acceleration[i].z * time_step;
34: ed12 6a01 vldr s12, [r2, #-4]
38: ee00 7a06 vmla.f32 s14, s0, s12
3c: ed00 7a01 vstr s14, [r0, #-4]
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
40: 4283 cmp r3, r0
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
42: ed11 5a03 vldr s10, [r1, #-12]
position[i].y += velocity[i].y * time_step;
position[i].z += velocity[i].z * time_step;
46: ed51 5a01 vldr s11, [r1, #-4]
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
4a: ed11 6a02 vldr s12, [r1, #-8]
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
4e: ee00 5a26 vmla.f32 s10, s0, s13
position[i].y += velocity[i].y * time_step;
position[i].z += velocity[i].z * time_step;
52: ee40 5a07 vmla.f32 s11, s0, s14
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
56: ed01 5a03 vstr s10, [r1, #-12]
position[i].y += velocity[i].y * time_step;
5a: ee00 6a27 vmla.f32 s12, s0, s15
position[i].z += velocity[i].z * time_step;
5e: ed41 5a01 vstr s11, [r1, #-4]
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
62: ed01 6a02 vstr s12, [r1, #-8]
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
66: d1d0 bne.n a <move+0xa>
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
position[i].z += velocity[i].z * time_step;
}
}
68: f85d 4b04 ldr.w r4, [sp], #4
6c: 4770 bx lr
6e: bf00 nop
#include <stddef.h>
/* Three-component single-precision vector. */
typedef struct {
    float x;
    float y;
    float z;
} vector3;

/*
 * Advance `count` particles by one time step.
 *
 * For each particle the velocity is first updated from the acceleration,
 * then the position is updated from the (already updated) velocity, one
 * component at a time in x, y, z order. The pointers are not
 * restrict-qualified, so every velocity component is re-read from memory
 * after it has been stored.
 */
void move(vector3* velocity, vector3* position, vector3* acceleration,
          float time_step, size_t count)
{
    size_t particle = 0;
    while (particle < count) {
        vector3 *vel = &velocity[particle];
        vector3 *pos = &position[particle];
        vector3 *acc = &acceleration[particle];

        vel->x = vel->x + acc->x * time_step;
        vel->y = vel->y + acc->y * time_step;
        vel->z = vel->z + acc->z * time_step;

        pos->x = pos->x + vel->x * time_step;
        pos->y = pos->y + vel->y * time_step;
        pos->z = pos->z + vel->z * time_step;

        ++particle;
    }
}
# Object built with:
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example-move-01.o example-move-01.c
example-move-01.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <move>:
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
0: 48 85 c9 test rcx,rcx
3: 74 7e je 83 <move+0x83>
5: 48 8d 04 49 lea rax,[rcx+rcx*2]
9: 48 8d 04 87 lea rax,[rdi+rax*4]
d: 0f 1f 00 nop DWORD PTR [rax]
velocity[i].x += acceleration[i].x * time_step;
10: f3 0f 10 1a movss xmm3,DWORD PTR [rdx]
14: 48 83 c7 0c add rdi,0xc
18: 48 83 c2 0c add rdx,0xc
1c: f3 0f 59 d8 mulss xmm3,xmm0
20: 48 83 c6 0c add rsi,0xc
24: f3 0f 58 5f f4 addss xmm3,DWORD PTR [rdi-0xc]
29: f3 0f 11 5f f4 movss DWORD PTR [rdi-0xc],xmm3
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
2e: f3 0f 59 d8 mulss xmm3,xmm0
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
32: f3 0f 10 52 f8 movss xmm2,DWORD PTR [rdx-0x8]
37: f3 0f 59 d0 mulss xmm2,xmm0
3b: f3 0f 58 57 f8 addss xmm2,DWORD PTR [rdi-0x8]
40: f3 0f 11 57 f8 movss DWORD PTR [rdi-0x8],xmm2
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
45: f3 0f 59 d0 mulss xmm2,xmm0
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
49: f3 0f 10 4a fc movss xmm1,DWORD PTR [rdx-0x4]
4e: f3 0f 59 c8 mulss xmm1,xmm0
52: f3 0f 58 4f fc addss xmm1,DWORD PTR [rdi-0x4]
57: f3 0f 11 4f fc movss DWORD PTR [rdi-0x4],xmm1
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
position[i].z += velocity[i].z * time_step;
5c: f3 0f 59 c8 mulss xmm1,xmm0
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
60: f3 0f 58 5e f4 addss xmm3,DWORD PTR [rsi-0xc]
position[i].y += velocity[i].y * time_step;
65: f3 0f 58 56 f8 addss xmm2,DWORD PTR [rsi-0x8]
position[i].z += velocity[i].z * time_step;
6a: f3 0f 58 4e fc addss xmm1,DWORD PTR [rsi-0x4]
{
for (size_t i = 0; i < count; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
6f: f3 0f 11 5e f4 movss DWORD PTR [rsi-0xc],xmm3
position[i].y += velocity[i].y * time_step;
74: f3 0f 11 56 f8 movss DWORD PTR [rsi-0x8],xmm2
position[i].z += velocity[i].z * time_step;
79: f3 0f 11 4e fc movss DWORD PTR [rsi-0x4],xmm1
} vector3;
void move(vector3* velocity, vector3* position, vector3* acceleration,
float time_step, size_t count)
{
for (size_t i = 0; i < count; ++i) {
7e: 48 39 f8 cmp rax,rdi
81: 75 8d jne 10 <move+0x10>
83: f3 c3 repz ret
#include <stddef.h> /* size_t */

#define PARTICLE_COUNT 1111

/* Three-component single-precision vector. */
typedef struct {
float x, y, z;
} vector3;

/* Particle state kept in three separate file-scope arrays. */
vector3 velocity[PARTICLE_COUNT]; // Window 1
vector3 position[PARTICLE_COUNT]; // Window 2
vector3 acceleration[PARTICLE_COUNT]; // Window 3

/*
 * Advance all PARTICLE_COUNT particles by one time step.
 *
 * For each particle the velocity is updated from the acceleration and the
 * position is then updated from the new velocity. The arrays are distinct
 * global objects, so the compiler can see that they do not overlap.
 */
void move(float time_step)
{
/* The loop bound is the array length; this snippet declares no `count`. */
for (size_t i = 0; i < PARTICLE_COUNT; ++i) {
velocity[i].x += acceleration[i].x * time_step;
velocity[i].y += acceleration[i].y * time_step;
velocity[i].z += acceleration[i].z * time_step;
position[i].x += velocity[i].x * time_step;
position[i].y += velocity[i].y * time_step;
position[i].z += velocity[i].z * time_step;
}
}
.L18:
add 12,8,6
lfsx 12,8,6 # Load velocity_x
add 10,8,5
lfsx 13,8,5 # Load acceleration_x
lfs 8,4(12) # Load velocity_y
add 4,8,7
lfs 5,8(10) # Load acceleration_z
lfs 6,8(12) # Load velocity_z
lfs 7,4(10) # Load acceleration_y
fmadds 9,13,1,12
fmadds 10,7,1,8
fmadds 11,5,1,6
lfsx 4,8,7 # Load position_x
lfs 3,4(4) # Load position_y
lfs 2,8(4) # Load position_z
fmadds 0,9,1,4
fmadds 13,10,1,3
fmadds 12,11,1,2
stfsx 9,8,6 # Store velocity_x
stfs 11,8(12) # Store velocity_z
stfs 10,4(12) # Store velocity_y
stfsx 0,8,7 # Store position_x
addi 8,8,12
stfs 13,4(4) # Store position_y
stfs 12,8(4) # Store position_z
bdnz .L18
#include <stddef.h>
/* Three-component single-precision vector. */
typedef struct {
    float x;
    float y;
    float z;
} vector3;

/*
 * Advance `count` particles by one time step, addressing every component
 * through its own restrict-qualified float pointer.
 *
 * `stride` is the distance between consecutive particles measured in
 * floats. Each component pointer starts at the matching member of the first
 * element and is indexed with i = 0, stride, 2*stride, ... below
 * count*stride. Velocity is updated from acceleration first, then position
 * from the updated velocity.
 */
void move(vector3* velocity, vector3* position, vector3* acceleration,
          float time_step, size_t count, size_t stride)
{
    float* restrict acc_x = &acceleration->x;
    float* restrict acc_y = &acceleration->y;
    float* restrict acc_z = &acceleration->z;

    float* restrict vel_x = &velocity->x;
    float* restrict vel_y = &velocity->y;
    float* restrict vel_z = &velocity->z;

    float* restrict pos_x = &position->x;
    float* restrict pos_y = &position->y;
    float* restrict pos_z = &position->z;

    const size_t limit = count * stride;
    size_t i = 0;
    while (i < limit) {
        vel_x[i] = vel_x[i] + acc_x[i] * time_step;
        vel_y[i] = vel_y[i] + acc_y[i] * time_step;
        vel_z[i] = vel_z[i] + acc_z[i] * time_step;

        pos_x[i] = pos_x[i] + vel_x[i] * time_step;
        pos_y[i] = pos_y[i] + vel_y[i] * time_step;
        pos_z[i] = pos_z[i] + vel_z[i] * time_step;

        i += stride;
    }
}
.L31:
slwi 0,9,2
lfsx 13,3,0 # Load velocity_x
add 9,9,30
lfsx 8,12,0 # Load velocity_y
cmplw 7,31,9
lfsx 6,10,0 # Load velocity_z
lfsx 12,5,0 # Load acceleration_x
lfsx 7,6,0 # Load acceleration_y
lfsx 5,7,0 # Load acceleration_z
fmadds 11,12,1,13
fmadds 10,7,1,8
fmadds 9,5,1,6
lfsx 4,4,0 # Load position_x
lfsx 3,8,0 # Load position_y
lfsx 2,11,0 # Load position_z
fmadds 0,11,1,4
fmadds 13,10,1,3
fmadds 12,9,1,2
stfsx 11,3,0 # Store velocity_x
stfsx 10,12,0 # Store velocity_y
stfsx 9,10,0 # Store velocity_z
stfsx 0,4,0 # Store position_x
stfsx 13,8,0 # Store position_y
stfsx 12,11,0 # Store position_z
bgt+ 7,.L31
// ...same as above...
// One restrict-qualified float pointer per vector component, each pointing at
// the matching member of the struct that acceleration / velocity / position
// point to.
float* restrict acceleration_x = &acceleration->x;
float* restrict velocity_x = &velocity->x;
float* restrict position_x = &position->x;
float* restrict acceleration_y = &acceleration->y;
float* restrict velocity_y = &velocity->y;
float* restrict position_y = &position->y;
float* restrict acceleration_z = &acceleration->z;
float* restrict velocity_z = &velocity->z;
float* restrict position_z = &position->z;
// i is a float index: it advances by `stride` per iteration and stops at count*stride.
for (size_t i=0; i<count*stride; i+=stride) {
// Velocity is updated from acceleration first ...
velocity_x[i] += acceleration_x[i] * time_step;
velocity_y[i] += acceleration_y[i] * time_step;
velocity_z[i] += acceleration_z[i] * time_step;
// ... then position is updated from the velocity values just written.
position_x[i] += velocity_x[i] * time_step;
position_y[i] += velocity_y[i] * time_step;
position_z[i] += velocity_z[i] * time_step;
// ...same as above...
/*
 * Store each of the first n ints of in_vector, converted to float, into
 * both output arrays.
 *
 * in_vector[i] is read once per store; whether the compiler may merge the
 * two reads depends on whether it can assume that a float store does not
 * modify an int object. Nothing is done when n is zero or negative.
 */
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
    int idx = 0;
    while (idx < n) {
        out_vector_a[idx] = (float)in_vector[idx];
        out_vector_b[idx] = (float)in_vector[idx];
        ++idx;
    }
}
# Object built with:
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ gcc -std=c11 -O2 -g -c -fno-strict-aliasing -o example_func_01.o example_func.c
example_func_01.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
for(int i=0; i<n; ++i) {
0: 31 c0 xor eax,eax
2: 85 c9 test ecx,ecx
4: 7e 2e jle 34 <foo+0x34>
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
d: 00 00 00
out_vector_a[i] = in_vector[i];
10: 66 0f ef c0 pxor xmm0,xmm0
14: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4]
19: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0
out_vector_b[i] = in_vector[i];
1e: 66 0f ef c0 pxor xmm0,xmm0
22: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4]
27: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0
2c: 48 83 c0 01 add rax,0x1
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
for(int i=0; i<n; ++i) {
30: 39 c1 cmp ecx,eax
32: 7f dc jg 10 <foo+0x10>
34: f3 c3 repz ret
# Object built with:
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_02.o example_func.c
example_func_02.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
for(int i=0; i<n; ++i) {
0: 31 c0 xor eax,eax
2: 85 c9 test ecx,ecx
4: 7e 25 jle 2b <foo+0x2b>
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
d: 00 00 00
out_vector_a[i] = in_vector[i];
10: 66 0f ef c0 pxor xmm0,xmm0
14: f3 0f 2a 04 82 cvtsi2ss xmm0,DWORD PTR [rdx+rax*4]
19: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0
out_vector_b[i] = in_vector[i];
1e: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0
23: 48 83 c0 01 add rax,0x1
void foo (float *out_vector_a, float *out_vector_b, int *in_vector, int n)
{
for(int i=0; i<n; ++i) {
27: 39 c1 cmp ecx,eax
29: 7f e5 jg 10 <foo+0x10>
2b: f3 c3 repz ret
/*
 * Copy the first n floats of in_vector into both output arrays.
 *
 * All three pointers have the same element type and none is
 * restrict-qualified, so in_vector[i] is read again for the second store.
 * Nothing is done when n is zero or negative.
 */
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n)
{
    int idx = 0;
    while (idx < n) {
        out_vector_a[idx] = in_vector[idx];
        out_vector_b[idx] = in_vector[idx];
        ++idx;
    }
}
# Object built with:
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_compatible.o example_func_compatible.c
example_func_compatible.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n)
{
for(int i=0; i<n; ++i) {
0: 31 c0 xor eax,eax
2: 85 c9 test ecx,ecx
4: 7e 26 jle 2c <foo+0x2c>
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
d: 00 00 00
out_vector_a[i] = in_vector[i];
10: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4]
15: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0
out_vector_b[i] = in_vector[i];
1a: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4]
1f: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0
24: 48 83 c0 01 add rax,0x1
void foo (float *out_vector_a, float *out_vector_b, float *in_vector, int n)
{
for(int i=0; i<n; ++i) {
28: 39 c1 cmp ecx,eax
2a: 7f e4 jg 10 <foo+0x10>
2c: f3 c3 repz ret
/*
 * Copy the first n floats of in_vector into both output arrays.
 *
 * Every pointer is restrict-qualified: the caller promises that the three
 * arrays do not overlap within the first n elements. Nothing is done when
 * n is zero or negative.
 */
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n)
{
    int idx = 0;
    while (idx < n) {
        out_vector_a[idx] = in_vector[idx];
        out_vector_b[idx] = in_vector[idx];
        ++idx;
    }
}
# Object built with:
# gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
# $ gcc -std=c11 -O2 -g -c -fstrict-aliasing -o example_func_compatible_restrict.o example_func_compatible_restrict.c
example_func_compatible_restrict.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n)
{
for(int i=0; i<n; ++i) {
0: 31 c0 xor eax,eax
2: 85 c9 test ecx,ecx
4: 7e 21 jle 27 <foo+0x27>
6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
d: 00 00 00
out_vector_a[i] = in_vector[i];
10: f3 0f 10 04 82 movss xmm0,DWORD PTR [rdx+rax*4]
15: f3 0f 11 04 87 movss DWORD PTR [rdi+rax*4],xmm0
out_vector_b[i] = in_vector[i];
1a: f3 0f 11 04 86 movss DWORD PTR [rsi+rax*4],xmm0
1f: 48 83 c0 01 add rax,0x1
void foo (float * restrict out_vector_a, float * restrict out_vector_b, float * restrict in_vector, int n)
{
for(int i=0; i<n; ++i) {
23: 39 c1 cmp ecx,eax
25: 7f e9 jg 10 <foo+0x10>
27: f3 c3 repz ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment