Created
September 13, 2012 00:01
-
-
Save dtzWill/3710878 to your computer and use it in GitHub Desktop.
Array fill with varying store widths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <limits.h> | |
#include <stdint.h> | |
// Scratch buffer shared by the fill functions below and by main(): each
// funcN overwrites its first 16 bytes, and main() reads all 16 elements.
static uint64_t buf[16]; | |
// GCC/Clang function attribute that forbids inlining, so that each fill
// function remains a separate out-of-line function.
#define NOINLINE __attribute__ ((noinline)) | |
/*
 * Set the first 16 bytes of buf to 0xFF using sixteen separate 8-bit stores.
 *
 * The stores are deliberately spelled out one statement per byte instead of
 * being rolled into a loop, so each byte-wide write is its own statement in
 * the source.
 */
static void NOINLINE func1() {
    uint8_t *dst = (uint8_t *)buf;

    /* bytes 0-7: first 64-bit element of buf */
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;

    /* bytes 8-15: second 64-bit element of buf */
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst++ = 0xFF;
    *dst = 0xFF;
}
/*
 * Set the first 16 bytes of buf to 0xFF using four separate 32-bit stores.
 *
 * NOTE(review): buf is declared as an array of uint64_t, so writing it
 * through a uint32_t pointer is not alias-compatible under C's effective-type
 * rules. It is left as written because the 32-bit store width appears to be
 * the point of this variant -- confirm before "fixing".
 */
static void NOINLINE func2() {
    uint32_t *words = (uint32_t *)buf;

    words[0] = UINT32_MAX;
    words[1] = UINT32_MAX;
    words[2] = UINT32_MAX;
    words[3] = UINT32_MAX;
}
/*
 * Set the first 16 bytes of buf to 0xFF using two separate 64-bit stores,
 * i.e. one store per buf element at the array's native element width.
 */
static void NOINLINE func3() {
    buf[0] = UINT64_MAX;
    buf[1] = UINT64_MAX;
}
int main(int argc, const char *argv[]) { | |
unsigned i; | |
// Run one of the functions lots of times... | |
for(i = 0; i < UINT_MAX; ++i) { | |
FUNC(); | |
} | |
// Misc code to just use all of buf | |
unsigned v = 0; | |
for (i = 0 ; i < 16; ++i) | |
v |= buf[i]; | |
printf("%d\n", v); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Build array.c three times (once per fill function), dump the disassembly of
# each function with gdb, then time each resulting binary.
#
# The compiler defaults to gcc and can be overridden through $CC.
# NOTE(review): the timing step relies on the shell providing `time`; the
# captured "real/user/sys" output looks like bash's keyword format -- confirm
# that /bin/sh is bash on the hosts this is run on.

CC=${CC-gcc}
CFLAGS="-mtune=generic -mno-sse -O3"

# Record which compiler produced the results.
$CC -v 2>&1

# One binary per variant: testN calls funcN.
for n in 1 2 3; do
    $CC $CFLAGS array.c -o "test$n" "-DFUNC=func$n"
done

# Show the machine code (with raw bytes) generated for each variant.
for n in 1 2 3; do
    gdb --batch -ex "disassemble /r func$n" "test$n"
done

echo ""

# Time each variant; the program's own output is discarded, and the timing
# report (written to stderr) is folded into stdout.
for n in 1 2 3; do
    echo "=== Func$n Timing ==="
    (time "./test$n" > /dev/null) 2>&1
done

rm test1 test2 test3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Using built-in specs. | |
Target: x86_64-redhat-linux | |
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux | |
Thread model: posix | |
gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) | |
Dump of assembler code for function func1: | |
0x0000000000400500 <+0>: c6 05 39 15 00 00 ff movb $0xff,0x1539(%rip) # 0x401a40 <buf> | |
0x0000000000400507 <+7>: c6 05 33 15 00 00 ff movb $0xff,0x1533(%rip) # 0x401a41 <buf+1> | |
0x000000000040050e <+14>: c6 05 2d 15 00 00 ff movb $0xff,0x152d(%rip) # 0x401a42 <buf+2> | |
0x0000000000400515 <+21>: c6 05 27 15 00 00 ff movb $0xff,0x1527(%rip) # 0x401a43 <buf+3> | |
0x000000000040051c <+28>: c6 05 21 15 00 00 ff movb $0xff,0x1521(%rip) # 0x401a44 <buf+4> | |
0x0000000000400523 <+35>: c6 05 1b 15 00 00 ff movb $0xff,0x151b(%rip) # 0x401a45 <buf+5> | |
0x000000000040052a <+42>: c6 05 15 15 00 00 ff movb $0xff,0x1515(%rip) # 0x401a46 <buf+6> | |
0x0000000000400531 <+49>: c6 05 0f 15 00 00 ff movb $0xff,0x150f(%rip) # 0x401a47 <buf+7> | |
0x0000000000400538 <+56>: c6 05 09 15 00 00 ff movb $0xff,0x1509(%rip) # 0x401a48 <buf+8> | |
0x000000000040053f <+63>: c6 05 03 15 00 00 ff movb $0xff,0x1503(%rip) # 0x401a49 <buf+9> | |
0x0000000000400546 <+70>: c6 05 fd 14 00 00 ff movb $0xff,0x14fd(%rip) # 0x401a4a <buf+10> | |
0x000000000040054d <+77>: c6 05 f7 14 00 00 ff movb $0xff,0x14f7(%rip) # 0x401a4b <buf+11> | |
0x0000000000400554 <+84>: c6 05 f1 14 00 00 ff movb $0xff,0x14f1(%rip) # 0x401a4c <buf+12> | |
0x000000000040055b <+91>: c6 05 eb 14 00 00 ff movb $0xff,0x14eb(%rip) # 0x401a4d <buf+13> | |
0x0000000000400562 <+98>: c6 05 e5 14 00 00 ff movb $0xff,0x14e5(%rip) # 0x401a4e <buf+14> | |
0x0000000000400569 <+105>: c6 05 df 14 00 00 ff movb $0xff,0x14df(%rip) # 0x401a4f <buf+15> | |
0x0000000000400570 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x0000000000400500 <+0>: c7 05 e6 14 00 00 ff ff ff ff movl $0xffffffff,0x14e6(%rip) # 0x4019f0 <buf> | |
0x000000000040050a <+10>: c7 05 e0 14 00 00 ff ff ff ff movl $0xffffffff,0x14e0(%rip) # 0x4019f4 <buf+4> | |
0x0000000000400514 <+20>: c7 05 da 14 00 00 ff ff ff ff movl $0xffffffff,0x14da(%rip) # 0x4019f8 <buf+8> | |
0x000000000040051e <+30>: c7 05 d4 14 00 00 ff ff ff ff movl $0xffffffff,0x14d4(%rip) # 0x4019fc <buf+12> | |
0x0000000000400528 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x0000000000400500 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax | |
0x0000000000400507 <+7>: 48 89 05 d2 14 00 00 mov %rax,0x14d2(%rip) # 0x4019e0 <buf> | |
0x000000000040050e <+14>: 48 89 05 d3 14 00 00 mov %rax,0x14d3(%rip) # 0x4019e8 <buf+8> | |
0x0000000000400515 <+21>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.464s | |
user 0m20.434s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m9.630s | |
user 0m9.615s | |
sys 0m0.000s | |
=== Func3 Timing === | |
real 0m7.219s | |
user 0m7.203s | |
sys 0m0.003s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Reading specs from /usr/lib64/gcc/x86_64-slackware-linux/4.7.1/specs | |
COLLECT_GCC=gcc | |
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-slackware-linux/4.7.1/lto-wrapper | |
Target: x86_64-slackware-linux | |
Configured with: ../gcc-4.7.1/configure --prefix=/usr --libdir=/usr/lib64 --mandir=/usr/man --infodir=/usr/info --enable-shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,go,java,lto,objc --enable-threads=posix --enable-checking=release --enable-objc-gc --with-system-zlib --with-python-dir=/lib64/python2.7/site-packages --disable-libunwind-exceptions --enable-__cxa_atexit --enable-libssp --enable-lto --with-gnu-ld --verbose --enable-java-home --with-java-home=/usr/lib64/jvm/jre --with-jvm-root-dir=/usr/lib64/jvm --with-jvm-jar-dir=/usr/lib64/jvm/jvm-exports --with-arch-directory=amd64 --with-antlr-jar=/home/slackware/slackbuilds/gcc/antlr-runtime-3.4.jar --enable-multilib --target=x86_64-slackware-linux --build=x86_64-slackware-linux --host=x86_64-slackware-linux | |
Thread model: posix | |
gcc version 4.7.1 (GCC) | |
Dump of assembler code for function func1: | |
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0 | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0 | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004006a0 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m9.941s | |
user 0m9.938s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m9.943s | |
user 0m9.937s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m9.938s | |
user 0m9.935s | |
sys 0m0.000s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clang version 3.1 (git@github.com:llvm-mirror/clang.git 6f576c9bfa9a22e2801485768fe56b3336ea18a7) (git@github.com:llvm-mirror/llvm.git 02b87df98afb03136a1f5076c042696c98524947) | |
Target: x86_64-unknown-linux-gnu | |
Thread model: posix | |
Dump of assembler code for function func1: | |
0x00000000004005b0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a20 <buf> | |
0x00000000004005b7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a21 <buf+1> | |
0x00000000004005be <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a22 <buf+2> | |
0x00000000004005c5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a23 <buf+3> | |
0x00000000004005cc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a24 <buf+4> | |
0x00000000004005d3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a25 <buf+5> | |
0x00000000004005da <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a26 <buf+6> | |
0x00000000004005e1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a27 <buf+7> | |
0x00000000004005e8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a28 <buf+8> | |
0x00000000004005ef <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a29 <buf+9> | |
0x00000000004005f6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a2a <buf+10> | |
0x00000000004005fd <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a2b <buf+11> | |
0x0000000000400604 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a2c <buf+12> | |
0x000000000040060b <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a2d <buf+13> | |
0x0000000000400612 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a2e <buf+14> | |
0x0000000000400619 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a2f <buf+15> | |
0x0000000000400620 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004005b0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019d0 <buf> | |
0x00000000004005ba <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019d4 <buf+4> | |
0x00000000004005c4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019d8 <buf+8> | |
0x00000000004005ce <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019dc <buf+12> | |
0x00000000004005d8 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004005b0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019c0 <buf> | |
0x00000000004005bb <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019c8 <buf+8> | |
0x00000000004005c6 <+22>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.456s | |
user 0m20.426s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m8.420s | |
user 0m8.406s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m8.411s | |
user 0m8.398s | |
sys 0m0.000s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 40d734de4c19a34d2d1764b136470ee25993eb4e) | |
Target: x86_64-unknown-linux-gnu | |
Thread model: posix | |
Dump of assembler code for function func1: | |
0x00000000004005a0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a10 <buf> | |
0x00000000004005a7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a11 <buf+1> | |
0x00000000004005ae <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a12 <buf+2> | |
0x00000000004005b5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a13 <buf+3> | |
0x00000000004005bc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a14 <buf+4> | |
0x00000000004005c3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a15 <buf+5> | |
0x00000000004005ca <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a16 <buf+6> | |
0x00000000004005d1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a17 <buf+7> | |
0x00000000004005d8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a18 <buf+8> | |
0x00000000004005df <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a19 <buf+9> | |
0x00000000004005e6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a1a <buf+10> | |
0x00000000004005ed <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a1b <buf+11> | |
0x00000000004005f4 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a1c <buf+12> | |
0x00000000004005fb <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a1d <buf+13> | |
0x0000000000400602 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a1e <buf+14> | |
0x0000000000400609 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a1f <buf+15> | |
0x0000000000400610 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004005a0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019c0 <buf> | |
0x00000000004005aa <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019c4 <buf+4> | |
0x00000000004005b4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019c8 <buf+8> | |
0x00000000004005be <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019cc <buf+12> | |
0x00000000004005c8 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004005a0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019b0 <buf> | |
0x00000000004005ab <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019b8 <buf+8> | |
0x00000000004005b6 <+22>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.440s | |
user 0m20.410s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m8.418s | |
user 0m8.404s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m7.217s | |
user 0m7.196s | |
sys 0m0.002s |
To summarize: all three versions (func1, func2, func3) should have the same effect, namely filling the first 16 bytes of buf with 0xFF. However, the resulting machine code differs considerably between them, and I'm not sure why.
I've posted source, a script to produce semi-nice output, and the result of the script run on a few hosts with different compilers.
Is this just a failure of LLVM's optimizers in some sense, or am I missing something?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have to ask: Why the devil doesn't clang/LLVM combine the stores?
For one of my LLVM projects I emit a series of stores (at the IR level) similar to those in array.c, expecting something like what gcc 4.7.1 produced: using word-sized stores when possible. However, as you can see, this doesn't happen.
Anyone have an explanation? Thoughts? Thanks!