Created
September 13, 2012 15:46
-
-
Save dtzWill/3715222 to your computer and use it in GitHub Desktop.
Merge adjacent stores of constants?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <limits.h> | |
#include <stdint.h> | |
/* 16-byte scratch buffer (two 64-bit words) that each store variant fills. */
static uint64_t buf[2];

/* GCC/Clang attribute that prevents a function from being inlined. */
#define NOINLINE __attribute__((noinline))
/*
 * Variant 1: set every byte of buf to 0xFF using sixteen separate
 * one-byte stores to consecutive addresses.
 *
 * The stores are deliberately written out one per statement; this is the
 * source pattern whose generated code is being examined.
 */
static void NOINLINE func1(void) {
  uint8_t *bytes = (uint8_t *)buf;

  /* First 64-bit word of buf, one byte at a time. */
  bytes[0] = 0xFF;
  bytes[1] = 0xFF;
  bytes[2] = 0xFF;
  bytes[3] = 0xFF;
  bytes[4] = 0xFF;
  bytes[5] = 0xFF;
  bytes[6] = 0xFF;
  bytes[7] = 0xFF;

  /* Second 64-bit word of buf, one byte at a time. */
  bytes[8] = 0xFF;
  bytes[9] = 0xFF;
  bytes[10] = 0xFF;
  bytes[11] = 0xFF;
  bytes[12] = 0xFF;
  bytes[13] = 0xFF;
  bytes[14] = 0xFF;
  bytes[15] = 0xFF;
}
/*
 * Variant 2: set every byte of buf to 0xFF using four separate 32-bit
 * stores to consecutive addresses.
 *
 * NOTE(review): buf is declared as uint64_t, so writing it through a
 * uint32_t lvalue is outside what the C aliasing rules allow. It is kept
 * as-is because this access pattern is what the test case exercises.
 */
static void NOINLINE func2(void) {
  uint32_t *words = (uint32_t *)buf;

  words[0] = 0xFFFFFFFF;
  words[1] = 0xFFFFFFFF;
  words[2] = 0xFFFFFFFF;
  words[3] = 0xFFFFFFFF;
}
/*
 * Variant 3: set every byte of buf to 0xFF using two 64-bit stores,
 * one per element of buf.
 */
static void NOINLINE func3(void) {
  uint64_t *quads = buf;

  quads[0] = 0xFFFFFFFFFFFFFFFF;
  quads[1] = 0xFFFFFFFFFFFFFFFF;
}
int main(int argc, const char *argv[]) { | |
unsigned i; | |
// Run one of the functions lots of times... | |
for(i = 0; i < UINT_MAX; ++i) { | |
FUNC(); | |
} | |
// Misc code to just use all of buf | |
unsigned v = 0; | |
for (i = 0 ; i < 16; ++i) | |
v |= buf[i]; | |
printf("%u\n", v); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Build array.c once per store variant (FUNC=func1/func2/func3), dump the
# disassembly of each variant with gdb, then time each resulting binary.
#
# The compiler can be overridden through the CC environment variable;
# it defaults to gcc.

CC=${CC-gcc}
CFLAGS="-mtune=generic -mno-sse -O3"

# Remove the generated binaries however the script ends.
trap 'rm -f test1 test2 test3' EXIT

# Record which compiler produced the results that follow.
$CC -v 2>&1

# Build all variants first. Stop on the first failure so that a missing or
# stale binary is never disassembled or timed.
for n in 1 2 3; do
    $CC $CFLAGS array.c -o "test$n" -DFUNC="func$n" || exit 1
done

# Disassemble (with raw instruction bytes) the variant built into each binary.
for n in 1 2 3; do
    gdb --batch -ex "disassemble /r func$n" "test$n"
done

echo ""

# Time each binary; its own output is discarded, and the timing report
# (written to stderr) is redirected to stdout.
for n in 1 2 3; do
    echo "=== Func$n Timing ==="
    (time "./test$n" > /dev/null) 2>&1
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Using built-in specs. | |
Target: x86_64-redhat-linux | |
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux | |
Thread model: posix | |
gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) | |
Dump of assembler code for function func1: | |
0x0000000000400500 <+0>: c6 05 39 15 00 00 ff movb $0xff,0x1539(%rip) # 0x401a40 <buf> | |
0x0000000000400507 <+7>: c6 05 33 15 00 00 ff movb $0xff,0x1533(%rip) # 0x401a41 <buf+1> | |
0x000000000040050e <+14>: c6 05 2d 15 00 00 ff movb $0xff,0x152d(%rip) # 0x401a42 <buf+2> | |
0x0000000000400515 <+21>: c6 05 27 15 00 00 ff movb $0xff,0x1527(%rip) # 0x401a43 <buf+3> | |
0x000000000040051c <+28>: c6 05 21 15 00 00 ff movb $0xff,0x1521(%rip) # 0x401a44 <buf+4> | |
0x0000000000400523 <+35>: c6 05 1b 15 00 00 ff movb $0xff,0x151b(%rip) # 0x401a45 <buf+5> | |
0x000000000040052a <+42>: c6 05 15 15 00 00 ff movb $0xff,0x1515(%rip) # 0x401a46 <buf+6> | |
0x0000000000400531 <+49>: c6 05 0f 15 00 00 ff movb $0xff,0x150f(%rip) # 0x401a47 <buf+7> | |
0x0000000000400538 <+56>: c6 05 09 15 00 00 ff movb $0xff,0x1509(%rip) # 0x401a48 <buf+8> | |
0x000000000040053f <+63>: c6 05 03 15 00 00 ff movb $0xff,0x1503(%rip) # 0x401a49 <buf+9> | |
0x0000000000400546 <+70>: c6 05 fd 14 00 00 ff movb $0xff,0x14fd(%rip) # 0x401a4a <buf+10> | |
0x000000000040054d <+77>: c6 05 f7 14 00 00 ff movb $0xff,0x14f7(%rip) # 0x401a4b <buf+11> | |
0x0000000000400554 <+84>: c6 05 f1 14 00 00 ff movb $0xff,0x14f1(%rip) # 0x401a4c <buf+12> | |
0x000000000040055b <+91>: c6 05 eb 14 00 00 ff movb $0xff,0x14eb(%rip) # 0x401a4d <buf+13> | |
0x0000000000400562 <+98>: c6 05 e5 14 00 00 ff movb $0xff,0x14e5(%rip) # 0x401a4e <buf+14> | |
0x0000000000400569 <+105>: c6 05 df 14 00 00 ff movb $0xff,0x14df(%rip) # 0x401a4f <buf+15> | |
0x0000000000400570 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x0000000000400500 <+0>: c7 05 e6 14 00 00 ff ff ff ff movl $0xffffffff,0x14e6(%rip) # 0x4019f0 <buf> | |
0x000000000040050a <+10>: c7 05 e0 14 00 00 ff ff ff ff movl $0xffffffff,0x14e0(%rip) # 0x4019f4 <buf+4> | |
0x0000000000400514 <+20>: c7 05 da 14 00 00 ff ff ff ff movl $0xffffffff,0x14da(%rip) # 0x4019f8 <buf+8> | |
0x000000000040051e <+30>: c7 05 d4 14 00 00 ff ff ff ff movl $0xffffffff,0x14d4(%rip) # 0x4019fc <buf+12> | |
0x0000000000400528 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x0000000000400500 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax | |
0x0000000000400507 <+7>: 48 89 05 d2 14 00 00 mov %rax,0x14d2(%rip) # 0x4019e0 <buf> | |
0x000000000040050e <+14>: 48 89 05 d3 14 00 00 mov %rax,0x14d3(%rip) # 0x4019e8 <buf+8> | |
0x0000000000400515 <+21>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.464s | |
user 0m20.434s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m9.630s | |
user 0m9.615s | |
sys 0m0.000s | |
=== Func3 Timing === | |
real 0m7.219s | |
user 0m7.203s | |
sys 0m0.003s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Reading specs from /usr/lib64/gcc/x86_64-slackware-linux/4.7.1/specs | |
COLLECT_GCC=gcc | |
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-slackware-linux/4.7.1/lto-wrapper | |
Target: x86_64-slackware-linux | |
Configured with: ../gcc-4.7.1/configure --prefix=/usr --libdir=/usr/lib64 --mandir=/usr/man --infodir=/usr/info --enable-shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,go,java,lto,objc --enable-threads=posix --enable-checking=release --enable-objc-gc --with-system-zlib --with-python-dir=/lib64/python2.7/site-packages --disable-libunwind-exceptions --enable-__cxa_atexit --enable-libssp --enable-lto --with-gnu-ld --verbose --enable-java-home --with-java-home=/usr/lib64/jvm/jre --with-jvm-root-dir=/usr/lib64/jvm --with-jvm-jar-dir=/usr/lib64/jvm/jvm-exports --with-arch-directory=amd64 --with-antlr-jar=/home/slackware/slackbuilds/gcc/antlr-runtime-3.4.jar --enable-multilib --target=x86_64-slackware-linux --build=x86_64-slackware-linux --host=x86_64-slackware-linux | |
Thread model: posix | |
gcc version 4.7.1 (GCC) | |
Dump of assembler code for function func1: | |
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0 | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0 | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004006a0 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax | |
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf> | |
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8> | |
0x00000000004006b5 <+21>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m9.941s | |
user 0m9.938s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m9.943s | |
user 0m9.937s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m9.938s | |
user 0m9.935s | |
sys 0m0.000s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clang version 3.1 (git@github.com:llvm-mirror/clang.git 6f576c9bfa9a22e2801485768fe56b3336ea18a7) (git@github.com:llvm-mirror/llvm.git 02b87df98afb03136a1f5076c042696c98524947) | |
Target: x86_64-unknown-linux-gnu | |
Thread model: posix | |
Dump of assembler code for function func1: | |
0x00000000004005b0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a20 <buf> | |
0x00000000004005b7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a21 <buf+1> | |
0x00000000004005be <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a22 <buf+2> | |
0x00000000004005c5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a23 <buf+3> | |
0x00000000004005cc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a24 <buf+4> | |
0x00000000004005d3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a25 <buf+5> | |
0x00000000004005da <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a26 <buf+6> | |
0x00000000004005e1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a27 <buf+7> | |
0x00000000004005e8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a28 <buf+8> | |
0x00000000004005ef <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a29 <buf+9> | |
0x00000000004005f6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a2a <buf+10> | |
0x00000000004005fd <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a2b <buf+11> | |
0x0000000000400604 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a2c <buf+12> | |
0x000000000040060b <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a2d <buf+13> | |
0x0000000000400612 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a2e <buf+14> | |
0x0000000000400619 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a2f <buf+15> | |
0x0000000000400620 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004005b0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019d0 <buf> | |
0x00000000004005ba <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019d4 <buf+4> | |
0x00000000004005c4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019d8 <buf+8> | |
0x00000000004005ce <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019dc <buf+12> | |
0x00000000004005d8 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004005b0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019c0 <buf> | |
0x00000000004005bb <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019c8 <buf+8> | |
0x00000000004005c6 <+22>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.456s | |
user 0m20.426s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m8.420s | |
user 0m8.406s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m8.411s | |
user 0m8.398s | |
sys 0m0.000s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 40d734de4c19a34d2d1764b136470ee25993eb4e) | |
Target: x86_64-unknown-linux-gnu | |
Thread model: posix | |
Dump of assembler code for function func1: | |
0x00000000004005a0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a10 <buf> | |
0x00000000004005a7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a11 <buf+1> | |
0x00000000004005ae <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a12 <buf+2> | |
0x00000000004005b5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a13 <buf+3> | |
0x00000000004005bc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a14 <buf+4> | |
0x00000000004005c3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a15 <buf+5> | |
0x00000000004005ca <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a16 <buf+6> | |
0x00000000004005d1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a17 <buf+7> | |
0x00000000004005d8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a18 <buf+8> | |
0x00000000004005df <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a19 <buf+9> | |
0x00000000004005e6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a1a <buf+10> | |
0x00000000004005ed <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a1b <buf+11> | |
0x00000000004005f4 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a1c <buf+12> | |
0x00000000004005fb <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a1d <buf+13> | |
0x0000000000400602 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a1e <buf+14> | |
0x0000000000400609 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a1f <buf+15> | |
0x0000000000400610 <+112>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func2: | |
0x00000000004005a0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019c0 <buf> | |
0x00000000004005aa <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019c4 <buf+4> | |
0x00000000004005b4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019c8 <buf+8> | |
0x00000000004005be <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019cc <buf+12> | |
0x00000000004005c8 <+40>: c3 retq | |
End of assembler dump. | |
Dump of assembler code for function func3: | |
0x00000000004005a0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019b0 <buf> | |
0x00000000004005ab <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019b8 <buf+8> | |
0x00000000004005b6 <+22>: c3 retq | |
End of assembler dump. | |
=== Func1 Timing === | |
real 0m20.440s | |
user 0m20.410s | |
sys 0m0.000s | |
=== Func2 Timing === | |
real 0m8.418s | |
user 0m8.404s | |
sys 0m0.001s | |
=== Func3 Timing === | |
real 0m7.217s | |
user 0m7.196s | |
sys 0m0.002s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Bug report: http://llvm.org/bugs/show_bug.cgi?id=13836
Fixed in r163809!
Updated log from mainline:
clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 629956400519d7b66c2009aed3a85c737018dc5b)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m7.263s
user 0m7.251s
sys 0m0.001s
=== Func2 Timing ===
real 0m7.219s
user 0m7.208s
sys 0m0.000s
=== Func3 Timing ===
real 0m7.218s
user 0m7.206s
sys 0m0.001s
Perfect!