Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dtzWill
Created September 13, 2012 15:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dtzWill/3715222 to your computer and use it in GitHub Desktop.
Save dtzWill/3715222 to your computer and use it in GitHub Desktop.
Merge adjacent stores of constants?
#include <stdio.h>
#include <limits.h>
#include <stdint.h>
// 16-byte target buffer (two 64-bit words) written by the store functions below.
static uint64_t buf[2];
// Compiler attribute (GCC/Clang syntax) that prevents the annotated function
// from being inlined into its caller.
#define NOINLINE __attribute__ ((noinline))
// Fills all 16 bytes of buf with 0xFF using sixteen separate one-byte stores
// of the same constant to consecutive addresses.
// The stores are deliberately written out one by one instead of using a loop.
// NOTE(review): presumably this is the input pattern whose merging into wider
// stores is being examined -- keep the statement sequence as is.
static void NOINLINE func1() {
// View buf as raw bytes.
uint8_t* p = (uint8_t*)buf;
p[0] = 0xFF;
p[1] = 0xFF;
p[2] = 0xFF;
p[3] = 0xFF;
p[4] = 0xFF;
p[5] = 0xFF;
p[6] = 0xFF;
p[7] = 0xFF;
p[8] = 0xFF;
p[9] = 0xFF;
p[10] = 0xFF;
p[11] = 0xFF;
p[12] = 0xFF;
p[13] = 0xFF;
p[14] = 0xFF;
p[15] = 0xFF;
}
// Fills all 16 bytes of buf with 0xFF using four separate 32-bit stores of
// 0xFFFFFFFF to consecutive 4-byte slots.
// NOTE(review): buf is declared as uint64_t[2] but is written here through a
// uint32_t pointer; this may conflict with C's effective-type (strict
// aliasing) rules -- confirm it is acceptable for this experiment.
static void NOINLINE func2() {
// View buf as four 32-bit words.
uint32_t* p = (uint32_t*)buf;
p[0] = 0xFFFFFFFF;
p[1] = 0xFFFFFFFF;
p[2] = 0xFFFFFFFF;
p[3] = 0xFFFFFFFF;
}
// Fills all 16 bytes of buf with 0xFF using two 64-bit stores of
// 0xFFFFFFFFFFFFFFFF, one per element of buf.
static void NOINLINE func3() {
// buf already has element type uint64_t, so the cast does not change the type.
uint64_t* p = (uint64_t*)buf;
p[0] = 0xFFFFFFFFFFFFFFFF;
p[1] = 0xFFFFFFFFFFFFFFFF;
}
// Benchmark driver.
//
// Calls FUNC() UINT_MAX times, then ORs together every element of buf and
// prints the result so that the stores performed by FUNC are observable.
//
// FUNC is not defined in this file; it must be supplied as a macro on the
// compiler command line (for example -DFUNC=func1).
//
// argc and argv are unused.
// Returns 0.
int main(int argc, const char *argv[]) {
    unsigned i;
    // Run one of the functions lots of times...
    for (i = 0; i < UINT_MAX; ++i) {
        FUNC();
    }
    // Misc code to just use all of buf.
    // The bound is the element count of buf, not its size in bytes: indexing
    // the uint64_t array up to 16 would read past its end (undefined behavior).
    unsigned v = 0;
    for (i = 0; i < sizeof buf / sizeof buf[0]; ++i) {
        // Only the low bits that fit in unsigned are kept, as before.
        v |= (unsigned)buf[i];
    }
    printf("%u\n", v);
    return 0;
}
#!/bin/sh
# Builds array.c once per store variant (func1, func2, func3), prints the
# disassembly of each variant, times each resulting binary, and finally
# removes the binaries.

# Compiler under test: gcc unless CC is already set in the environment.
: "${CC=gcc}"
CFLAGS="-mtune=generic -mno-sse -O3"

# Print the compiler's version/configuration ahead of the results.
$CC -v 2>&1

# One binary per variant; -DFUNC selects the function that main() calls.
for n in 1 2 3; do
    $CC $CFLAGS array.c -o "test$n" "-DFUNC=func$n"
done

# Show the machine code (with raw bytes) generated for each variant.
for n in 1 2 3; do
    gdb --batch -ex "disassemble /r func$n" "test$n"
done

echo ""
# Time each binary; its own stdout is discarded, the timing report is kept.
for n in 1 2 3; do
    echo "=== Func$n Timing ==="
    (time "./test$n" > /dev/null) 2>&1
done

rm test1 test2 test3
Using built-in specs.
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC)
Dump of assembler code for function func1:
0x0000000000400500 <+0>: c6 05 39 15 00 00 ff movb $0xff,0x1539(%rip) # 0x401a40 <buf>
0x0000000000400507 <+7>: c6 05 33 15 00 00 ff movb $0xff,0x1533(%rip) # 0x401a41 <buf+1>
0x000000000040050e <+14>: c6 05 2d 15 00 00 ff movb $0xff,0x152d(%rip) # 0x401a42 <buf+2>
0x0000000000400515 <+21>: c6 05 27 15 00 00 ff movb $0xff,0x1527(%rip) # 0x401a43 <buf+3>
0x000000000040051c <+28>: c6 05 21 15 00 00 ff movb $0xff,0x1521(%rip) # 0x401a44 <buf+4>
0x0000000000400523 <+35>: c6 05 1b 15 00 00 ff movb $0xff,0x151b(%rip) # 0x401a45 <buf+5>
0x000000000040052a <+42>: c6 05 15 15 00 00 ff movb $0xff,0x1515(%rip) # 0x401a46 <buf+6>
0x0000000000400531 <+49>: c6 05 0f 15 00 00 ff movb $0xff,0x150f(%rip) # 0x401a47 <buf+7>
0x0000000000400538 <+56>: c6 05 09 15 00 00 ff movb $0xff,0x1509(%rip) # 0x401a48 <buf+8>
0x000000000040053f <+63>: c6 05 03 15 00 00 ff movb $0xff,0x1503(%rip) # 0x401a49 <buf+9>
0x0000000000400546 <+70>: c6 05 fd 14 00 00 ff movb $0xff,0x14fd(%rip) # 0x401a4a <buf+10>
0x000000000040054d <+77>: c6 05 f7 14 00 00 ff movb $0xff,0x14f7(%rip) # 0x401a4b <buf+11>
0x0000000000400554 <+84>: c6 05 f1 14 00 00 ff movb $0xff,0x14f1(%rip) # 0x401a4c <buf+12>
0x000000000040055b <+91>: c6 05 eb 14 00 00 ff movb $0xff,0x14eb(%rip) # 0x401a4d <buf+13>
0x0000000000400562 <+98>: c6 05 e5 14 00 00 ff movb $0xff,0x14e5(%rip) # 0x401a4e <buf+14>
0x0000000000400569 <+105>: c6 05 df 14 00 00 ff movb $0xff,0x14df(%rip) # 0x401a4f <buf+15>
0x0000000000400570 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x0000000000400500 <+0>: c7 05 e6 14 00 00 ff ff ff ff movl $0xffffffff,0x14e6(%rip) # 0x4019f0 <buf>
0x000000000040050a <+10>: c7 05 e0 14 00 00 ff ff ff ff movl $0xffffffff,0x14e0(%rip) # 0x4019f4 <buf+4>
0x0000000000400514 <+20>: c7 05 da 14 00 00 ff ff ff ff movl $0xffffffff,0x14da(%rip) # 0x4019f8 <buf+8>
0x000000000040051e <+30>: c7 05 d4 14 00 00 ff ff ff ff movl $0xffffffff,0x14d4(%rip) # 0x4019fc <buf+12>
0x0000000000400528 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x0000000000400500 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
0x0000000000400507 <+7>: 48 89 05 d2 14 00 00 mov %rax,0x14d2(%rip) # 0x4019e0 <buf>
0x000000000040050e <+14>: 48 89 05 d3 14 00 00 mov %rax,0x14d3(%rip) # 0x4019e8 <buf+8>
0x0000000000400515 <+21>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.464s
user 0m20.434s
sys 0m0.000s
=== Func2 Timing ===
real 0m9.630s
user 0m9.615s
sys 0m0.000s
=== Func3 Timing ===
real 0m7.219s
user 0m7.203s
sys 0m0.003s
Reading specs from /usr/lib64/gcc/x86_64-slackware-linux/4.7.1/specs
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-slackware-linux/4.7.1/lto-wrapper
Target: x86_64-slackware-linux
Configured with: ../gcc-4.7.1/configure --prefix=/usr --libdir=/usr/lib64 --mandir=/usr/man --infodir=/usr/info --enable-shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,go,java,lto,objc --enable-threads=posix --enable-checking=release --enable-objc-gc --with-system-zlib --with-python-dir=/lib64/python2.7/site-packages --disable-libunwind-exceptions --enable-__cxa_atexit --enable-libssp --enable-lto --with-gnu-ld --verbose --enable-java-home --with-java-home=/usr/lib64/jvm/jre --with-jvm-root-dir=/usr/lib64/jvm --with-jvm-jar-dir=/usr/lib64/jvm/jvm-exports --with-arch-directory=amd64 --with-antlr-jar=/home/slackware/slackbuilds/gcc/antlr-runtime-3.4.jar --enable-multilib --target=x86_64-slackware-linux --build=x86_64-slackware-linux --host=x86_64-slackware-linux
Thread model: posix
gcc version 4.7.1 (GCC)
Dump of assembler code for function func1:
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004006a0 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m9.941s
user 0m9.938s
sys 0m0.000s
=== Func2 Timing ===
real 0m9.943s
user 0m9.937s
sys 0m0.001s
=== Func3 Timing ===
real 0m9.938s
user 0m9.935s
sys 0m0.000s
clang version 3.1 (git@github.com:llvm-mirror/clang.git 6f576c9bfa9a22e2801485768fe56b3336ea18a7) (git@github.com:llvm-mirror/llvm.git 02b87df98afb03136a1f5076c042696c98524947)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005b0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a20 <buf>
0x00000000004005b7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a21 <buf+1>
0x00000000004005be <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a22 <buf+2>
0x00000000004005c5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a23 <buf+3>
0x00000000004005cc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a24 <buf+4>
0x00000000004005d3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a25 <buf+5>
0x00000000004005da <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a26 <buf+6>
0x00000000004005e1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a27 <buf+7>
0x00000000004005e8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a28 <buf+8>
0x00000000004005ef <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a29 <buf+9>
0x00000000004005f6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a2a <buf+10>
0x00000000004005fd <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a2b <buf+11>
0x0000000000400604 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a2c <buf+12>
0x000000000040060b <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a2d <buf+13>
0x0000000000400612 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a2e <buf+14>
0x0000000000400619 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a2f <buf+15>
0x0000000000400620 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005b0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019d0 <buf>
0x00000000004005ba <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019d4 <buf+4>
0x00000000004005c4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019d8 <buf+8>
0x00000000004005ce <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019dc <buf+12>
0x00000000004005d8 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005b0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019c0 <buf>
0x00000000004005bb <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019c8 <buf+8>
0x00000000004005c6 <+22>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.456s
user 0m20.426s
sys 0m0.000s
=== Func2 Timing ===
real 0m8.420s
user 0m8.406s
sys 0m0.001s
=== Func3 Timing ===
real 0m8.411s
user 0m8.398s
sys 0m0.000s
clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 40d734de4c19a34d2d1764b136470ee25993eb4e)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005a0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a10 <buf>
0x00000000004005a7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a11 <buf+1>
0x00000000004005ae <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a12 <buf+2>
0x00000000004005b5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a13 <buf+3>
0x00000000004005bc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a14 <buf+4>
0x00000000004005c3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a15 <buf+5>
0x00000000004005ca <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a16 <buf+6>
0x00000000004005d1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a17 <buf+7>
0x00000000004005d8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a18 <buf+8>
0x00000000004005df <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a19 <buf+9>
0x00000000004005e6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a1a <buf+10>
0x00000000004005ed <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a1b <buf+11>
0x00000000004005f4 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a1c <buf+12>
0x00000000004005fb <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a1d <buf+13>
0x0000000000400602 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a1e <buf+14>
0x0000000000400609 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a1f <buf+15>
0x0000000000400610 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005a0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019c0 <buf>
0x00000000004005aa <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019c4 <buf+4>
0x00000000004005b4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019c8 <buf+8>
0x00000000004005be <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019cc <buf+12>
0x00000000004005c8 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005a0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019b0 <buf>
0x00000000004005ab <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019b8 <buf+8>
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.440s
user 0m20.410s
sys 0m0.000s
=== Func2 Timing ===
real 0m8.418s
user 0m8.404s
sys 0m0.001s
=== Func3 Timing ===
real 0m7.217s
user 0m7.196s
sys 0m0.002s
@dtzWill
Copy link
Author

dtzWill commented Sep 13, 2012

Bug report: http://llvm.org/bugs/show_bug.cgi?id=13836

Fixed in r163809!

Updated log from mainline:

clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 629956400519d7b66c2009aed3a85c737018dc5b)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.

=== Func1 Timing ===

real 0m7.263s
user 0m7.251s
sys 0m0.001s
=== Func2 Timing ===

real 0m7.219s
user 0m7.208s
sys 0m0.000s
=== Func3 Timing ===

real 0m7.218s
user 0m7.206s
sys 0m0.001s

Perfect!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment