Skip to content

Instantly share code, notes, and snippets.

@dtzWill dtzWill/array.c
Created Sep 13, 2012

Embed
What would you like to do?
Merge adjacent stores of constants?
#include <stdio.h>
#include <limits.h>
#include <stdint.h>
static uint64_t buf[2];
#define NOINLINE __attribute__ ((noinline))
// Variant 1: set all 16 bytes of buf to 0xFF using sixteen separate
// one-byte constant stores to adjacent addresses.
//
// The stores are intentionally written out one per statement (no loop, no
// memset): this function exists to show whether the compiler merges adjacent
// narrow constant stores into wider ones. Do not "simplify" it.
//
// NOINLINE (__attribute__((noinline))) keeps this a standalone function.
static void NOINLINE func1() {
// View the two 64-bit words of buf as 16 individual bytes.
uint8_t* p = (uint8_t*)buf;
p[0] = 0xFF;
p[1] = 0xFF;
p[2] = 0xFF;
p[3] = 0xFF;
p[4] = 0xFF;
p[5] = 0xFF;
p[6] = 0xFF;
p[7] = 0xFF;
p[8] = 0xFF;
p[9] = 0xFF;
p[10] = 0xFF;
p[11] = 0xFF;
p[12] = 0xFF;
p[13] = 0xFF;
p[14] = 0xFF;
p[15] = 0xFF;
}
// Variant 2: set all 16 bytes of buf to 0xFF using four separate 32-bit
// constant stores to adjacent addresses.
//
// As with the other variants, the stores are deliberately spelled out one
// per statement so the compiler's store merging can be observed.
//
// NOTE(review): buf is declared as uint64_t[2]; writing it through a
// uint32_t* is formally an effective-type (strict aliasing) violation in
// ISO C. It is left as-is because the exact source form is the test case.
static void NOINLINE func2() {
// View the two 64-bit words of buf as four 32-bit words.
uint32_t* p = (uint32_t*)buf;
p[0] = 0xFFFFFFFF;
p[1] = 0xFFFFFFFF;
p[2] = 0xFFFFFFFF;
p[3] = 0xFFFFFFFF;
}
// Variant 3: set all 16 bytes of buf to 0xFF using two 64-bit constant
// stores, one per element of buf. This is the widest-store variant that the
// narrower variants are compared against.
static void NOINLINE func3() {
// The cast is redundant (buf already decays to uint64_t*); it mirrors the
// shape of the other variants.
uint64_t* p = (uint64_t*)buf;
p[0] = 0xFFFFFFFFFFFFFFFF;
p[1] = 0xFFFFFFFFFFFFFFFF;
}
// Benchmark driver.
//
// Calls FUNC -- a macro expected to be supplied on the compiler command line
// (e.g. -DFUNC=func1) -- UINT_MAX times, then folds the contents of buf into
// a single unsigned value and prints it so that buf is actually used.
//
// argc and argv are ignored. Always returns 0.
int main(int argc, const char *argv[]) {
  unsigned i;
  unsigned v = 0;

  (void)argc;
  (void)argv;

  // Run one of the functions lots of times...
  for (i = 0; i < UINT_MAX; ++i) {
    FUNC();
  }

  // Misc code to just use all of buf.
  // buf holds sizeof buf / sizeof buf[0] 64-bit elements (16 bytes in total),
  // so the loop must be bounded by the element count: indexing buf[0..15]
  // would read far past the end of the array.
  for (i = 0; i < sizeof buf / sizeof buf[0]; ++i) {
    // OR the upper 32 bits into the lower 32 before narrowing, so the
    // conversion to unsigned does not silently drop half of each element
    // (assumes unsigned is at least 32 bits wide).
    v |= (unsigned)(buf[i] | (buf[i] >> 32));
  }
  printf("%u\n", v);
  return 0;
}
#!/bin/sh
# Build array.c once per store-width variant (func1, func2, func3), dump the
# disassembly of each variant, time each binary, then delete the binaries.
#
# The compiler can be overridden through the CC environment variable; gcc is
# used when CC is unset.
CC=${CC-gcc}
# NOTE(review): -mno-sse presumably keeps the compiler from using SSE vector
# stores for the fill, so only scalar store merging is measured -- confirm.
CFLAGS="-mtune=generic -mno-sse -O3"

# Record which compiler produced the results below.
$CC -v 2>&1

# One executable per variant, selected with -DFUNC=funcN.
for n in 1 2 3; do
  $CC $CFLAGS array.c -o "test$n" -DFUNC="func$n"
done

# Show the machine code (with raw bytes) generated for each variant.
for n in 1 2 3; do
  gdb --batch -ex "disassemble /r func$n" "test$n"
done

echo ""

# Time each variant; the subshell lets time's report be redirected to stdout.
for n in 1 2 3; do
  echo "=== Func$n Timing ==="
  (time "./test$n" > /dev/null) 2>&1
done

rm test1 test2 test3
Using built-in specs.
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC)
Dump of assembler code for function func1:
0x0000000000400500 <+0>: c6 05 39 15 00 00 ff movb $0xff,0x1539(%rip) # 0x401a40 <buf>
0x0000000000400507 <+7>: c6 05 33 15 00 00 ff movb $0xff,0x1533(%rip) # 0x401a41 <buf+1>
0x000000000040050e <+14>: c6 05 2d 15 00 00 ff movb $0xff,0x152d(%rip) # 0x401a42 <buf+2>
0x0000000000400515 <+21>: c6 05 27 15 00 00 ff movb $0xff,0x1527(%rip) # 0x401a43 <buf+3>
0x000000000040051c <+28>: c6 05 21 15 00 00 ff movb $0xff,0x1521(%rip) # 0x401a44 <buf+4>
0x0000000000400523 <+35>: c6 05 1b 15 00 00 ff movb $0xff,0x151b(%rip) # 0x401a45 <buf+5>
0x000000000040052a <+42>: c6 05 15 15 00 00 ff movb $0xff,0x1515(%rip) # 0x401a46 <buf+6>
0x0000000000400531 <+49>: c6 05 0f 15 00 00 ff movb $0xff,0x150f(%rip) # 0x401a47 <buf+7>
0x0000000000400538 <+56>: c6 05 09 15 00 00 ff movb $0xff,0x1509(%rip) # 0x401a48 <buf+8>
0x000000000040053f <+63>: c6 05 03 15 00 00 ff movb $0xff,0x1503(%rip) # 0x401a49 <buf+9>
0x0000000000400546 <+70>: c6 05 fd 14 00 00 ff movb $0xff,0x14fd(%rip) # 0x401a4a <buf+10>
0x000000000040054d <+77>: c6 05 f7 14 00 00 ff movb $0xff,0x14f7(%rip) # 0x401a4b <buf+11>
0x0000000000400554 <+84>: c6 05 f1 14 00 00 ff movb $0xff,0x14f1(%rip) # 0x401a4c <buf+12>
0x000000000040055b <+91>: c6 05 eb 14 00 00 ff movb $0xff,0x14eb(%rip) # 0x401a4d <buf+13>
0x0000000000400562 <+98>: c6 05 e5 14 00 00 ff movb $0xff,0x14e5(%rip) # 0x401a4e <buf+14>
0x0000000000400569 <+105>: c6 05 df 14 00 00 ff movb $0xff,0x14df(%rip) # 0x401a4f <buf+15>
0x0000000000400570 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x0000000000400500 <+0>: c7 05 e6 14 00 00 ff ff ff ff movl $0xffffffff,0x14e6(%rip) # 0x4019f0 <buf>
0x000000000040050a <+10>: c7 05 e0 14 00 00 ff ff ff ff movl $0xffffffff,0x14e0(%rip) # 0x4019f4 <buf+4>
0x0000000000400514 <+20>: c7 05 da 14 00 00 ff ff ff ff movl $0xffffffff,0x14da(%rip) # 0x4019f8 <buf+8>
0x000000000040051e <+30>: c7 05 d4 14 00 00 ff ff ff ff movl $0xffffffff,0x14d4(%rip) # 0x4019fc <buf+12>
0x0000000000400528 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x0000000000400500 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
0x0000000000400507 <+7>: 48 89 05 d2 14 00 00 mov %rax,0x14d2(%rip) # 0x4019e0 <buf>
0x000000000040050e <+14>: 48 89 05 d3 14 00 00 mov %rax,0x14d3(%rip) # 0x4019e8 <buf+8>
0x0000000000400515 <+21>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.464s
user 0m20.434s
sys 0m0.000s
=== Func2 Timing ===
real 0m9.630s
user 0m9.615s
sys 0m0.000s
=== Func3 Timing ===
real 0m7.219s
user 0m7.203s
sys 0m0.003s
Reading specs from /usr/lib64/gcc/x86_64-slackware-linux/4.7.1/specs
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-slackware-linux/4.7.1/lto-wrapper
Target: x86_64-slackware-linux
Configured with: ../gcc-4.7.1/configure --prefix=/usr --libdir=/usr/lib64 --mandir=/usr/man --infodir=/usr/info --enable-shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,go,java,lto,objc --enable-threads=posix --enable-checking=release --enable-objc-gc --with-system-zlib --with-python-dir=/lib64/python2.7/site-packages --disable-libunwind-exceptions --enable-__cxa_atexit --enable-libssp --enable-lto --with-gnu-ld --verbose --enable-java-home --with-java-home=/usr/lib64/jvm/jre --with-jvm-root-dir=/usr/lib64/jvm --with-jvm-jar-dir=/usr/lib64/jvm/jvm-exports --with-arch-directory=amd64 --with-antlr-jar=/home/slackware/slackbuilds/gcc/antlr-runtime-3.4.jar --enable-multilib --target=x86_64-slackware-linux --build=x86_64-slackware-linux --host=x86_64-slackware-linux
Thread model: posix
gcc version 4.7.1 (GCC)
Dump of assembler code for function func1:
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004006a0 <+0>: 48 8b 05 09 01 00 00 mov 0x109(%rip),%rax # 0x4007b0
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004006a0 <+0>: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
0x00000000004006a7 <+7>: 48 89 05 22 04 20 00 mov %rax,0x200422(%rip) # 0x600ad0 <buf>
0x00000000004006ae <+14>: 48 89 05 23 04 20 00 mov %rax,0x200423(%rip) # 0x600ad8 <buf+8>
0x00000000004006b5 <+21>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m9.941s
user 0m9.938s
sys 0m0.000s
=== Func2 Timing ===
real 0m9.943s
user 0m9.937s
sys 0m0.001s
=== Func3 Timing ===
real 0m9.938s
user 0m9.935s
sys 0m0.000s
clang version 3.1 (git@github.com:llvm-mirror/clang.git 6f576c9bfa9a22e2801485768fe56b3336ea18a7) (git@github.com:llvm-mirror/llvm.git 02b87df98afb03136a1f5076c042696c98524947)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005b0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a20 <buf>
0x00000000004005b7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a21 <buf+1>
0x00000000004005be <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a22 <buf+2>
0x00000000004005c5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a23 <buf+3>
0x00000000004005cc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a24 <buf+4>
0x00000000004005d3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a25 <buf+5>
0x00000000004005da <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a26 <buf+6>
0x00000000004005e1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a27 <buf+7>
0x00000000004005e8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a28 <buf+8>
0x00000000004005ef <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a29 <buf+9>
0x00000000004005f6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a2a <buf+10>
0x00000000004005fd <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a2b <buf+11>
0x0000000000400604 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a2c <buf+12>
0x000000000040060b <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a2d <buf+13>
0x0000000000400612 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a2e <buf+14>
0x0000000000400619 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a2f <buf+15>
0x0000000000400620 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005b0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019d0 <buf>
0x00000000004005ba <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019d4 <buf+4>
0x00000000004005c4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019d8 <buf+8>
0x00000000004005ce <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019dc <buf+12>
0x00000000004005d8 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005b0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019c0 <buf>
0x00000000004005bb <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019c8 <buf+8>
0x00000000004005c6 <+22>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.456s
user 0m20.426s
sys 0m0.000s
=== Func2 Timing ===
real 0m8.420s
user 0m8.406s
sys 0m0.001s
=== Func3 Timing ===
real 0m8.411s
user 0m8.398s
sys 0m0.000s
clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 40d734de4c19a34d2d1764b136470ee25993eb4e)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005a0 <+0>: c6 05 69 14 00 00 ff movb $0xff,0x1469(%rip) # 0x401a10 <buf>
0x00000000004005a7 <+7>: c6 05 63 14 00 00 ff movb $0xff,0x1463(%rip) # 0x401a11 <buf+1>
0x00000000004005ae <+14>: c6 05 5d 14 00 00 ff movb $0xff,0x145d(%rip) # 0x401a12 <buf+2>
0x00000000004005b5 <+21>: c6 05 57 14 00 00 ff movb $0xff,0x1457(%rip) # 0x401a13 <buf+3>
0x00000000004005bc <+28>: c6 05 51 14 00 00 ff movb $0xff,0x1451(%rip) # 0x401a14 <buf+4>
0x00000000004005c3 <+35>: c6 05 4b 14 00 00 ff movb $0xff,0x144b(%rip) # 0x401a15 <buf+5>
0x00000000004005ca <+42>: c6 05 45 14 00 00 ff movb $0xff,0x1445(%rip) # 0x401a16 <buf+6>
0x00000000004005d1 <+49>: c6 05 3f 14 00 00 ff movb $0xff,0x143f(%rip) # 0x401a17 <buf+7>
0x00000000004005d8 <+56>: c6 05 39 14 00 00 ff movb $0xff,0x1439(%rip) # 0x401a18 <buf+8>
0x00000000004005df <+63>: c6 05 33 14 00 00 ff movb $0xff,0x1433(%rip) # 0x401a19 <buf+9>
0x00000000004005e6 <+70>: c6 05 2d 14 00 00 ff movb $0xff,0x142d(%rip) # 0x401a1a <buf+10>
0x00000000004005ed <+77>: c6 05 27 14 00 00 ff movb $0xff,0x1427(%rip) # 0x401a1b <buf+11>
0x00000000004005f4 <+84>: c6 05 21 14 00 00 ff movb $0xff,0x1421(%rip) # 0x401a1c <buf+12>
0x00000000004005fb <+91>: c6 05 1b 14 00 00 ff movb $0xff,0x141b(%rip) # 0x401a1d <buf+13>
0x0000000000400602 <+98>: c6 05 15 14 00 00 ff movb $0xff,0x1415(%rip) # 0x401a1e <buf+14>
0x0000000000400609 <+105>: c6 05 0f 14 00 00 ff movb $0xff,0x140f(%rip) # 0x401a1f <buf+15>
0x0000000000400610 <+112>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005a0 <+0>: c7 05 16 14 00 00 ff ff ff ff movl $0xffffffff,0x1416(%rip) # 0x4019c0 <buf>
0x00000000004005aa <+10>: c7 05 10 14 00 00 ff ff ff ff movl $0xffffffff,0x1410(%rip) # 0x4019c4 <buf+4>
0x00000000004005b4 <+20>: c7 05 0a 14 00 00 ff ff ff ff movl $0xffffffff,0x140a(%rip) # 0x4019c8 <buf+8>
0x00000000004005be <+30>: c7 05 04 14 00 00 ff ff ff ff movl $0xffffffff,0x1404(%rip) # 0x4019cc <buf+12>
0x00000000004005c8 <+40>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005a0 <+0>: 48 c7 05 05 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1405(%rip) # 0x4019b0 <buf>
0x00000000004005ab <+11>: 48 c7 05 02 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x1402(%rip) # 0x4019b8 <buf+8>
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
=== Func1 Timing ===
real 0m20.440s
user 0m20.410s
sys 0m0.000s
=== Func2 Timing ===
real 0m8.418s
user 0m8.404s
sys 0m0.001s
=== Func3 Timing ===
real 0m7.217s
user 0m7.196s
sys 0m0.002s
@dtzWill

This comment has been minimized.

Copy link
Owner Author

commented Sep 13, 2012

Bug: http://llvm.org/bugs/show_bug.cgi?id=13836 .

Fixed in r163809!

Updated log from mainline:

clang version 3.2 (http://llvm.org/git/clang.git git@github.com:dtzWill/ioc-clang 0278c78685d247a92d60d163e6cbd9fa4a4441e4) (http://llvm.org/git/llvm git@github.com:llvm-mirror/llvm 629956400519d7b66c2009aed3a85c737018dc5b)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Dump of assembler code for function func1:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func2:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.
Dump of assembler code for function func3:
0x00000000004005a0 <+0>: 48 c7 05 0d 14 00 00 ff ff ff ff movq $0xffffffffffffffff,0x140d(%rip) # 0x4019b8 <buf+8>
0x00000000004005ab <+11>: 48 c7 05 fa 13 00 00 ff ff ff ff movq $0xffffffffffffffff,0x13fa(%rip) # 0x4019b0
0x00000000004005b6 <+22>: c3 retq
End of assembler dump.

=== Func1 Timing ===

real 0m7.263s
user 0m7.251s
sys 0m0.001s
=== Func2 Timing ===

real 0m7.219s
user 0m7.208s
sys 0m0.000s
=== Func3 Timing ===

real 0m7.218s
user 0m7.206s
sys 0m0.001s

Perfect!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.