Last active
May 5, 2016 02:06
-
-
Save assp1r1n3/3e967028299039de8a053025b5dbba5e to your computer and use it in GitHub Desktop.
Hand assembly vs. Intrinsics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stddef.h> | |
#include <time.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * Sum the population count (number of set bits) of every 64-bit word in
 * buf[0 .. len-1].
 *
 * buf: pointer to the first word; not dereferenced when len is 0.
 * len: number of 64-bit words to process.
 * Returns the total number of set bits across all words.
 */
uint64_t builtin_popcnt(const uint64_t* buf, size_t len){
    uint64_t total = 0;
    /* Count down the remaining words and walk the pointer forward. */
    for(size_t remaining = len; remaining != 0; --remaining, ++buf){
        total += __builtin_popcountll(*buf);
    }
    return total;
}
/*
 * Benchmark driver.
 *
 * Usage: <prog> <buffer size in MB>
 *
 * Allocates a buffer of the requested size, fills it with random() values,
 * runs builtin_popcnt over the whole buffer 10000 times and prints the
 * accumulated bit count, the elapsed CPU time (clock()) and the resulting
 * throughput in GB/s.
 *
 * Returns 0 on success and -1 on a usage error, an invalid size argument
 * or an allocation failure.
 */
int main(int argc, char** argv){
    if(argc != 2){
        printf("Usage: %s <buffer size in MB>\n", argv[0]);
        return -1;
    }

    /* strtol (unlike atol) lets us reject malformed input; non-positive
     * sizes and sizes whose byte count would not fit in size_t are
     * rejected too, so the shift below cannot overflow. */
    char* end = NULL;
    long mb = strtol(argv[1], &end, 10);
    if(end == argv[1] || *end != '\0' || mb <= 0 || (uint64_t)mb > (SIZE_MAX >> 20)){
        fprintf(stderr, "Invalid buffer size in MB: %s\n", argv[1]);
        return -1;
    }
    uint64_t size = (uint64_t)mb << 20;                  /* bytes */
    size_t words = (size_t)(size / sizeof(uint64_t));    /* 64-bit words */

    uint64_t* buffer = (uint64_t*)malloc(words * sizeof(*buffer));
    if(buffer == NULL){
        fprintf(stderr, "Failed to allocate %ld MB\n", mb);
        return -1;
    }

    //spoil copy-on-write memory allocation on *nix
    //(every word is written once so the pages are really backed by memory)
    for (size_t i = 0; i < words; i++) {
        buffer[i] = random();
    }

    uint64_t count = 0;
    clock_t tic = clock();
    for(size_t i = 0; i < 10000; ++i){
        count += builtin_popcnt(buffer, words);
    }
    clock_t toc = clock();

    double seconds = (double)(toc - tic) / CLOCKS_PER_SEC;
    /* Avoid dividing by zero when the run is shorter than the clock() tick. */
    double speed = seconds > 0.0 ? (10000.0*size) / (seconds*1e+9) : 0.0;

    /* uint64_t is not guaranteed to be unsigned long; cast for a portable format. */
    printf("Count: %llu\tElapsed: %f seconds\tSpeed: %f GB/s\n", (unsigned long long)count, seconds, speed);

    free(buffer);
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.file "bench.c" | |
.text | |
.p2align 4,,15 | |
.globl builtin_popcnt | |
.type builtin_popcnt, @function | |
# uint64_t builtin_popcnt(const uint64_t* buf, size_t len)
# GCC-generated code (AT&T syntax) for the C function of the same name.
# In:    rdi = buf, rsi = len (number of 64-bit words)
# Out:   rax = sum of popcnt over buf[0..len-1]; 0 when len == 0
# Writes: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
# Shape: word 0 is counted up front, then (len-1) mod 8 remainder words are
#        handled by a fall-through chain, then the rest runs 8 words per
#        loop iteration.
builtin_popcnt: | |
.LFB39: | |
.cfi_startproc | |
# len == 0 -> return 0
testq %rsi, %rsi | |
je .L4 | |
# rcx = len*8 - 8 (bytes after the first word)
leaq -8(,%rsi,8), %rcx | |
# rax = popcnt(buf[0]); rax is the running total from here on
popcntq (%rdi), %rax | |
# r8 = buf + len*8 = one-past-the-end pointer
leaq (%rdi,%rsi,8), %r8 | |
# rcx = len - 1 (words after the first)
shrq $3, %rcx | |
# rsi = &buf[1]; rsi is the read cursor from here on
leaq 8(%rdi), %rsi | |
# rcx = (len - 1) mod 8 = number of remainder words before the 8x loop
andl $7, %ecx | |
# cursor already at the end (len == 1) -> done
cmpq %r8, %rsi | |
je .L42 | |
# Dispatch on the remainder: 0 goes straight to the 8x loop, k in 1..6 jumps
# into the chain so that exactly k single-word steps run, 7 falls through.
testq %rcx, %rcx | |
je .L3 | |
cmpq $1, %rcx | |
je .L26 | |
cmpq $2, %rcx | |
je .L27 | |
cmpq $3, %rcx | |
je .L28 | |
cmpq $4, %rcx | |
je .L29 | |
cmpq $5, %rcx | |
je .L30 | |
cmpq $6, %rcx | |
je .L31 | |
# remainder == 7: count buf[1] and move the cursor to &buf[2]
popcntq (%rsi), %rdx | |
leaq 16(%rdi), %rsi | |
addq %rdx, %rax | |
# Each label below counts one word and advances the cursor by 8 bytes.
.L31: | |
popcntq (%rsi), %rdi | |
addq $8, %rsi | |
addq %rdi, %rax | |
.L30: | |
popcntq (%rsi), %r9 | |
addq $8, %rsi | |
addq %r9, %rax | |
.L29: | |
popcntq (%rsi), %r10 | |
addq $8, %rsi | |
addq %r10, %rax | |
.L28: | |
popcntq (%rsi), %r11 | |
addq $8, %rsi | |
addq %r11, %rax | |
.L27: | |
popcntq (%rsi), %rcx | |
addq $8, %rsi | |
addq %rcx, %rax | |
.L26: | |
popcntq (%rsi), %rdx | |
addq $8, %rsi | |
addq %rdx, %rax | |
# remainder consumed; nothing left -> return
cmpq %r8, %rsi | |
je .L43 | |
# Main loop: 8 words (64 bytes) per iteration. The cursor is bumped early,
# so the remaining seven loads use negative offsets from the new cursor.
.L3: | |
popcntq (%rsi), %rdi | |
addq %rdi, %rax | |
addq $64, %rsi | |
popcntq -56(%rsi), %r9 | |
addq %r9, %rax | |
popcntq -48(%rsi), %r10 | |
addq %r10, %rax | |
popcntq -40(%rsi), %r11 | |
popcntq -32(%rsi), %rcx | |
addq %r11, %rax | |
popcntq -24(%rsi), %rdx | |
popcntq -16(%rsi), %rdi | |
addq %rcx, %rax | |
popcntq -8(%rsi), %r9 | |
addq %rdx, %rax | |
addq %rdi, %rax | |
addq %r9, %rax | |
# loop until the cursor reaches the end pointer
cmpq %r8, %rsi | |
jne .L3 | |
rep ret | |
.p2align 4,,10 | |
.p2align 3 | |
# return after the remainder chain consumed everything
.L43: | |
rep ret | |
.p2align 4,,10 | |
.p2align 3 | |
# len == 0: return 0
.L4: | |
xorl %eax, %eax | |
.p2align 4,,9 | |
ret | |
.p2align 4,,10 | |
.p2align 3 | |
# len == 1: rax already holds popcnt(buf[0])
.L42: | |
.p2align 4,,10 | |
rep ret | |
.cfi_endproc | |
.LFE39: | |
.size builtin_popcnt, .-builtin_popcnt | |
.section .rodata.str1.8,"aMS",@progbits,1 | |
.align 8 | |
.LC0: | |
.string "Usage: %s <buffer size in MB>\n" | |
.align 8 | |
.LC4: | |
.string "Count: %lu\tElapsed: %f seconds\tSpeed: %f GB/s\n" | |
.section .text.startup,"ax",@progbits | |
.p2align 4,,15 | |
.globl main | |
.type main, @function | |
# int main(int argc, char** argv)
# GCC-generated code (AT&T syntax) for the benchmark driver; the popcount
# kernel is inlined into the timing loop rather than called.
# In:  edi = argc, rsi = argv
# Out: eax = 0 on the normal path, -1 on the usage path
# Register roles (callee-saved registers, all pushed in the prologue):
#   rbp = buffer size in bytes (strtol result << 20)
#   r13 = number of 64-bit words (rbp >> 3)
#   r14 = byte count passed to malloc, later reused as the bit-count total
#   rbx = buffer pointer returned by malloc
#   r12 = fill index, later reused for the first clock() value
#   r15 = scratch (remainder constant, then store index)
main: | |
.LFB40: | |
.cfi_startproc | |
pushq %r15 | |
.cfi_def_cfa_offset 16 | |
.cfi_offset 15, -16 | |
pushq %r14 | |
.cfi_def_cfa_offset 24 | |
.cfi_offset 14, -24 | |
pushq %r13 | |
.cfi_def_cfa_offset 32 | |
.cfi_offset 13, -32 | |
pushq %r12 | |
.cfi_def_cfa_offset 40 | |
.cfi_offset 12, -40 | |
pushq %rbp | |
.cfi_def_cfa_offset 48 | |
.cfi_offset 6, -48 | |
pushq %rbx | |
.cfi_def_cfa_offset 56 | |
.cfi_offset 3, -56 | |
# extra 8 bytes bring the frame to 64 bytes below the CFA
subq $8, %rsp | |
.cfi_def_cfa_offset 64 | |
# argc != 2 -> print usage and return -1
cmpl $2, %edi | |
jne .L133 | |
# strtol(argv[1], NULL, 10)
movq 8(%rsi), %rdi | |
movl $10, %edx | |
xorl %esi, %esi | |
call strtol | |
# rbp = size in bytes = result << 20
movq %rax, %rbp | |
salq $20, %rbp | |
# r13 = size / 8 = word count
movq %rbp, %r13 | |
shrq $3, %r13 | |
# r14 = r13 * 8 = allocation size in bytes
leaq 0(,%r13,8), %r14 | |
movq %r14, %rdi | |
call malloc | |
# rbx = buffer; the malloc result is not checked for NULL
testq %r13, %r13 | |
movq %rax, %rbx | |
# zero words -> skip the fill loop
je .L49 | |
# Fill loop: buffer[0] is written first, r12 = next index (1)
movl $1, %r12d | |
# r15 = 7: remainder count for the peeled stores below
movl $7, %r15d | |
call random | |
cmpq %r13, %r12 | |
movq %rax, (%rbx) | |
je .L49 | |
# Remainder dispatch on r15. r15 was just loaded with the constant 7 and is
# callee-saved across the call, so these compares are expected to fall
# through to the 7-step path.
testq %r15, %r15 | |
je .L109 | |
cmpq $1, %r15 | |
je .L110 | |
cmpq $2, %r15 | |
je .L111 | |
cmpq $3, %r15 | |
je .L112 | |
cmpq $4, %r15 | |
je .L113 | |
cmpq $5, %r15 | |
je .L114 | |
cmpq $6, %r15 | |
je .L115 | |
# Each step below: buffer[r12] = random(); r12++
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L115: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L114: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L113: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L112: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L111: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
.L110: | |
call random | |
movq %rax, (%rbx,%r12,8) | |
addq $1, %r12 | |
cmpq %r13, %r12 | |
je .L49 | |
# Unrolled fill loop: 8 random() stores per iteration; r15 holds the index
# of each store, r12 advances by 8.
.L109: | |
call random | |
leaq 1(%r12), %r15 | |
movq %rax, (%rbx,%r12,8) | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 2(%r12), %r15 | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 3(%r12), %r15 | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 4(%r12), %r15 | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 5(%r12), %r15 | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 6(%r12), %r15 | |
call random | |
movq %rax, (%rbx,%r15,8) | |
leaq 7(%r12), %r15 | |
addq $8, %r12 | |
call random | |
cmpq %r13, %r12 | |
movq %rax, (%rbx,%r15,8) | |
jne .L109 | |
# Timing section starts here.
.L49: | |
call clock | |
# r8 = 10000 outer iterations left
movl $10000, %r8d | |
# r12 = first clock() value (tic)
movq %rax, %r12 | |
# rax = buffer + byte size = one-past-the-end pointer
leaq (%rbx,%r14), %rax | |
# r14 is now free: reuse it as the accumulated bit count (starts at 0)
xorl %r14d, %r14d | |
.p2align 4,,10 | |
.p2align 3 | |
# Outer loop body: one inlined pass of the popcount kernel over the buffer.
# rsi = per-pass total, rdx = read cursor, rax = end pointer.
.L48: | |
# empty buffer -> per-pass total is 0
testq %r13, %r13 | |
je .L57 | |
# rdx = &buffer[1]
leaq 8(%rbx), %rdx | |
movq %rax, %rdi | |
# rsi = popcnt(buffer[0])
popcntq (%rbx), %rsi | |
# rdi = ((end - buffer - 8) >> 3) & 7 = (words - 1) mod 8
subq %rbx, %rdi | |
subq $8, %rdi | |
shrq $3, %rdi | |
andl $7, %edi | |
# only one word -> pass finished
cmpq %rax, %rdx | |
je .L53 | |
# Remainder dispatch: 0 -> 8x loop, k in 1..6 -> k single steps, 7 falls through
testq %rdi, %rdi | |
je .L54 | |
cmpq $1, %rdi | |
je .L103 | |
cmpq $2, %rdi | |
je .L104 | |
cmpq $3, %rdi | |
je .L105 | |
cmpq $4, %rdi | |
je .L106 | |
cmpq $5, %rdi | |
je .L107 | |
cmpq $6, %rdi | |
je .L108 | |
# remainder == 7: count buffer[1], cursor = &buffer[2]
popcntq 8(%rbx), %rcx | |
leaq 16(%rbx), %rdx | |
addq %rcx, %rsi | |
# Each label below counts one word and advances the cursor by 8 bytes.
.L108: | |
popcntq (%rdx), %r9 | |
addq $8, %rdx | |
addq %r9, %rsi | |
.L107: | |
popcntq (%rdx), %r10 | |
addq $8, %rdx | |
addq %r10, %rsi | |
.L106: | |
popcntq (%rdx), %r11 | |
addq $8, %rdx | |
addq %r11, %rsi | |
.L105: | |
popcntq (%rdx), %r15 | |
addq $8, %rdx | |
addq %r15, %rsi | |
.L104: | |
popcntq (%rdx), %rdi | |
addq $8, %rdx | |
addq %rdi, %rsi | |
.L103: | |
popcntq (%rdx), %rcx | |
addq $8, %rdx | |
addq %rcx, %rsi | |
cmpq %rax, %rdx | |
je .L53 | |
# Inner loop: 8 words (64 bytes) per iteration until the cursor hits the end
.L54: | |
popcntq (%rdx), %r9 | |
addq %r9, %rsi | |
addq $64, %rdx | |
popcntq -56(%rdx), %r10 | |
addq %r10, %rsi | |
popcntq -48(%rdx), %r11 | |
addq %r11, %rsi | |
popcntq -40(%rdx), %r15 | |
popcntq -32(%rdx), %rdi | |
addq %r15, %rsi | |
popcntq -24(%rdx), %rcx | |
popcntq -16(%rdx), %r9 | |
addq %rdi, %rsi | |
popcntq -8(%rdx), %r10 | |
addq %rcx, %rsi | |
addq %r9, %rsi | |
addq %r10, %rsi | |
cmpq %rax, %rdx | |
jne .L54 | |
# End of one pass: count += per-pass total; repeat while r8 != 0
.L53: | |
addq %rsi, %r14 | |
subq $1, %r8 | |
jne .L48 | |
# Timing section ends here.
.L52: | |
call clock | |
# rax = toc - tic
subq %r12, %rax | |
testq %rbp, %rbp | |
# xmm2 = (double)(toc - tic)
vcvtsi2sdq %rax, %xmm2, %xmm2 | |
# size has its top bit set -> take the unsigned-conversion path at .L55
js .L55 | |
# xmm0 = (double)size
vcvtsi2sdq %rbp, %xmm0, %xmm0 | |
# Compute the printf arguments. .LC1/.LC2/.LC3 are the double constants
# defined at the end of the file (10000.0, 1e9 and 1e6 respectively).
.L56: | |
# xmm3 = ticks * .LC2
vmulsd .LC2(%rip), %xmm2, %xmm3 | |
# rdx = accumulated count (first variadic integer argument)
movq %r14, %rdx | |
# esi = format string .LC4
movl $.LC4, %esi | |
# xmm5 = .LC3
vmovsd .LC3(%rip), %xmm5 | |
# edi = 1: first argument of __printf_chk
movl $1, %edi | |
# eax = 2: two vector registers (xmm0, xmm1) carry variadic arguments
movl $2, %eax | |
# xmm4 = size * .LC1
vmulsd .LC1(%rip), %xmm0, %xmm4 | |
# xmm0 = ticks / .LC3 = elapsed seconds
vdivsd %xmm5, %xmm2, %xmm0 | |
# xmm6 = (ticks * .LC2) / .LC3
vdivsd %xmm5, %xmm3, %xmm6 | |
# xmm1 = (size * .LC1) / xmm6 = speed in GB/s
vdivsd %xmm6, %xmm4, %xmm1 | |
call __printf_chk | |
# return 0
xorl %eax, %eax | |
# Shared epilogue: release the frame, restore callee-saved registers, return eax
.L131: | |
addq $8, %rsp | |
.cfi_remember_state | |
.cfi_def_cfa_offset 56 | |
popq %rbx | |
.cfi_def_cfa_offset 48 | |
popq %rbp | |
.cfi_def_cfa_offset 40 | |
popq %r12 | |
.cfi_def_cfa_offset 32 | |
popq %r13 | |
.cfi_def_cfa_offset 24 | |
popq %r14 | |
.cfi_def_cfa_offset 16 | |
popq %r15 | |
.cfi_def_cfa_offset 8 | |
ret | |
.p2align 4,,10 | |
.p2align 3 | |
# Empty-buffer pass: per-pass total is 0; still counts down the outer loop
.L57: | |
.cfi_restore_state | |
xorl %esi, %esi | |
addq %rsi, %r14 | |
subq $1, %r8 | |
jne .L48 | |
jmp .L52 | |
# size >= 2^63 when read as unsigned: halve it, convert, then double the
# result to get the unsigned value as a double
.L55: | |
shrq %rbp | |
vcvtsi2sdq %rbp, %xmm1, %xmm1 | |
vaddsd %xmm1, %xmm1, %xmm0 | |
jmp .L56 | |
# Usage path: __printf_chk(1, .LC0, argv[0]) with no vector arguments
# (eax = 0), then return -1 through the shared epilogue
.L133: | |
movq (%rsi), %rdx | |
movl $1, %edi | |
movl $.LC0, %esi | |
xorl %eax, %eax | |
call __printf_chk | |
orl $-1, %eax | |
jmp .L131 | |
.cfi_endproc | |
.LFE40: | |
.size main, .-main | |
.section .rodata.cst8,"aM",@progbits,8 | |
.align 8 | |
# Double-precision constants used by main, each stored as two 32-bit words
# (low word first, then high word).
# .LC1: high word 0x40C38800 -> 10000.0 (the iteration count in the speed formula)
.LC1: | |
.long 0 | |
.long 1086556160 | |
.align 8 | |
# .LC2: high word 0x41CDCD65 -> 1e9 (bytes per GB in the speed formula)
.LC2: | |
.long 0 | |
.long 1104006501 | |
.align 8 | |
# .LC3: high word 0x412E8480 -> 1e6 (divisor applied to the clock() difference;
# presumably CLOCKS_PER_SEC on this platform)
.LC3: | |
.long 0 | |
.long 1093567616 | |
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04.1) 4.8.4" | |
.section .note.GNU-stack,"",@progbits |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stddef.h> | |
/*
 * Sum the population count (number of set bits) of every 64-bit word in
 * buf[0 .. len-1].
 *
 * buf: pointer to the first word; not dereferenced when len is 0.
 * len: number of 64-bit words to process.
 * Returns the total number of set bits across all words.
 */
uint64_t builtin_popcnt(const uint64_t* buf, size_t len){
    uint64_t total = 0;
    /* Count down the remaining words and walk the pointer forward. */
    for(size_t remaining = len; remaining != 0; --remaining, ++buf){
        total += __builtin_popcountll(*buf);
    }
    return total;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
assp1r1n3@pinguino:/tmp$ gcc --std=gnu99 -mpopcnt -O3 -funroll-loops -march=native bench.c -o bench | |
assp1r1n3@pinguino:/tmp$ ./bench 1 | |
Count: 130000 Elapsed: 0.364957 seconds Speed: 28.731494 GB/s | |
assp1r1n3@pinguino:/tmp$ ./bench 2 | |
Count: 130000 Elapsed: 0.725931 seconds Speed: 28.889137 GB/s | |
assp1r1n3@pinguino:/tmp$ ./bench 4 | |
Count: 120000 Elapsed: 1.539693 seconds Speed: 27.241171 GB/s | |
assp1r1n3@pinguino:/tmp$ ./bench 8 | |
Count: 130000 Elapsed: 3.276062 seconds Speed: 25.605767 GB/s | |
assp1r1n3@pinguino:/tmp$ gcc --version | |
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.1) 4.8.4 | |
Copyright (C) 2013 Free Software Foundation, Inc. | |
This is free software; see the source for copying conditions. There is NO | |
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
assp1r1n3@pinguino:/tmp$ uname -a | |
Linux pinguino 3.19.0-58-generic #64~14.04.1-Ubuntu SMP Fri Mar 18 19:05:43 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux | |
assp1r1n3@pinguino:/tmp$ cat /proc/cpuinfo | |
processor : 0 | |
vendor_id : GenuineIntel | |
cpu family : 6 | |
model : 70 | |
model name : Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz | |
stepping : 1 | |
microcode : 0xf | |
cpu MHz : 2494.226 | |
cache size : 6144 KB | |
physical id : 0 | |
siblings : 1 | |
core id : 0 | |
cpu cores : 1 | |
apicid : 0 | |
initial apicid : 0 | |
fpu : yes | |
fpu_exception : yes | |
cpuid level : 13 | |
wp : yes | |
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm arat pln pts dtherm fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 invpcid xsaveopt | |
bugs : | |
bogomips : 4988.45 | |
clflush size : 64 | |
cache_alignment : 64 | |
address sizes : 36 bits physical, 48 bits virtual | |
power management: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bench: file format elf64-x86-64 | |
Disassembly of section .init: | |
00000000004004e8 <_init>: | |
4004e8: 48 83 ec 08 sub $0x8,%rsp | |
4004ec: 48 8b 05 05 0b 20 00 mov 0x200b05(%rip),%rax # 600ff8 <_DYNAMIC+0x1d0> | |
4004f3: 48 85 c0 test %rax,%rax | |
4004f6: 74 05 je 4004fd <_init+0x15> | |
4004f8: e8 43 00 00 00 callq 400540 <__gmon_start__@plt> | |
4004fd: 48 83 c4 08 add $0x8,%rsp | |
400501: c3 retq | |
Disassembly of section .plt: | |
0000000000400510 <clock@plt-0x10>: | |
400510: ff 35 f2 0a 20 00 pushq 0x200af2(%rip) # 601008 <_GLOBAL_OFFSET_TABLE_+0x8> | |
400516: ff 25 f4 0a 20 00 jmpq *0x200af4(%rip) # 601010 <_GLOBAL_OFFSET_TABLE_+0x10> | |
40051c: 0f 1f 40 00 nopl 0x0(%rax) | |
0000000000400520 <clock@plt>: | |
400520: ff 25 f2 0a 20 00 jmpq *0x200af2(%rip) # 601018 <_GLOBAL_OFFSET_TABLE_+0x18> | |
400526: 68 00 00 00 00 pushq $0x0 | |
40052b: e9 e0 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400530 <__libc_start_main@plt>: | |
400530: ff 25 ea 0a 20 00 jmpq *0x200aea(%rip) # 601020 <_GLOBAL_OFFSET_TABLE_+0x20> | |
400536: 68 01 00 00 00 pushq $0x1 | |
40053b: e9 d0 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400540 <__gmon_start__@plt>: | |
400540: ff 25 e2 0a 20 00 jmpq *0x200ae2(%rip) # 601028 <_GLOBAL_OFFSET_TABLE_+0x28> | |
400546: 68 02 00 00 00 pushq $0x2 | |
40054b: e9 c0 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400550 <strtol@plt>: | |
400550: ff 25 da 0a 20 00 jmpq *0x200ada(%rip) # 601030 <_GLOBAL_OFFSET_TABLE_+0x30> | |
400556: 68 03 00 00 00 pushq $0x3 | |
40055b: e9 b0 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400560 <random@plt>: | |
400560: ff 25 d2 0a 20 00 jmpq *0x200ad2(%rip) # 601038 <_GLOBAL_OFFSET_TABLE_+0x38> | |
400566: 68 04 00 00 00 pushq $0x4 | |
40056b: e9 a0 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400570 <malloc@plt>: | |
400570: ff 25 ca 0a 20 00 jmpq *0x200aca(%rip) # 601040 <_GLOBAL_OFFSET_TABLE_+0x40> | |
400576: 68 05 00 00 00 pushq $0x5 | |
40057b: e9 90 ff ff ff jmpq 400510 <_init+0x28> | |
0000000000400580 <__printf_chk@plt>: | |
400580: ff 25 c2 0a 20 00 jmpq *0x200ac2(%rip) # 601048 <_GLOBAL_OFFSET_TABLE_+0x48> | |
400586: 68 06 00 00 00 pushq $0x6 | |
40058b: e9 80 ff ff ff jmpq 400510 <_init+0x28> | |
Disassembly of section .text: | |
0000000000400590 <main>: | |
400590: 41 57 push %r15 | |
400592: 41 56 push %r14 | |
400594: 41 55 push %r13 | |
400596: 41 54 push %r12 | |
400598: 55 push %rbp | |
400599: 53 push %rbx | |
40059a: 48 83 ec 08 sub $0x8,%rsp | |
40059e: 83 ff 02 cmp $0x2,%edi | |
4005a1: 0f 85 08 03 00 00 jne 4008af <main+0x31f> | |
4005a7: 48 8b 7e 08 mov 0x8(%rsi),%rdi | |
4005ab: ba 0a 00 00 00 mov $0xa,%edx | |
4005b0: 31 f6 xor %esi,%esi | |
4005b2: e8 99 ff ff ff callq 400550 <strtol@plt> | |
4005b7: 48 89 c5 mov %rax,%rbp | |
4005ba: 48 c1 e5 14 shl $0x14,%rbp | |
4005be: 49 89 ed mov %rbp,%r13 | |
4005c1: 49 c1 ed 03 shr $0x3,%r13 | |
4005c5: 4e 8d 34 ed 00 00 00 lea 0x0(,%r13,8),%r14 | |
4005cc: 00 | |
4005cd: 4c 89 f7 mov %r14,%rdi | |
4005d0: e8 9b ff ff ff callq 400570 <malloc@plt> | |
4005d5: 4d 85 ed test %r13,%r13 | |
4005d8: 48 89 c3 mov %rax,%rbx | |
4005db: 0f 84 1e 01 00 00 je 4006ff <main+0x16f> | |
4005e1: 41 bc 01 00 00 00 mov $0x1,%r12d | |
4005e7: 41 bf 07 00 00 00 mov $0x7,%r15d | |
4005ed: e8 6e ff ff ff callq 400560 <random@plt> | |
4005f2: 4d 39 ec cmp %r13,%r12 | |
4005f5: 48 89 03 mov %rax,(%rbx) | |
4005f8: 0f 84 01 01 00 00 je 4006ff <main+0x16f> | |
4005fe: 4d 85 ff test %r15,%r15 | |
400601: 0f 84 84 00 00 00 je 40068b <main+0xfb> | |
400607: 49 83 ff 01 cmp $0x1,%r15 | |
40060b: 74 6c je 400679 <main+0xe9> | |
40060d: 49 83 ff 02 cmp $0x2,%r15 | |
400611: 74 59 je 40066c <main+0xdc> | |
400613: 49 83 ff 03 cmp $0x3,%r15 | |
400617: 74 46 je 40065f <main+0xcf> | |
400619: 49 83 ff 04 cmp $0x4,%r15 | |
40061d: 74 33 je 400652 <main+0xc2> | |
40061f: 49 83 ff 05 cmp $0x5,%r15 | |
400623: 74 20 je 400645 <main+0xb5> | |
400625: 49 83 ff 06 cmp $0x6,%r15 | |
400629: 74 0d je 400638 <main+0xa8> | |
40062b: e8 30 ff ff ff callq 400560 <random@plt> | |
400630: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400634: 49 83 c4 01 add $0x1,%r12 | |
400638: e8 23 ff ff ff callq 400560 <random@plt> | |
40063d: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400641: 49 83 c4 01 add $0x1,%r12 | |
400645: e8 16 ff ff ff callq 400560 <random@plt> | |
40064a: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
40064e: 49 83 c4 01 add $0x1,%r12 | |
400652: e8 09 ff ff ff callq 400560 <random@plt> | |
400657: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
40065b: 49 83 c4 01 add $0x1,%r12 | |
40065f: e8 fc fe ff ff callq 400560 <random@plt> | |
400664: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400668: 49 83 c4 01 add $0x1,%r12 | |
40066c: e8 ef fe ff ff callq 400560 <random@plt> | |
400671: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400675: 49 83 c4 01 add $0x1,%r12 | |
400679: e8 e2 fe ff ff callq 400560 <random@plt> | |
40067e: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400682: 49 83 c4 01 add $0x1,%r12 | |
400686: 4d 39 ec cmp %r13,%r12 | |
400689: 74 74 je 4006ff <main+0x16f> | |
40068b: e8 d0 fe ff ff callq 400560 <random@plt> | |
400690: 4d 8d 7c 24 01 lea 0x1(%r12),%r15 | |
400695: 4a 89 04 e3 mov %rax,(%rbx,%r12,8) | |
400699: e8 c2 fe ff ff callq 400560 <random@plt> | |
40069e: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006a2: 4d 8d 7c 24 02 lea 0x2(%r12),%r15 | |
4006a7: e8 b4 fe ff ff callq 400560 <random@plt> | |
4006ac: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006b0: 4d 8d 7c 24 03 lea 0x3(%r12),%r15 | |
4006b5: e8 a6 fe ff ff callq 400560 <random@plt> | |
4006ba: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006be: 4d 8d 7c 24 04 lea 0x4(%r12),%r15 | |
4006c3: e8 98 fe ff ff callq 400560 <random@plt> | |
4006c8: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006cc: 4d 8d 7c 24 05 lea 0x5(%r12),%r15 | |
4006d1: e8 8a fe ff ff callq 400560 <random@plt> | |
4006d6: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006da: 4d 8d 7c 24 06 lea 0x6(%r12),%r15 | |
4006df: e8 7c fe ff ff callq 400560 <random@plt> | |
4006e4: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006e8: 4d 8d 7c 24 07 lea 0x7(%r12),%r15 | |
4006ed: 49 83 c4 08 add $0x8,%r12 | |
4006f1: e8 6a fe ff ff callq 400560 <random@plt> | |
4006f6: 4d 39 ec cmp %r13,%r12 | |
4006f9: 4a 89 04 fb mov %rax,(%rbx,%r15,8) | |
4006fd: 75 8c jne 40068b <main+0xfb> | |
4006ff: e8 1c fe ff ff callq 400520 <clock@plt> | |
400704: 41 b8 10 27 00 00 mov $0x2710,%r8d | |
40070a: 49 89 c4 mov %rax,%r12 | |
40070d: 4a 8d 04 33 lea (%rbx,%r14,1),%rax | |
400711: 45 31 f6 xor %r14d,%r14d | |
400714: 0f 1f 40 00 nopl 0x0(%rax) | |
400718: 4d 85 ed test %r13,%r13 | |
40071b: 0f 84 6f 01 00 00 je 400890 <main+0x300> | |
400721: 48 8d 53 08 lea 0x8(%rbx),%rdx | |
400725: 48 89 c7 mov %rax,%rdi | |
400728: f3 48 0f b8 33 popcnt (%rbx),%rsi | |
40072d: 48 29 df sub %rbx,%rdi | |
400730: 48 83 ef 08 sub $0x8,%rdi | |
400734: 48 c1 ef 03 shr $0x3,%rdi | |
400738: 83 e7 07 and $0x7,%edi | |
40073b: 48 39 c2 cmp %rax,%rdx | |
40073e: 0f 84 d3 00 00 00 je 400817 <main+0x287> | |
400744: 48 85 ff test %rdi,%rdi | |
400747: 74 7e je 4007c7 <main+0x237> | |
400749: 48 83 ff 01 cmp $0x1,%rdi | |
40074d: 74 67 je 4007b6 <main+0x226> | |
40074f: 48 83 ff 02 cmp $0x2,%rdi | |
400753: 74 55 je 4007aa <main+0x21a> | |
400755: 48 83 ff 03 cmp $0x3,%rdi | |
400759: 74 43 je 40079e <main+0x20e> | |
40075b: 48 83 ff 04 cmp $0x4,%rdi | |
40075f: 74 31 je 400792 <main+0x202> | |
400761: 48 83 ff 05 cmp $0x5,%rdi | |
400765: 74 1f je 400786 <main+0x1f6> | |
400767: 48 83 ff 06 cmp $0x6,%rdi | |
40076b: 74 0d je 40077a <main+0x1ea> | |
40076d: f3 48 0f b8 4b 08 popcnt 0x8(%rbx),%rcx | |
400773: 48 8d 53 10 lea 0x10(%rbx),%rdx | |
400777: 48 01 ce add %rcx,%rsi | |
40077a: f3 4c 0f b8 0a popcnt (%rdx),%r9 | |
40077f: 48 83 c2 08 add $0x8,%rdx | |
400783: 4c 01 ce add %r9,%rsi | |
400786: f3 4c 0f b8 12 popcnt (%rdx),%r10 | |
40078b: 48 83 c2 08 add $0x8,%rdx | |
40078f: 4c 01 d6 add %r10,%rsi | |
400792: f3 4c 0f b8 1a popcnt (%rdx),%r11 | |
400797: 48 83 c2 08 add $0x8,%rdx | |
40079b: 4c 01 de add %r11,%rsi | |
40079e: f3 4c 0f b8 3a popcnt (%rdx),%r15 | |
4007a3: 48 83 c2 08 add $0x8,%rdx | |
4007a7: 4c 01 fe add %r15,%rsi | |
4007aa: f3 48 0f b8 3a popcnt (%rdx),%rdi | |
4007af: 48 83 c2 08 add $0x8,%rdx | |
4007b3: 48 01 fe add %rdi,%rsi | |
4007b6: f3 48 0f b8 0a popcnt (%rdx),%rcx | |
4007bb: 48 83 c2 08 add $0x8,%rdx | |
4007bf: 48 01 ce add %rcx,%rsi | |
4007c2: 48 39 c2 cmp %rax,%rdx | |
4007c5: 74 50 je 400817 <main+0x287> | |
4007c7: f3 4c 0f b8 0a popcnt (%rdx),%r9 | |
4007cc: 4c 01 ce add %r9,%rsi | |
4007cf: 48 83 c2 40 add $0x40,%rdx | |
4007d3: f3 4c 0f b8 52 c8 popcnt -0x38(%rdx),%r10 | |
4007d9: 4c 01 d6 add %r10,%rsi | |
4007dc: f3 4c 0f b8 5a d0 popcnt -0x30(%rdx),%r11 | |
4007e2: 4c 01 de add %r11,%rsi | |
4007e5: f3 4c 0f b8 7a d8 popcnt -0x28(%rdx),%r15 | |
4007eb: f3 48 0f b8 7a e0 popcnt -0x20(%rdx),%rdi | |
4007f1: 4c 01 fe add %r15,%rsi | |
4007f4: f3 48 0f b8 4a e8 popcnt -0x18(%rdx),%rcx | |
4007fa: f3 4c 0f b8 4a f0 popcnt -0x10(%rdx),%r9 | |
400800: 48 01 fe add %rdi,%rsi | |
400803: f3 4c 0f b8 52 f8 popcnt -0x8(%rdx),%r10 | |
400809: 48 01 ce add %rcx,%rsi | |
40080c: 4c 01 ce add %r9,%rsi | |
40080f: 4c 01 d6 add %r10,%rsi | |
400812: 48 39 c2 cmp %rax,%rdx | |
400815: 75 b0 jne 4007c7 <main+0x237> | |
400817: 49 01 f6 add %rsi,%r14 | |
40081a: 49 83 e8 01 sub $0x1,%r8 | |
40081e: 0f 85 f4 fe ff ff jne 400718 <main+0x188> | |
400824: e8 f7 fc ff ff callq 400520 <clock@plt> | |
400829: 4c 29 e0 sub %r12,%rax | |
40082c: 48 85 ed test %rbp,%rbp | |
40082f: c4 e1 eb 2a d0 vcvtsi2sd %rax,%xmm2,%xmm2 | |
400834: 78 6b js 4008a1 <main+0x311> | |
400836: c4 e1 fb 2a c5 vcvtsi2sd %rbp,%xmm0,%xmm0 | |
40083b: c5 eb 59 1d 8d 03 00 vmulsd 0x38d(%rip),%xmm2,%xmm3 # 400bd0 <_IO_stdin_used+0x60> | |
400842: 00 | |
400843: 4c 89 f2 mov %r14,%rdx | |
400846: be 98 0b 40 00 mov $0x400b98,%esi | |
40084b: c5 fb 10 2d 85 03 00 vmovsd 0x385(%rip),%xmm5 # 400bd8 <_IO_stdin_used+0x68> | |
400852: 00 | |
400853: bf 01 00 00 00 mov $0x1,%edi | |
400858: b8 02 00 00 00 mov $0x2,%eax | |
40085d: c5 fb 59 25 63 03 00 vmulsd 0x363(%rip),%xmm0,%xmm4 # 400bc8 <_IO_stdin_used+0x58> | |
400864: 00 | |
400865: c5 eb 5e c5 vdivsd %xmm5,%xmm2,%xmm0 | |
400869: c5 e3 5e f5 vdivsd %xmm5,%xmm3,%xmm6 | |
40086d: c5 db 5e ce vdivsd %xmm6,%xmm4,%xmm1 | |
400871: e8 0a fd ff ff callq 400580 <__printf_chk@plt> | |
400876: 31 c0 xor %eax,%eax | |
400878: 48 83 c4 08 add $0x8,%rsp | |
40087c: 5b pop %rbx | |
40087d: 5d pop %rbp | |
40087e: 41 5c pop %r12 | |
400880: 41 5d pop %r13 | |
400882: 41 5e pop %r14 | |
400884: 41 5f pop %r15 | |
400886: c3 retq | |
400887: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) | |
40088e: 00 00 | |
400890: 31 f6 xor %esi,%esi | |
400892: 49 01 f6 add %rsi,%r14 | |
400895: 49 83 e8 01 sub $0x1,%r8 | |
400899: 0f 85 79 fe ff ff jne 400718 <main+0x188> | |
40089f: eb 83 jmp 400824 <main+0x294> | |
4008a1: 48 d1 ed shr %rbp | |
4008a4: c4 e1 f3 2a cd vcvtsi2sd %rbp,%xmm1,%xmm1 | |
4008a9: c5 f3 58 c1 vaddsd %xmm1,%xmm1,%xmm0 | |
4008ad: eb 8c jmp 40083b <main+0x2ab> | |
4008af: 48 8b 16 mov (%rsi),%rdx | |
4008b2: bf 01 00 00 00 mov $0x1,%edi | |
4008b7: be 78 0b 40 00 mov $0x400b78,%esi | |
4008bc: 31 c0 xor %eax,%eax | |
4008be: e8 bd fc ff ff callq 400580 <__printf_chk@plt> | |
4008c3: 83 c8 ff or $0xffffffff,%eax | |
4008c6: eb b0 jmp 400878 <main+0x2e8> | |
00000000004008c8 <_start>: | |
4008c8: 31 ed xor %ebp,%ebp | |
4008ca: 49 89 d1 mov %rdx,%r9 | |
4008cd: 5e pop %rsi | |
4008ce: 48 89 e2 mov %rsp,%rdx | |
4008d1: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp | |
4008d5: 50 push %rax | |
4008d6: 54 push %rsp | |
4008d7: 49 c7 c0 60 0b 40 00 mov $0x400b60,%r8 | |
4008de: 48 c7 c1 f0 0a 40 00 mov $0x400af0,%rcx | |
4008e5: 48 c7 c7 90 05 40 00 mov $0x400590,%rdi | |
4008ec: e8 3f fc ff ff callq 400530 <__libc_start_main@plt> | |
4008f1: f4 hlt | |
4008f2: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) | |
4008f9: 00 00 00 | |
4008fc: 0f 1f 40 00 nopl 0x0(%rax) | |
0000000000400900 <deregister_tm_clones>: | |
400900: b8 67 10 60 00 mov $0x601067,%eax | |
400905: 55 push %rbp | |
400906: 48 2d 60 10 60 00 sub $0x601060,%rax | |
40090c: 48 83 f8 0e cmp $0xe,%rax | |
400910: 48 89 e5 mov %rsp,%rbp | |
400913: 77 02 ja 400917 <deregister_tm_clones+0x17> | |
400915: 5d pop %rbp | |
400916: c3 retq | |
400917: b8 00 00 00 00 mov $0x0,%eax | |
40091c: 48 85 c0 test %rax,%rax | |
40091f: 74 f4 je 400915 <deregister_tm_clones+0x15> | |
400921: 5d pop %rbp | |
400922: bf 60 10 60 00 mov $0x601060,%edi | |
400927: ff e0 jmpq *%rax | |
400929: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) | |
0000000000400930 <register_tm_clones>: | |
400930: b8 60 10 60 00 mov $0x601060,%eax | |
400935: 55 push %rbp | |
400936: 48 2d 60 10 60 00 sub $0x601060,%rax | |
40093c: 48 c1 f8 03 sar $0x3,%rax | |
400940: 48 89 e5 mov %rsp,%rbp | |
400943: 48 89 c2 mov %rax,%rdx | |
400946: 48 c1 ea 3f shr $0x3f,%rdx | |
40094a: 48 01 d0 add %rdx,%rax | |
40094d: 48 d1 f8 sar %rax | |
400950: 75 02 jne 400954 <register_tm_clones+0x24> | |
400952: 5d pop %rbp | |
400953: c3 retq | |
400954: ba 00 00 00 00 mov $0x0,%edx | |
400959: 48 85 d2 test %rdx,%rdx | |
40095c: 74 f4 je 400952 <register_tm_clones+0x22> | |
40095e: 5d pop %rbp | |
40095f: 48 89 c6 mov %rax,%rsi | |
400962: bf 60 10 60 00 mov $0x601060,%edi | |
400967: ff e2 jmpq *%rdx | |
400969: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) | |
0000000000400970 <__do_global_dtors_aux>: | |
400970: 80 3d e9 06 20 00 00 cmpb $0x0,0x2006e9(%rip) # 601060 <__TMC_END__> | |
400977: 75 11 jne 40098a <__do_global_dtors_aux+0x1a> | |
400979: 55 push %rbp | |
40097a: 48 89 e5 mov %rsp,%rbp | |
40097d: e8 7e ff ff ff callq 400900 <deregister_tm_clones> | |
400982: 5d pop %rbp | |
400983: c6 05 d6 06 20 00 01 movb $0x1,0x2006d6(%rip) # 601060 <__TMC_END__> | |
40098a: f3 c3 repz retq | |
40098c: 0f 1f 40 00 nopl 0x0(%rax) | |
0000000000400990 <frame_dummy>: | |
400990: 48 83 3d 88 04 20 00 cmpq $0x0,0x200488(%rip) # 600e20 <__JCR_END__> | |
400997: 00 | |
400998: 74 1e je 4009b8 <frame_dummy+0x28> | |
40099a: b8 00 00 00 00 mov $0x0,%eax | |
40099f: 48 85 c0 test %rax,%rax | |
4009a2: 74 14 je 4009b8 <frame_dummy+0x28> | |
4009a4: 55 push %rbp | |
4009a5: bf 20 0e 60 00 mov $0x600e20,%edi | |
4009aa: 48 89 e5 mov %rsp,%rbp | |
4009ad: ff d0 callq *%rax | |
4009af: 5d pop %rbp | |
4009b0: e9 7b ff ff ff jmpq 400930 <register_tm_clones> | |
4009b5: 0f 1f 00 nopl (%rax) | |
4009b8: e9 73 ff ff ff jmpq 400930 <register_tm_clones> | |
4009bd: 0f 1f 00 nopl (%rax) | |
00000000004009c0 <builtin_popcnt>: | |
4009c0: 48 85 f6 test %rsi,%rsi | |
4009c3: 0f 84 07 01 00 00 je 400ad0 <builtin_popcnt+0x110> | |
4009c9: 48 8d 0c f5 f8 ff ff lea -0x8(,%rsi,8),%rcx | |
4009d0: ff | |
4009d1: f3 48 0f b8 07 popcnt (%rdi),%rax | |
4009d6: 4c 8d 04 f7 lea (%rdi,%rsi,8),%r8 | |
4009da: 48 c1 e9 03 shr $0x3,%rcx | |
4009de: 48 8d 77 08 lea 0x8(%rdi),%rsi | |
4009e2: 83 e1 07 and $0x7,%ecx | |
4009e5: 4c 39 c6 cmp %r8,%rsi | |
4009e8: 0f 84 ea 00 00 00 je 400ad8 <builtin_popcnt+0x118> | |
4009ee: 48 85 c9 test %rcx,%rcx | |
4009f1: 74 7d je 400a70 <builtin_popcnt+0xb0> | |
4009f3: 48 83 f9 01 cmp $0x1,%rcx | |
4009f7: 74 66 je 400a5f <builtin_popcnt+0x9f> | |
4009f9: 48 83 f9 02 cmp $0x2,%rcx | |
4009fd: 74 54 je 400a53 <builtin_popcnt+0x93> | |
4009ff: 48 83 f9 03 cmp $0x3,%rcx | |
400a03: 74 42 je 400a47 <builtin_popcnt+0x87> | |
400a05: 48 83 f9 04 cmp $0x4,%rcx | |
400a09: 74 30 je 400a3b <builtin_popcnt+0x7b> | |
400a0b: 48 83 f9 05 cmp $0x5,%rcx | |
400a0f: 74 1e je 400a2f <builtin_popcnt+0x6f> | |
400a11: 48 83 f9 06 cmp $0x6,%rcx | |
400a15: 74 0c je 400a23 <builtin_popcnt+0x63> | |
400a17: f3 48 0f b8 16 popcnt (%rsi),%rdx | |
400a1c: 48 8d 77 10 lea 0x10(%rdi),%rsi | |
400a20: 48 01 d0 add %rdx,%rax | |
400a23: f3 48 0f b8 3e popcnt (%rsi),%rdi | |
400a28: 48 83 c6 08 add $0x8,%rsi | |
400a2c: 48 01 f8 add %rdi,%rax | |
400a2f: f3 4c 0f b8 0e popcnt (%rsi),%r9 | |
400a34: 48 83 c6 08 add $0x8,%rsi | |
400a38: 4c 01 c8 add %r9,%rax | |
400a3b: f3 4c 0f b8 16 popcnt (%rsi),%r10 | |
400a40: 48 83 c6 08 add $0x8,%rsi | |
400a44: 4c 01 d0 add %r10,%rax | |
400a47: f3 4c 0f b8 1e popcnt (%rsi),%r11 | |
400a4c: 48 83 c6 08 add $0x8,%rsi | |
400a50: 4c 01 d8 add %r11,%rax | |
400a53: f3 48 0f b8 0e popcnt (%rsi),%rcx | |
400a58: 48 83 c6 08 add $0x8,%rsi | |
400a5c: 48 01 c8 add %rcx,%rax | |
400a5f: f3 48 0f b8 16 popcnt (%rsi),%rdx | |
400a64: 48 83 c6 08 add $0x8,%rsi | |
400a68: 48 01 d0 add %rdx,%rax | |
400a6b: 4c 39 c6 cmp %r8,%rsi | |
400a6e: 74 58 je 400ac8 <builtin_popcnt+0x108> | |
400a70: f3 48 0f b8 3e popcnt (%rsi),%rdi | |
400a75: 48 01 f8 add %rdi,%rax | |
400a78: 48 83 c6 40 add $0x40,%rsi | |
400a7c: f3 4c 0f b8 4e c8 popcnt -0x38(%rsi),%r9 | |
400a82: 4c 01 c8 add %r9,%rax | |
400a85: f3 4c 0f b8 56 d0 popcnt -0x30(%rsi),%r10 | |
400a8b: 4c 01 d0 add %r10,%rax | |
400a8e: f3 4c 0f b8 5e d8 popcnt -0x28(%rsi),%r11 | |
400a94: f3 48 0f b8 4e e0 popcnt -0x20(%rsi),%rcx | |
400a9a: 4c 01 d8 add %r11,%rax | |
400a9d: f3 48 0f b8 56 e8 popcnt -0x18(%rsi),%rdx | |
400aa3: f3 48 0f b8 7e f0 popcnt -0x10(%rsi),%rdi | |
400aa9: 48 01 c8 add %rcx,%rax | |
400aac: f3 4c 0f b8 4e f8 popcnt -0x8(%rsi),%r9 | |
400ab2: 48 01 d0 add %rdx,%rax | |
400ab5: 48 01 f8 add %rdi,%rax | |
400ab8: 4c 01 c8 add %r9,%rax | |
400abb: 4c 39 c6 cmp %r8,%rsi | |
400abe: 75 b0 jne 400a70 <builtin_popcnt+0xb0> | |
400ac0: f3 c3 repz retq | |
400ac2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) | |
400ac8: f3 c3 repz retq | |
400aca: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) | |
400ad0: 31 c0 xor %eax,%eax | |
400ad2: c3 retq | |
400ad3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) | |
400ad8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) | |
400adf: 00 | |
400ae0: f3 c3 repz retq | |
400ae2: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) | |
400ae9: 00 00 00 | |
400aec: 0f 1f 40 00 nopl 0x0(%rax) | |
0000000000400af0 <__libc_csu_init>: | |
400af0: 41 57 push %r15 | |
400af2: 41 89 ff mov %edi,%r15d | |
400af5: 41 56 push %r14 | |
400af7: 49 89 f6 mov %rsi,%r14 | |
400afa: 41 55 push %r13 | |
400afc: 49 89 d5 mov %rdx,%r13 | |
400aff: 41 54 push %r12 | |
400b01: 4c 8d 25 08 03 20 00 lea 0x200308(%rip),%r12 # 600e10 <__frame_dummy_init_array_entry> | |
400b08: 55 push %rbp | |
400b09: 48 8d 2d 08 03 20 00 lea 0x200308(%rip),%rbp # 600e18 <__init_array_end> | |
400b10: 53 push %rbx | |
400b11: 4c 29 e5 sub %r12,%rbp | |
400b14: 31 db xor %ebx,%ebx | |
400b16: 48 c1 fd 03 sar $0x3,%rbp | |
400b1a: 48 83 ec 08 sub $0x8,%rsp | |
400b1e: e8 c5 f9 ff ff callq 4004e8 <_init> | |
400b23: 48 85 ed test %rbp,%rbp | |
400b26: 74 1e je 400b46 <__libc_csu_init+0x56> | |
400b28: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) | |
400b2f: 00 | |
400b30: 4c 89 ea mov %r13,%rdx | |
400b33: 4c 89 f6 mov %r14,%rsi | |
400b36: 44 89 ff mov %r15d,%edi | |
400b39: 41 ff 14 dc callq *(%r12,%rbx,8) | |
400b3d: 48 83 c3 01 add $0x1,%rbx | |
400b41: 48 39 eb cmp %rbp,%rbx | |
400b44: 75 ea jne 400b30 <__libc_csu_init+0x40> | |
400b46: 48 83 c4 08 add $0x8,%rsp | |
400b4a: 5b pop %rbx | |
400b4b: 5d pop %rbp | |
400b4c: 41 5c pop %r12 | |
400b4e: 41 5d pop %r13 | |
400b50: 41 5e pop %r14 | |
400b52: 41 5f pop %r15 | |
400b54: c3 retq | |
400b55: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1) | |
400b5c: 00 00 00 00 | |
0000000000400b60 <__libc_csu_fini>: | |
400b60: f3 c3 repz retq | |
Disassembly of section .fini: | |
0000000000400b64 <_fini>: | |
400b64: 48 83 ec 08 sub $0x8,%rsp | |
400b68: 48 83 c4 08 add $0x8,%rsp | |
400b6c: c3 retq |
NOTE: I use `--std=gnu99` to be able to call `random` without any parameters. It has no influence on performance!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The following code compiles in the most optimal way (even without extra `movl`s!) on `gcc` 4.8.1. However, `clang` 3.4.1 fails to unroll the loop.