## Source: GitHub gist lemire/d2047ce1e3b511c54bb47b779a3028f5 by @lemire,
## created September 17, 2018 00:52.
## (GitHub page navigation text removed; it is not part of the assembly.)
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
## ---------------------------------------------------------------------
## timestamp()                                   symbol: __Z9timestampv
##
## Calls clock(), converts the 64-bit integer it returns to a double and
## divides that by 1.0E+6.
##
## In:   nothing
## Out:  %xmm0 (low double) = (double)clock() / 1.0E+6
## ---------------------------------------------------------------------

## Constants used by the integer -> double conversion.
    .section    __TEXT,__literal16,16byte_literals
    .p2align    4
LCPI0_0:                                    ## high words spliced in by punpckldq
    .long   0x43300000                      ## 1127219200
    .long   0x45300000                      ## 1160773632
    .long   0
    .long   0
LCPI0_1:                                    ## biases removed by subpd
    .quad   4841369599423283200             ## double 4503599627370496
    .quad   4985484787499139072             ## double 1.9342813113834067E+25

    .section    __TEXT,__literal8,8byte_literals
    .p2align    3
LCPI0_2:                                    ## divisor
    .quad   4696837146684686336             ## double 1.0E+6

    .section    __TEXT,__text,regular,pure_instructions
    .globl  __Z9timestampv
    .p2align    4, 0x90
__Z9timestampv:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp

    callq   _clock                          ## rax = clock()

    ## Treat rax as an unsigned 64-bit integer and convert it to double:
    ## pair each 32-bit half with an exponent word, subtract the matching
    ## bias, then add the two partial values.
    movq    %rax, %xmm0
    punpckldq   LCPI0_0(%rip), %xmm0        ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    subpd   LCPI0_1(%rip), %xmm0
    haddpd  %xmm0, %xmm0                    ## low double = sum of both halves

    divsd   LCPI0_2(%rip), %xmm0            ## scale by 1 / 1.0E+6

    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## pcg32_random_r(pcg32_random_t *)  ->  32-bit result in %eax
##
## In:   %rdi = pointer; a 64-bit state is read from 0(%rdi) and a 64-bit
##       increment from 8(%rdi).
## Out:  %eax = value derived from the OLD state;
##       0(%rdi) = old * 6364136223846793005 + (increment | 1).
## Uses: %rax, %rcx, %rdx.
## ---------------------------------------------------------------------
    .globl  __Z14pcg32_random_rP14pcg32_random_t
    .p2align    4, 0x90
__Z14pcg32_random_rP14pcg32_random_t:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp

    ## Advance the generator state.
    movq    (%rdi), %rcx                    ## rcx = old state
    movq    8(%rdi), %rax                   ## rax = increment
    movabsq $6364136223846793005, %rdx      ## imm = 0x5851F42D4C957F2D
    imulq   %rcx, %rdx                      ## rdx = old * multiplier
    orq     $1, %rax                        ## force the increment odd
    addq    %rdx, %rax
    movq    %rax, (%rdi)                    ## store the new state

    ## Output = (((old >> 18) ^ old) >> 27) rotated right by (old >> 59).
    movq    %rcx, %rax
    shrq    $18, %rax
    xorq    %rcx, %rax
    shrq    $27, %rax                       ## eax = 32-bit value to rotate
    shrq    $59, %rcx                       ## cl = rotation count, 0..31
    movl    %eax, %edx
    shrl    %cl, %edx                       ## edx = value >> count
    negl    %ecx                            ## 32-bit shifts use cl modulo 32
    shll    %cl, %eax                       ## eax = value << (-count mod 32)
    orl     %edx, %eax                      ## merge the two halves

    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## random_bounded(pcg32_random_t *, unsigned bound)  ->  %eax
##
## In:   %rdi = pointer; 64-bit state at 0(%rdi), 64-bit increment at 8(%rdi)
##       %esi = bound
## Out:  %eax = (r * bound) >> 32, where r is the 32-bit value produced
##       from the old state; 0(%rdi) receives the advanced state.
## Uses: %rax, %rcx, %rdx.
## ---------------------------------------------------------------------
    .globl  __Z14random_boundedP14pcg32_random_tj
    .p2align    4, 0x90
__Z14random_boundedP14pcg32_random_tj:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp

    ## Advance the generator state.
    movq    (%rdi), %rcx                    ## rcx = old state
    movq    8(%rdi), %rax                   ## rax = increment
    movabsq $6364136223846793005, %rdx      ## imm = 0x5851F42D4C957F2D
    imulq   %rcx, %rdx
    orq     $1, %rax                        ## force the increment odd
    addq    %rdx, %rax
    movq    %rax, (%rdi)                    ## store the new state

    ## r = (((old >> 18) ^ old) >> 27) rotated right by (old >> 59).
    movq    %rcx, %rdx
    shrq    $18, %rdx
    xorq    %rcx, %rdx
    shrq    $27, %rdx                       ## edx = 32-bit value to rotate
    shrq    $59, %rcx                       ## cl = rotation count, 0..31
    movl    %edx, %eax
    shrl    %cl, %eax
    negl    %ecx                            ## 32-bit shifts use cl modulo 32
    shll    %cl, %edx
    orl     %eax, %edx                      ## rdx = r, upper 32 bits cleared

    ## Scale into [0, bound): high half of the 64-bit product r * bound.
    movl    %esi, %eax                      ## rax = bound, zero-extended
    imulq   %rdx, %rax
    shrq    $32, %rax

    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## swap(int *, int *)                              symbol: __Z4swapPiS_
##
## In:   %rdi, %rsi = addresses of two 32-bit values
## Out:  the two 32-bit values are exchanged in memory; no return value
## Uses: %eax, %ecx.
## ---------------------------------------------------------------------
    .globl  __Z4swapPiS_
    .p2align    4, 0x90
__Z4swapPiS_:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp

    movl    (%rdi), %eax                    ## eax = first value
    movl    (%rsi), %ecx                    ## ecx = second value
    movl    %ecx, (%rdi)
    movl    %eax, (%rsi)

    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## java_random_bounded32(pcg32_random_t *, unsigned bound)  ->  %eax
##
## Draws 32-bit values until one passes the rejection test below and
## returns that value modulo bound.
##
## In:   %rdi = pointer; 64-bit state at 0(%rdi), 64-bit increment at 8(%rdi)
##       %esi = bound (used directly as the divl divisor)
## Out:  %eax = r % bound for the accepted draw r;
##       0(%rdi) is written once, after the loop, with the final state.
## Regs: r9 = running state, r10 = increment | 1, r11d = -bound,
##       rbx = multiplier (callee-saved, hence the push/pop).
## ---------------------------------------------------------------------
    .globl  __Z21java_random_bounded32P14pcg32_random_tj
    .p2align    4, 0x90
__Z21java_random_bounded32P14pcg32_random_tj:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    pushq   %rbx
    .cfi_offset %rbx, -24

    movq    (%rdi), %r9                     ## r9 = state
    movq    8(%rdi), %r10
    orq     $1, %r10                        ## r10 = increment, forced odd
    movl    %esi, %r11d
    negl    %r11d                           ## r11d = -bound (mod 2^32)
    movabsq $6364136223846793005, %rbx      ## imm = 0x5851F42D4C957F2D

    .p2align    4, 0x90
Ljava_bounded32_retry:
    ## Step the generator; rcx keeps the pre-step state for the output.
    movq    %r9, %rcx
    imulq   %rbx, %r9
    addq    %r10, %r9

    ## r = (((old >> 18) ^ old) >> 27) rotated right by (old >> 59).
    movq    %rcx, %r8
    shrq    $18, %r8
    xorq    %rcx, %r8
    shrq    $27, %r8
    shrq    $59, %rcx                       ## cl = rotation count, 0..31
    movl    %r8d, %eax
    shrl    %cl, %eax
    negl    %ecx                            ## 32-bit shifts use cl modulo 32
    shll    %cl, %r8d
    orl     %eax, %r8d                      ## r8d = r

    ## edx = r % bound; draw again while (r - edx) > -bound (unsigned).
    xorl    %edx, %edx                      ## divl divides edx:eax
    movl    %r8d, %eax
    divl    %esi
    subl    %edx, %r8d
    cmpl    %r11d, %r8d
    ja      Ljava_bounded32_retry

    movq    %r9, (%rdi)                     ## publish the advanced state
    movl    %edx, %eax                      ## return the remainder
    popq    %rbx
    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## shuffle_java(int *storage, unsigned long size)   symbol: __Z12shuffle_javaPim
##
## For i = size down to 2: draws an index in [0, i) with the
## rejection-plus-remainder scheme below and swaps storage[i-1] with
## storage[index].  The generator state is a register initialised from
## immediates on every call and is never stored, so every call produces
## the same sequence of draws.
##
## In:   %rdi = storage (32-bit elements)
##       %esi = size; only the low 32 bits are used, treated as signed
## Out:  storage permuted in place; nothing is done when size < 2
## Regs: r8 = i, r14 = i - 1, ebx = -i, r9 = state, r10 = multiplier,
##       r11 = increment.  rbx and r14 are callee-saved and restored.
## ---------------------------------------------------------------------
    .globl  __Z12shuffle_javaPim
    .p2align    4, 0x90
__Z12shuffle_javaPim:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    pushq   %r14
    pushq   %rbx
    .cfi_offset %rbx, -32
    .cfi_offset %r14, -24

    cmpl    $2, %esi
    jl      Lshuffle_java_done              ## fewer than two elements

    movslq  %esi, %r8                       ## r8 = i = size
    movabsq $-8846114313915602277, %r9      ## imm = 0x853C49E6748FEA9B (state)
    movabsq $6364136223846793005, %r10      ## imm = 0x5851F42D4C957F2D (multiplier)
    movabsq $-2720673578348880933, %r11     ## imm = 0xDA3E39CB94B95BDB (increment)

    .p2align    4, 0x90
Lshuffle_java_next_slot:
    leaq    -1(%r8), %r14                   ## r14 = i - 1, next iteration's i
    movl    %r8d, %ebx
    negl    %ebx                            ## ebx = -i (mod 2^32)

    .p2align    4, 0x90
Lshuffle_java_draw:
    ## Step the generator; rcx keeps the pre-step state for the output.
    movq    %r9, %rcx
    imulq   %r10, %r9
    addq    %r11, %r9

    ## r = (((old >> 18) ^ old) >> 27) rotated right by (old >> 59).
    movq    %rcx, %rsi
    shrq    $18, %rsi
    xorq    %rcx, %rsi
    shrq    $27, %rsi
    shrq    $59, %rcx                       ## cl = rotation count, 0..31
    movl    %esi, %eax
    shrl    %cl, %eax
    negl    %ecx                            ## 32-bit shifts use cl modulo 32
    shll    %cl, %esi
    orl     %eax, %esi                      ## esi = r

    ## edx = r % i; draw again while (r - edx) > -i (unsigned).
    xorl    %edx, %edx                      ## divl divides edx:eax
    movl    %esi, %eax
    divl    %r8d
    subl    %edx, %esi
    cmpl    %ebx, %esi
    ja      Lshuffle_java_draw

    ## Swap storage[i-1] with storage[edx].
    movl    %edx, %eax                      ## rax = index, zero-extended
    movl    -4(%rdi,%r8,4), %ecx
    movl    (%rdi,%rax,4), %edx
    movl    %edx, -4(%rdi,%r8,4)
    movl    %ecx, (%rdi,%rax,4)

    cmpq    $2, %r8                         ## flags survive the movq below
    movq    %r14, %r8                       ## i = i - 1
    jg      Lshuffle_java_next_slot         ## continue while the old i > 2

Lshuffle_java_done:
    popq    %rbx
    popq    %r14
    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## shuffle_precomp(int *storage, unsigned long size, int *precomputed)
##                                       symbol: __Z15shuffle_precompPimS_
##
## For i = size down to 2: swaps storage[i-1] with
## storage[precomputed[i-1]].
##
## FIX: the index used to be loaded from precomputed[i]
## (-4(%rdx,%rax,4) with rax = i + 1).  On the first iteration that read
## precomputed[size], one element past an array of `size` entries, and
## the value was then used as an index for a load and a store into
## storage.  main fills slot i-1 for position i-1, and its second
## precomputed-shuffle loop reads slot i-1 as well, so the load now
## uses -8(%rdx,%rax,4).
##
## In:   %rdi = storage (32-bit elements)
##       %esi = size; only the low 32 bits are used, treated as signed
##       %rdx = precomputed (32-bit signed indices; entries 1..size-1
##              are read, entry 0 is not)
## Out:  storage permuted in place; nothing is done when size < 2
## Uses: %rax, %rcx, %esi, %r8.
## ---------------------------------------------------------------------
    .globl  __Z15shuffle_precompPimS_
    .p2align    4, 0x90
__Z15shuffle_precompPimS_:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp

    cmpl    $2, %esi
    jl      Lshuffle_precomp_done           ## fewer than two elements

    movslq  %esi, %rax
    incq    %rax                            ## rax = i + 1, starting at size + 1

    .p2align    4, 0x90
Lshuffle_precomp_loop:
    movslq  -8(%rdx,%rax,4), %rcx           ## rcx = precomputed[i-1]
    movl    -8(%rdi,%rax,4), %r8d           ## r8d = storage[i-1]
    movl    (%rdi,%rcx,4), %esi
    movl    %esi, -8(%rdi,%rax,4)           ## storage[i-1] = storage[rcx]
    movl    %r8d, (%rdi,%rcx,4)             ## storage[rcx] = old storage[i-1]
    decq    %rax
    cmpq    $2, %rax
    jg      Lshuffle_precomp_loop           ## continue while i + 1 > 2

Lshuffle_precomp_done:
    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## ---------------------------------------------------------------------
## main                                                    symbol: _main
##
## Benchmark driver.  Allocates two buffers of 400000000 bytes
## (100000000 32-bit elements each), then runs and times, in order:
##   1. generation of bounded random indices (division-based rejection
##      method); clock() is called before and after but its results are
##      not used
##   2. a shuffle that draws those indices inline   -> L_.str.2
##   3. a shuffle driven by the stored indices      -> L_.str.3
##   4. a shuffle using the multiply-shift index    -> L_.str.5
##   5. generation of multiply-shift indices        -> L_.str.6
##   6. a shuffle driven by the stored indices      -> L_.str.7
## Each printed time is (clock() after - clock() before) / 1.0E+6.
##
## FIX: loop 3 (LBB7_13) used to load its index from precomputed[i]
## (offsets -4/-8/-12 from %rdi).  Its first load was therefore
## precomputed[100000000], just past the end of the allocation, and the
## value was used as an index for a load and a store.  Loops 1 and 5
## store slot i-1 and loop 6 reads slot i-1, so loop 3 now uses offsets
## -8/-12/-16 as well.
##
## Registers live across calls (all callee-saved):
##   r14 = first buffer (the array being shuffled)
##   r12 = generator increment, r13 = multiplier, r15 = initial state
## Stack frame:
##   -80(%rbp) = second buffer (stored indices); reused as a 16-byte
##               spill slot at the very end
##   -64(%rbp) = 16-byte spill slot for the start timestamp
## ---------------------------------------------------------------------
    .section    __TEXT,__literal16,16byte_literals
    .p2align    4
LCPI7_0:                                    ## first four element values
    .long   0, 1, 2, 3
LCPI7_1:                                    ## per-vector offsets for the fill loops
    .long   4, 4, 4, 4
LCPI7_2:
    .long   8, 8, 8, 8
LCPI7_3:
    .long   12, 12, 12, 12
LCPI7_4:
    .long   16, 16, 16, 16
LCPI7_5:
    .long   20, 20, 20, 20
LCPI7_6:
    .long   24, 24, 24, 24
LCPI7_7:
    .long   28, 28, 28, 28
LCPI7_8:
    .long   32, 32, 32, 32
LCPI7_9:
    .long   36, 36, 36, 36
LCPI7_10:                                   ## step per fill-loop iteration
    .long   40, 40, 40, 40
LCPI7_11:                                   ## integer -> double: high words
    .long   1127219200, 1160773632, 0, 0    ## 0x43300000, 0x45300000, 0, 0
LCPI7_12:                                   ## integer -> double: biases
    .quad   4841369599423283200             ## double 4503599627370496
    .quad   4985484787499139072             ## double 1.9342813113834067E+25
    .section    __TEXT,__literal8,8byte_literals
    .p2align    3
LCPI7_13:
    .quad   4696837146684686336             ## double 1.0E+6
    .section    __TEXT,__text,regular,pure_instructions
    .globl  _main
    .p2align    4, 0x90
_main:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    pushq   %r15
    pushq   %r14
    pushq   %r13
    pushq   %r12
    pushq   %rbx
    subq    $40, %rsp                       ## 5 pushes + 40 bytes keeps rsp 16-aligned
    .cfi_offset %rbx, -56
    .cfi_offset %r12, -48
    .cfi_offset %r13, -40
    .cfi_offset %r14, -32
    .cfi_offset %r15, -24

    ## ---- allocate both buffers --------------------------------------
    movl    $400000000, %edi                ## imm = 0x17D78400
    callq   __Znam
    movq    %rax, %r14                      ## r14 = first buffer
    movl    $400000000, %edi                ## imm = 0x17D78400
    callq   __Znam                          ## rax = second buffer, spilled below

    ## ---- fill the first buffer with 0, 1, 2, ... --------------------
    ## 40 elements per iteration: xmm2 holds the current base vector and
    ## xmm8-xmm12, xmm5-xmm7, xmm0 hold the constant offsets 4..36.
    movdqa  LCPI7_0(%rip), %xmm2            ## xmm2 = [0,1,2,3]
    movl    $36, %ecx
    movdqa  LCPI7_1(%rip), %xmm8            ## xmm8 = [4,4,4,4]
    movdqa  LCPI7_2(%rip), %xmm9            ## xmm9 = [8,8,8,8]
    movdqa  LCPI7_3(%rip), %xmm10           ## xmm10 = [12,12,12,12]
    movdqa  LCPI7_4(%rip), %xmm11           ## xmm11 = [16,16,16,16]
    movdqa  LCPI7_5(%rip), %xmm12           ## xmm12 = [20,20,20,20]
    movdqa  LCPI7_6(%rip), %xmm5            ## xmm5 = [24,24,24,24]
    movdqa  LCPI7_7(%rip), %xmm6            ## xmm6 = [28,28,28,28]
    movdqa  LCPI7_8(%rip), %xmm7            ## xmm7 = [32,32,32,32]
    movdqa  LCPI7_9(%rip), %xmm0            ## xmm0 = [36,36,36,36]
    movdqa  LCPI7_10(%rip), %xmm1           ## xmm1 = [40,40,40,40]
    .p2align    4, 0x90
LBB7_1:
    movdqa  %xmm2, %xmm3
    paddd   %xmm8, %xmm3
    movdqu  %xmm2, -144(%r14,%rcx,4)
    movdqu  %xmm3, -128(%r14,%rcx,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm9, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm10, %xmm4
    movdqu  %xmm3, -112(%r14,%rcx,4)
    movdqu  %xmm4, -96(%r14,%rcx,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm11, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm12, %xmm4
    movdqu  %xmm3, -80(%r14,%rcx,4)
    movdqu  %xmm4, -64(%r14,%rcx,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm5, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm6, %xmm4
    movdqu  %xmm3, -48(%r14,%rcx,4)
    movdqu  %xmm4, -32(%r14,%rcx,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm7, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm0, %xmm4
    movdqu  %xmm3, -16(%r14,%rcx,4)
    movdqu  %xmm4, (%r14,%rcx,4)
    paddd   %xmm1, %xmm2                    ## advance the base vector by 40
    addq    $40, %rcx
    cmpq    $100000036, %rcx                ## imm = 0x5F5E124
    jne     LBB7_1

    ## ---- second fill pass -------------------------------------------
    ## Writes the same values to the same buffer (%r14) again; the
    ## second buffer is not initialised here.
    movq    %rax, -80(%rbp)                 ## spill the second buffer pointer
    movdqa  LCPI7_0(%rip), %xmm2            ## xmm2 = [0,1,2,3]
    movl    $36, %eax
    .p2align    4, 0x90
LBB7_3:
    movdqa  %xmm2, %xmm3
    paddd   %xmm8, %xmm3
    movdqu  %xmm2, -144(%r14,%rax,4)
    movdqu  %xmm3, -128(%r14,%rax,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm9, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm10, %xmm4
    movdqu  %xmm3, -112(%r14,%rax,4)
    movdqu  %xmm4, -96(%r14,%rax,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm11, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm12, %xmm4
    movdqu  %xmm3, -80(%r14,%rax,4)
    movdqu  %xmm4, -64(%r14,%rax,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm5, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm6, %xmm4
    movdqu  %xmm3, -48(%r14,%rax,4)
    movdqu  %xmm4, -32(%r14,%rax,4)
    movdqa  %xmm2, %xmm3
    paddd   %xmm7, %xmm3
    movdqa  %xmm2, %xmm4
    paddd   %xmm0, %xmm4
    movdqu  %xmm3, -16(%r14,%rax,4)
    movdqu  %xmm4, (%r14,%rax,4)
    paddd   %xmm1, %xmm2
    addq    $40, %rax
    cmpq    $100000036, %rax                ## imm = 0x5F5E124
    jne     LBB7_3

    ## ---- 1. store division-based bounded indices --------------------
    ## For i = 100000000 down to 2: precomputed[i-1] = accepted r % i.
    movabsq $-2720673578348880933, %r12     ## imm = 0xDA3E39CB94B95BDB (increment)
    movabsq $-8846114313915602277, %r15     ## imm = 0x853C49E6748FEA9B (initial state)
    movabsq $6364136223846793005, %r13      ## imm = 0x5851F42D4C957F2D (multiplier)
    callq   _clock                          ## return value not used
    movl    $100000000, %r8d                ## imm = 0x5F5E100; r8 = i
    movq    %r15, %rbx                      ## rbx = running state
    movq    -80(%rbp), %r9                  ## r9 = second buffer
    .p2align    4, 0x90
LBB7_5:
    movl    %r8d, %edi
    negl    %edi                            ## edi = -i (mod 2^32)
    .p2align    4, 0x90
LBB7_6:
    movq    %rbx, %rcx                      ## rcx = pre-step state
    imulq   %r13, %rbx
    addq    %r12, %rbx
    movq    %rcx, %rsi
    shrq    $18, %rsi
    xorq    %rcx, %rsi
    shrq    $27, %rsi
    shrq    $59, %rcx                       ## cl = rotation count
    movl    %esi, %eax
    shrl    %cl, %eax
    negl    %ecx
    shll    %cl, %esi
    orl     %eax, %esi                      ## esi = r
    xorl    %edx, %edx                      ## divl divides edx:eax
    movl    %esi, %eax
    divl    %r8d                            ## edx = r % i
    subl    %edx, %esi
    cmpl    %edi, %esi
    ja      LBB7_6                          ## draw again while (r - rem) > -i
    movl    %edx, -4(%r9,%r8,4)             ## precomputed[i-1] = rem
    cmpq    $2, %r8                         ## flags survive the leaq below
    leaq    -1(%r8), %r8
    ja      LBB7_5                          ## continue while the old i > 2

    callq   _clock                          ## return value not used
    leaq    L_str(%rip), %rdi
    callq   _puts
    leaq    L_str.9(%rip), %rdi
    callq   _puts

    ## ---- 2. shuffle drawing the indices inline ----------------------
    callq   _clock
    movq    %rax, %xmm0                     ## start time, integer -> double
    punpckldq   LCPI7_11(%rip), %xmm0       ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    subpd   LCPI7_12(%rip), %xmm0
    haddpd  %xmm0, %xmm0                    ## kept in xmm0 across the call-free loop
    movl    $100000000, %r8d                ## imm = 0x5F5E100; r8 = i
    movq    %r15, %rbx                      ## restart from the initial state
    .p2align    4, 0x90
LBB7_9:
    movl    %r8d, %edi
    negl    %edi                            ## edi = -i (mod 2^32)
    .p2align    4, 0x90
LBB7_10:
    movq    %rbx, %rcx
    imulq   %r13, %rbx
    addq    %r12, %rbx
    movq    %rcx, %rsi
    shrq    $18, %rsi
    xorq    %rcx, %rsi
    shrq    $27, %rsi
    shrq    $59, %rcx
    movl    %esi, %eax
    shrl    %cl, %eax
    negl    %ecx
    shll    %cl, %esi
    orl     %eax, %esi                      ## esi = r
    xorl    %edx, %edx
    movl    %esi, %eax
    divl    %r8d                            ## edx = r % i
    subl    %edx, %esi
    cmpl    %edi, %esi
    ja      LBB7_10
    movl    %edx, %eax                      ## rax = index, zero-extended
    movl    -4(%r14,%r8,4), %ecx            ## swap storage[i-1] <-> storage[index]
    movl    (%r14,%rax,4), %edx
    movl    %edx, -4(%r14,%r8,4)
    movl    %ecx, (%r14,%rax,4)
    cmpq    $2, %r8
    leaq    -1(%r8), %r8
    ja      LBB7_9

    divsd   LCPI7_13(%rip), %xmm0           ## start / 1.0E+6
    movapd  %xmm0, -64(%rbp)                ## 16-byte Spill
    callq   _clock
    movq    %rax, %xmm0
    punpckldq   LCPI7_11(%rip), %xmm0
    subpd   LCPI7_12(%rip), %xmm0
    haddpd  %xmm0, %xmm0
    divsd   LCPI7_13(%rip), %xmm0           ## end / 1.0E+6
    subsd   -64(%rbp), %xmm0                ## elapsed = end - start
    leaq    L_.str.2(%rip), %rdi
    movb    $1, %al                         ## one vector register argument
    callq   _printf

    ## ---- 3. shuffle driven by the stored indices (3x unrolled) ------
    callq   _clock
    movq    %rax, %xmm1
    punpckldq   LCPI7_11(%rip), %xmm1
    subpd   LCPI7_12(%rip), %xmm1
    haddpd  %xmm1, %xmm1                    ## start time kept in xmm1
    movl    $100000001, %eax                ## imm = 0x5F5E101; rax = i + 1
    movq    -80(%rbp), %rdi                 ## rdi = stored indices
    .p2align    4, 0x90
LBB7_13:
    movslq  -8(%rdi,%rax,4), %rcx           ## index for slot i-1 (was -4: one slot too high)
    movl    -8(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -8(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    movslq  -12(%rdi,%rax,4), %rcx          ## index for slot i-2 (was -8)
    movl    -12(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -12(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    movslq  -16(%rdi,%rax,4), %rcx          ## index for slot i-3 (was -12)
    movl    -16(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -16(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    addq    $-3, %rax
    cmpq    $2, %rax
    ja      LBB7_13

    movsd   LCPI7_13(%rip), %xmm0           ## xmm0 = mem[0],zero
    divsd   %xmm0, %xmm1                    ## start / 1.0E+6
    movapd  %xmm1, -64(%rbp)                ## 16-byte Spill
    callq   _clock
    movq    %rax, %xmm0
    movdqa  LCPI7_11(%rip), %xmm1           ## xmm1 = [1127219200,1160773632,0,0]
    punpckldq   %xmm1, %xmm0                ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    movapd  LCPI7_12(%rip), %xmm1           ## xmm1 = [4.503600e+15,1.934281e+25]
    subpd   %xmm1, %xmm0
    haddpd  %xmm0, %xmm0
    divsd   LCPI7_13(%rip), %xmm0
    subsd   -64(%rbp), %xmm0                ## elapsed = end - start
    leaq    L_.str.3(%rip), %rdi
    movb    $1, %al
    callq   _printf
    leaq    L_str.10(%rip), %rdi
    callq   _puts

    ## ---- 4. shuffle using index = (r * i) >> 32 ---------------------
    callq   _clock
    movq    %rax, %xmm1
    punpckldq   LCPI7_11(%rip), %xmm1
    subpd   LCPI7_12(%rip), %xmm1
    haddpd  %xmm1, %xmm1                    ## start time kept in xmm1
    movl    $100000001, %eax                ## imm = 0x5F5E101; rax = i + 1
    movq    %r15, %rcx                      ## rcx = current state
    movq    %rcx, %rdx                      ## rdx = next state
    .p2align    4, 0x90
LBB7_15:
    leaq    -1(%rax), %rsi                  ## rsi = i
    imulq   %r13, %rdx
    addq    %r12, %rdx
    movq    %rcx, %rdi
    shrq    $18, %rdi
    xorq    %rcx, %rdi
    shrq    $27, %rdi
    shrq    $59, %rcx
    movl    %edi, %ebx
    shrl    %cl, %ebx
    negl    %ecx
    shll    %cl, %edi
    orl     %ebx, %edi                      ## rdi = r, upper 32 bits cleared
    imulq   %rsi, %rdi
    sarq    $32, %rdi                       ## rdi = (r * i) >> 32
    movl    -8(%r14,%rax,4), %ecx           ## swap storage[i-1] <-> storage[rdi]
    movl    (%r14,%rdi,4), %ebx
    movl    %ebx, -8(%r14,%rax,4)
    movl    %ecx, (%r14,%rdi,4)
    cmpq    $2, %rsi                        ## flags survive the two movq below
    movq    %rsi, %rax
    movq    %rdx, %rcx
    ja      LBB7_15                         ## continue while i > 2

    movsd   LCPI7_13(%rip), %xmm0           ## xmm0 = mem[0],zero
    divsd   %xmm0, %xmm1
    movapd  %xmm1, -64(%rbp)                ## 16-byte Spill
    callq   _clock
    movq    %rax, %xmm0
    movdqa  LCPI7_11(%rip), %xmm1           ## xmm1 = [1127219200,1160773632,0,0]
    punpckldq   %xmm1, %xmm0                ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    movapd  LCPI7_12(%rip), %xmm1           ## xmm1 = [4.503600e+15,1.934281e+25]
    subpd   %xmm1, %xmm0
    haddpd  %xmm0, %xmm0
    divsd   LCPI7_13(%rip), %xmm0
    subsd   -64(%rbp), %xmm0                ## elapsed = end - start
    leaq    L_.str.5(%rip), %rdi
    movb    $1, %al
    callq   _printf

    ## ---- 5. store (r * i) >> 32 indices -----------------------------
    callq   _clock
    movq    %rax, %xmm1
    punpckldq   LCPI7_11(%rip), %xmm1
    subpd   LCPI7_12(%rip), %xmm1
    haddpd  %xmm1, %xmm1                    ## start time kept in xmm1
    movl    $100000001, %edx                ## imm = 0x5F5E101; rdx = i + 1
    movq    %r15, %rax                      ## rax = next state; r15 = current state
    movq    -80(%rbp), %r8                  ## r8 = stored indices
    .p2align    4, 0x90
LBB7_17:
    leaq    -1(%rdx), %rsi                  ## rsi = i
    imulq   %r13, %rax
    addq    %r12, %rax
    movq    %r15, %rdi
    shrq    $18, %rdi
    xorq    %r15, %rdi
    shrq    $27, %rdi
    shrq    $59, %r15                       ## r15 is consumed as the running state here
    movl    %edi, %ebx
    movl    %r15d, %ecx
    shrl    %cl, %ebx
    negl    %r15d
    movl    %r15d, %ecx
    shll    %cl, %edi
    orl     %ebx, %edi                      ## rdi = r, upper 32 bits cleared
    imulq   %rsi, %rdi
    shrq    $32, %rdi                       ## rdi = (r * i) >> 32
    movl    %edi, -8(%r8,%rdx,4)            ## precomputed[i-1] = rdi
    cmpq    $2, %rsi                        ## flags survive the two movq below
    movq    %rsi, %rdx
    movq    %rax, %r15
    ja      LBB7_17                         ## continue while i > 2

    movsd   LCPI7_13(%rip), %xmm0           ## xmm0 = mem[0],zero
    divsd   %xmm0, %xmm1
    movapd  %xmm1, -64(%rbp)                ## 16-byte Spill
    callq   _clock
    movq    %rax, %xmm0
    movdqa  LCPI7_11(%rip), %xmm1           ## xmm1 = [1127219200,1160773632,0,0]
    punpckldq   %xmm1, %xmm0                ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    movapd  LCPI7_12(%rip), %xmm1           ## xmm1 = [4.503600e+15,1.934281e+25]
    subpd   %xmm1, %xmm0
    haddpd  %xmm0, %xmm0
    divsd   LCPI7_13(%rip), %xmm0
    subsd   -64(%rbp), %xmm0                ## elapsed = end - start
    leaq    L_.str.6(%rip), %rdi
    movb    $1, %al
    callq   _printf

    ## ---- 6. shuffle driven by the stored indices (3x unrolled) ------
    callq   _clock
    movq    -80(%rbp), %rdi                 ## rdi = stored indices
    movq    %rax, %xmm1
    punpckldq   LCPI7_11(%rip), %xmm1
    subpd   LCPI7_12(%rip), %xmm1
    haddpd  %xmm1, %xmm1                    ## start time kept in xmm1
    movl    $100000001, %eax                ## imm = 0x5F5E101; rax = i + 1
    .p2align    4, 0x90
LBB7_19:
    movslq  -8(%rdi,%rax,4), %rcx           ## index for slot i-1
    movl    -8(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -8(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    movslq  -12(%rdi,%rax,4), %rcx          ## index for slot i-2
    movl    -12(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -12(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    movslq  -16(%rdi,%rax,4), %rcx          ## index for slot i-3
    movl    -16(%r14,%rax,4), %edx
    movl    (%r14,%rcx,4), %esi
    movl    %esi, -16(%r14,%rax,4)
    movl    %edx, (%r14,%rcx,4)
    addq    $-3, %rax
    cmpq    $2, %rax
    ja      LBB7_19

    movsd   LCPI7_13(%rip), %xmm0           ## xmm0 = mem[0],zero
    divsd   %xmm0, %xmm1
    movapd  %xmm1, -80(%rbp)                ## 16-byte Spill (buffer pointer no longer needed)
    callq   _clock
    movq    %rax, %xmm0
    punpckldq   LCPI7_11(%rip), %xmm0
    subpd   LCPI7_12(%rip), %xmm0
    haddpd  %xmm0, %xmm0
    divsd   LCPI7_13(%rip), %xmm0
    subsd   -80(%rbp), %xmm0                ## elapsed = end - start
    leaq    L_.str.7(%rip), %rdi
    movb    $1, %al
    callq   _printf
    leaq    L_str.11(%rip), %rdi
    callq   _puts

    ## ---- return 0 ----------------------------------------------------
    xorl    %eax, %eax
    addq    $40, %rsp
    popq    %rbx
    popq    %r12
    popq    %r13
    popq    %r14
    popq    %r15
    popq    %rbp
    retq
    .cfi_endproc
## -- End function
## String literals referenced by main.  The L_.str.N entries are printf
## formats (each takes one double, the elapsed time in seconds); the
## L_str* entries are passed to puts.
.section __TEXT,__cstring,cstring_literals
## Printed after the shuffle that draws division-based indices inline.
L_.str.2: ## @.str.2
.asciz "java-bound PCG shuffle %f s \n"
## Printed after the first shuffle driven by stored indices.
L_.str.3: ## @.str.3
.asciz "precomp shuffle %f s \n"
## Printed after the shuffle that uses the multiply-shift index.
L_.str.5: ## @.str.5
.asciz "biased fast PCG shuffle %f s \n"
## Printed after the loop that only stores multiply-shift indices.
L_.str.6: ## @.str.6
.asciz "fast comp. of indexes %f s \n"
## Printed after the second shuffle driven by stored indices
## (same text as L_.str.3).
L_.str.7: ## @.str.7
.asciz "precomp shuffle %f s \n"
## Banner lines, printed with puts before the first timed shuffle.
.p2align 4 ## @str
L_str:
.asciz "Reproducing the Java numbers from blog post https://lemire.me/blog/2018/03/24/when-shuffling-large-arrays-how-much-time-can-be-attributed-to-random-number-generation/#comments "
.p2align 4 ## @str.9
L_str.9:
.asciz "Caveat: we use PCG instead of the LCG from Java."
## Separator printed with puts after the first stored-index shuffle.
.p2align 4 ## @str.10
L_str.10:
.asciz "\n\n rest is from Arseny Kapoulkine's original code: "
## Trailing blank lines, printed with puts just before main returns.
L_str.11: ## @str.11
.asciz "\n\n"
.subsections_via_symbols
## (GitHub page footer, not part of the assembly: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")