Created
June 15, 2020 14:06
-
-
Save jmert/6aea12adb74ef8b7f25eba276d42911a to your computer and use it in GitHub Desktop.
ASM for coeff_α(int,int) compared across Julia v1.4.2 (LLVM v8), Julia v1.5-beta1 (LLVM v9), and standalone LLVM llc v8/v9
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Julia v1.4.2 (with LLVM v8): | |
# Native code for coeff_α(int,int), x86-64, Intel operand order (dst first).
# Notation used in the comments below:
#   a, b   = the integers in rdi and rsi on entry, converted to double
#            (presumably the first and second arguments, SysV convention)
#   cA..cD = the four 8-byte constants loaded through rax from the absolute
#            addresses ...784, ...792, ...800 and ...808; their values are
#            not visible in this listing
# Value left in xmm0 at the ret:
#   sqrt( ((2a + cA) / ((2a + cB) * (a*a - b*b))) * ((a + cC)^2 * cD + cC) )
# Note the evaluation order: the division happens first and its quotient is
# then multiplied by the second factor.
.text | |
# xmm0 = a
vcvtsi2sd xmm0, xmm0, rdi | |
movabs rax, 139958522833784 | |
# xmm1 = b
vcvtsi2sd xmm1, xmm1, rsi | |
# xmm2 = 2a, xmm3 = 2a + cA (the dividend)
vaddsd xmm2, xmm0, xmm0 | |
vaddsd xmm3, xmm2, qword ptr [rax] | |
movabs rax, 139958522833792 | |
# xmm4 = a*a, xmm2 = 2a + cB
vmulsd xmm4, xmm0, xmm0 | |
vaddsd xmm2, xmm2, qword ptr [rax] | |
movabs rax, 139958522833800 | |
# xmm1 = b*b, then a*a - b*b, then (2a + cB) * (a*a - b*b) (the divisor)
vmulsd xmm1, xmm1, xmm1 | |
vsubsd xmm1, xmm4, xmm1 | |
vmulsd xmm1, xmm2, xmm1 | |
# xmm1 = xmm3 / xmm1: the quotient is formed before the second factor exists
vdivsd xmm1, xmm3, xmm1 | |
# xmm3 is free after the divide and is reused to hold cC
vmovsd xmm3, qword ptr [rax] # xmm3 = mem[0],zero | |
movabs rax, 139958522833808 | |
# xmm0 = (a + cC)^2 * cD + cC (the second factor)
vaddsd xmm0, xmm0, xmm3 | |
vmulsd xmm0, xmm0, xmm0 | |
vmulsd xmm0, xmm0, qword ptr [rax] | |
vaddsd xmm0, xmm0, xmm3 | |
# xmm0 = quotient * second factor, then its square root
vmulsd xmm0, xmm1, xmm0 | |
vsqrtsd xmm0, xmm0, xmm0 | |
ret | |
# Sits after the ret with no label; presumably alignment padding.
nop | |
# Julia v1.5-beta1 (with LLVM v9): | |
# julia> @code_native syntax=:intel debuginfo=:none coeff_α(1,1) | |
# Native code for coeff_α(int,int), x86-64, Intel operand order (dst first).
# Notation used in the comments below:
#   a, b   = the integers in rdi and rsi on entry, converted to double
#            (presumably the first and second arguments, SysV convention)
#   cA     = the 8-byte constant at symbol .rodata.cst8
#   cB..cD = the 8-byte constants at absolute addresses ...288, ...296, ...304
#            (values not visible in this listing)
# Value left in xmm0 at the ret:
#   sqrt( (((a + cC)^2 * cD + cC) * (2a + cA)) / ((2a + cB) * (a*a - b*b)) )
# Note the evaluation order: the two numerator factors are multiplied first
# and the single division comes last. The v1.4.2 listing in this file instead
# issues its vdivsd before building the second factor.
.text | |
# xmm0 = a
vcvtsi2sd xmm0, xmm0, rdi | |
movabs rax, offset .rodata.cst8 | |
# xmm1 = b
vcvtsi2sd xmm1, xmm1, rsi | |
# xmm4 = a*a, xmm2 = 2a, xmm3 = 2a + cA
vmulsd xmm4, xmm0, xmm0 | |
vaddsd xmm2, xmm0, xmm0 | |
vaddsd xmm3, xmm2, qword ptr [rax] | |
movabs rax, 140666936433288 | |
# xmm2 = 2a + cB
vaddsd xmm2, xmm2, qword ptr [rax] | |
movabs rax, 140666936433296 | |
# xmm1 = b*b, then a*a - b*b
vmulsd xmm1, xmm1, xmm1 | |
vsubsd xmm1, xmm4, xmm1 | |
# xmm4 (a*a) is dead after the subtraction and is reused to hold cC
vmovsd xmm4, qword ptr [rax] # xmm4 = mem[0],zero | |
movabs rax, 140666936433304 | |
# xmm1 = (2a + cB) * (a*a - b*b) (the divisor)
vmulsd xmm1, xmm2, xmm1 | |
# xmm0 = (a + cC)^2 * cD + cC
vaddsd xmm0, xmm0, xmm4 | |
vmulsd xmm0, xmm0, xmm0 | |
vmulsd xmm0, xmm0, qword ptr [rax] | |
vaddsd xmm0, xmm0, xmm4 | |
# xmm0 = xmm0 * (2a + cA): full numerator, formed before dividing
vmulsd xmm0, xmm0, xmm3 | |
# xmm0 = numerator / divisor, then its square root
vdivsd xmm0, xmm0, xmm1 | |
vsqrtsd xmm0, xmm0, xmm0 | |
ret | |
# Sits after the ret with no label; presumably alignment padding.
nop | |
# Godbolt llc with LLVM v8 and v9 | |
# llc -march=x86-64 -mcpu=sandybridge -O1 | |
# Standalone llc output for the same function, x86-64, Intel operand order.
# With a and b the integers from rdi and rsi converted to double, the value
# left in xmm0 at the ret is
#   sqrt( ((2a + 1) / ((2a - 3) * (a*a - b*b))) * (4 * (a - 1)^2 - 1) )
# The division is issued before the final multiply. Constants are read
# RIP-relative from the .LCPI0_* entries below.
# Constant pool: IEEE-754 double bit patterns written as 64-bit integers.
.LCPI0_0: | |
.quad 4607182418800017408 # double 1 | |
.LCPI0_1: | |
.quad -4609434218613702656 # double -3 | |
.LCPI0_2: | |
.quad 4616189618054758400 # double 4 | |
"julia_coeff_α_1367": # @"julia_coeff_\CE\B1_1367" | |
# Prologue: save r14 and rbx, which carry the two arguments across the call.
# The push of rax only moves rsp by 8; the slot is dropped by `add rsp, 8`
# in the epilogue (presumably to keep rsp 16-byte aligned at the call, SysV
# convention assumed).
push r14 | |
push rbx | |
push rax | |
# Move the incoming arguments out of rsi/rdi into the saved registers.
mov r14, rsi | |
mov rbx, rdi | |
# No later instruction in this listing reads this call's return value.
call julia.ptls_states | |
# xmm0 = a, xmm1 = b
vcvtsi2sd xmm0, xmm0, rbx | |
vcvtsi2sd xmm1, xmm1, r14 | |
# xmm2 = 2a, xmm3 = 1.0, xmm4 = 2a + 1 (the dividend), xmm2 = 2a - 3
vaddsd xmm2, xmm0, xmm0 | |
vmovsd xmm3, qword ptr [rip + .LCPI0_0] # xmm3 = mem[0],zero | |
vaddsd xmm4, xmm2, xmm3 | |
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI0_1] | |
# xmm1 = (2a - 3) * (a*a - b*b) (the divisor)
vmulsd xmm5, xmm0, xmm0 | |
vmulsd xmm1, xmm1, xmm1 | |
vsubsd xmm1, xmm5, xmm1 | |
vmulsd xmm1, xmm2, xmm1 | |
# xmm0 = 4 * (a - 1)^2
vsubsd xmm0, xmm0, xmm3 | |
vmulsd xmm0, xmm0, xmm0 | |
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI0_2] | |
# xmm1 = (2a + 1) / divisor
vdivsd xmm1, xmm4, xmm1 | |
# xmm0 = 4 * (a - 1)^2 - 1, then quotient * xmm0, then its square root
vsubsd xmm0, xmm0, xmm3 | |
vmulsd xmm0, xmm1, xmm0 | |
vsqrtsd xmm0, xmm0, xmm0 | |
# Epilogue: drop the slot pushed for rax, restore rbx and r14 in reverse order.
add rsp, 8 | |
pop rbx | |
pop r14 | |
ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment