Skip to content

Instantly share code, notes, and snippets.

@jmert
Created June 15, 2020 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmert/6aea12adb74ef8b7f25eba276d42911a to your computer and use it in GitHub Desktop.
Save jmert/6aea12adb74ef8b7f25eba276d42911a to your computer and use it in GitHub Desktop.
ASM for coeff_α(int,int) compared across Julia v1.4.2 (LLVM v8), Julia v1.5-beta1 (LLVM v9), and standalone LLVM llc (v8 and v9)
# Julia v1.4.2 (with LLVM v8):
#
# Native code for coeff_α(int,int): straight-line scalar-double AVX arithmetic,
# Intel operand order (dst, src1, src2).
#   In:   rdi, rsi  - signed 64-bit integers (converted to double below)
#   Out:  xmm0      - double result
#   Uses: rax, xmm0-xmm4
#
# Notation used in the comments below:
#   a = double(rdi), b = double(rsi)
#   c0..c3 = four 8-byte constants loaded from the consecutive absolute
#            addresses ...784, ...792, ...800, ...808.
#   The constant values are not visible in this listing; presumably
#   c0 = 1, c1 = -3, c2 = -1, c3 = 4 by analogy with the llc listing of the
#   same function -- TODO confirm.
#
# Result:
#   xmm0 = sqrt( ((2a + c0) / ((2a + c1) * (a*a - b*b))) * (c3*(a + c2)^2 + c2) )
# Evaluation order: the divide is issued first, and its quotient is then
# multiplied by the second factor.
.text
vcvtsi2sd xmm0, xmm0, rdi # xmm0 = a = double(rdi)
movabs rax, 139958522833784 # rax = &c0
vcvtsi2sd xmm1, xmm1, rsi # xmm1 = b = double(rsi)
vaddsd xmm2, xmm0, xmm0 # xmm2 = 2a
vaddsd xmm3, xmm2, qword ptr [rax] # xmm3 = 2a + c0
movabs rax, 139958522833792 # rax = &c1
vmulsd xmm4, xmm0, xmm0 # xmm4 = a*a
vaddsd xmm2, xmm2, qword ptr [rax] # xmm2 = 2a + c1
movabs rax, 139958522833800 # rax = &c2
vmulsd xmm1, xmm1, xmm1 # xmm1 = b*b
vsubsd xmm1, xmm4, xmm1 # xmm1 = a*a - b*b
vmulsd xmm1, xmm2, xmm1 # xmm1 = (2a + c1) * (a*a - b*b)
vdivsd xmm1, xmm3, xmm1 # xmm1 = (2a + c0) / ((2a + c1) * (a*a - b*b))
vmovsd xmm3, qword ptr [rax] # xmm3 = mem[0],zero  (xmm3 = c2; the old 2a + c0 is no longer needed)
movabs rax, 139958522833808 # rax = &c3
vaddsd xmm0, xmm0, xmm3 # xmm0 = a + c2
vmulsd xmm0, xmm0, xmm0 # xmm0 = (a + c2)^2
vmulsd xmm0, xmm0, qword ptr [rax] # xmm0 = c3 * (a + c2)^2
vaddsd xmm0, xmm0, xmm3 # xmm0 = c3 * (a + c2)^2 + c2
vmulsd xmm0, xmm1, xmm0 # xmm0 = quotient * second factor
vsqrtsd xmm0, xmm0, xmm0 # xmm0 = sqrt(xmm0)
ret
# Single nop emitted after ret; presumably trailing padding (not reached by the code above).
nop
# Julia v1.5-beta1 (with LLVM v9):
# julia> @code_native syntax=:intel debuginfo=:none coeff_α(1,1)
#
# Native code for coeff_α(int,int): straight-line scalar-double AVX arithmetic,
# Intel operand order (dst, src1, src2).
#   In:   rdi, rsi  - signed 64-bit integers (converted to double below)
#   Out:  xmm0      - double result
#   Uses: rax, xmm0-xmm4
#
# Notation used in the comments below:
#   a = double(rdi), b = double(rsi)
#   c0 = 8-byte constant at the symbol .rodata.cst8
#   c1, c2, c3 = 8-byte constants at absolute addresses ...288, ...296, ...304
#   The constant values are not visible in this listing; presumably
#   c0 = 1, c1 = -3, c2 = -1, c3 = 4 by analogy with the llc listing of the
#   same function -- TODO confirm.
#
# Result:
#   xmm0 = sqrt( ((c3*(a + c2)^2 + c2) * (2a + c0)) / ((2a + c1) * (a*a - b*b)) )
# Evaluation order: both numerator factors are multiplied together first and
# the single divide is issued last, immediately before the square root.
.text
vcvtsi2sd xmm0, xmm0, rdi # xmm0 = a = double(rdi)
movabs rax, offset .rodata.cst8 # rax = &c0 (symbolic here; the other constants use raw addresses)
vcvtsi2sd xmm1, xmm1, rsi # xmm1 = b = double(rsi)
vmulsd xmm4, xmm0, xmm0 # xmm4 = a*a
vaddsd xmm2, xmm0, xmm0 # xmm2 = 2a
vaddsd xmm3, xmm2, qword ptr [rax] # xmm3 = 2a + c0 (kept live until the final multiply)
movabs rax, 140666936433288 # rax = &c1
vaddsd xmm2, xmm2, qword ptr [rax] # xmm2 = 2a + c1
movabs rax, 140666936433296 # rax = &c2
vmulsd xmm1, xmm1, xmm1 # xmm1 = b*b
vsubsd xmm1, xmm4, xmm1 # xmm1 = a*a - b*b (last use of a*a in xmm4)
vmovsd xmm4, qword ptr [rax] # xmm4 = mem[0],zero  (xmm4 reused to hold c2)
movabs rax, 140666936433304 # rax = &c3
vmulsd xmm1, xmm2, xmm1 # xmm1 = (2a + c1) * (a*a - b*b)  -- the denominator
vaddsd xmm0, xmm0, xmm4 # xmm0 = a + c2
vmulsd xmm0, xmm0, xmm0 # xmm0 = (a + c2)^2
vmulsd xmm0, xmm0, qword ptr [rax] # xmm0 = c3 * (a + c2)^2
vaddsd xmm0, xmm0, xmm4 # xmm0 = c3 * (a + c2)^2 + c2
vmulsd xmm0, xmm0, xmm3 # xmm0 = (c3 * (a + c2)^2 + c2) * (2a + c0)  -- the numerator
vdivsd xmm0, xmm0, xmm1 # xmm0 = numerator / denominator
vsqrtsd xmm0, xmm0, xmm0 # xmm0 = sqrt(xmm0)
ret
# Single nop emitted after ret; presumably trailing padding (not reached by the code above).
nop
# Godbolt llc with LLVM v8 and v9
# llc -march=x86-64 -mcpu=sandybridge -O1
#
# Standalone llc output for the same function, Intel operand order
# (dst, src1, src2). Unlike the JIT listings, the constants are visible here
# and are addressed RIP-relative.
#   In:   rdi, rsi  - signed 64-bit integers (converted to double below)
#   Out:  xmm0      - double result
#   Uses: rax, rbx, r14 (rbx/r14 saved and restored), xmm0-xmm5
#
# With a = double(rdi), b = double(rsi):
#   xmm0 = sqrt( ((2a + 1) / ((2a - 3) * (a*a - b*b))) * (4*(a - 1)^2 - 1) )
# Evaluation order: the divide is issued before the final multiply.
#
# Constant pool: each .quad is the IEEE-754 binary64 bit pattern of the
# double named in its trailing comment, printed as a signed 64-bit integer.
.LCPI0_0:
.quad 4607182418800017408 # double 1
.LCPI0_1:
.quad -4609434218613702656 # double -3
.LCPI0_2:
.quad 4616189618054758400 # double 4
"julia_coeff_α_1367": # @"julia_coeff_\CE\B1_1367"
# Prologue: rbx and r14 are saved because they are overwritten below to carry
# the two integer arguments across the call.
push r14
push rbx
# rax is pushed but never popped (the slot is dropped by `add rsp, 8` in the
# epilogue); presumably this only adjusts rsp by 8 so the stack is 16-byte
# aligned at the call -- TODO confirm.
push rax
mov r14, rsi # keep second argument across the call
mov rbx, rdi # keep first argument across the call
# The return value of this call is not read by any later instruction in this listing.
call julia.ptls_states
vcvtsi2sd xmm0, xmm0, rbx # xmm0 = a = double(first argument)
vcvtsi2sd xmm1, xmm1, r14 # xmm1 = b = double(second argument)
vaddsd xmm2, xmm0, xmm0 # xmm2 = 2a
vmovsd xmm3, qword ptr [rip + .LCPI0_0] # xmm3 = mem[0],zero  (xmm3 = 1.0)
vaddsd xmm4, xmm2, xmm3 # xmm4 = 2a + 1
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI0_1] # xmm2 = 2a - 3
vmulsd xmm5, xmm0, xmm0 # xmm5 = a*a
vmulsd xmm1, xmm1, xmm1 # xmm1 = b*b
vsubsd xmm1, xmm5, xmm1 # xmm1 = a*a - b*b
vmulsd xmm1, xmm2, xmm1 # xmm1 = (2a - 3) * (a*a - b*b)
vsubsd xmm0, xmm0, xmm3 # xmm0 = a - 1
vmulsd xmm0, xmm0, xmm0 # xmm0 = (a - 1)^2
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI0_2] # xmm0 = 4 * (a - 1)^2
vdivsd xmm1, xmm4, xmm1 # xmm1 = (2a + 1) / ((2a - 3) * (a*a - b*b))
vsubsd xmm0, xmm0, xmm3 # xmm0 = 4 * (a - 1)^2 - 1
vmulsd xmm0, xmm1, xmm0 # xmm0 = quotient * (4 * (a - 1)^2 - 1)
vsqrtsd xmm0, xmm0, xmm0 # xmm0 = sqrt(xmm0)
# Epilogue: drop the 8-byte slot created by `push rax`, then restore the
# saved registers in reverse push order.
add rsp, 8
pop rbx
pop r14
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment