@ffreyer · Created September 2, 2022 17:13
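Context: `reflectorApply!` is LinearAlgebra's Householder-reflector kernel (the workhorse of QR factorization); the dumps below show its native and typed code specialized for `SubArray{Float64, …}` arguments. The gist does not include the method definition itself. Judging from the `LayoutPointers`/`VectorizationBase` `llvmcall`s in the `@code_typed` output, it is a LoopVectorization-based variant; the following is a minimal sketch of what such a definition might look like — argument names and loop structure are assumptions inferred from the IR, not the gist's actual code:

# Hypothetical sketch (not the gist's actual definition): the outer column loop
# stays plain Julia, matching the IR, while the two inner row loops are
# vectorized with @turbo. Real eltype assumed, so conj(τ) == τ.
using LoopVectorization

function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractMatrix)
    m, n = size(A)
    for j in 1:n
        # Fused dot product: vAj = A[1, j] + x[2:m] ⋅ A[2:m, j]
        vAj = A[1, j]
        @turbo for i in 2:m
            vAj += x[i] * A[i, j]
        end
        vAj = τ * vAj
        A[1, j] -= vAj
        # Rank-1 update of the remaining rows of column j
        @turbo for i in 2:m
            A[i, j] -= x[i] * vAj
        end
    end
    return A
end

In the assembly below, the two `@turbo` loops correspond to the `vfmadd231pd` reduction blocks (dot product) and the `vfnmadd213pd` store blocks (column update), each with masked `vmaskmovpd` tails for remainders shorter than a full 4×-unrolled AVX2 stride.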
julia> @code_native debuginfo = :none syntax = :intel reflectorApply!(x, τ, y)
.text
push rbp
mov rbp, rsp
push r15
push r14
push r13
push r12
push rbx
and rsp, -32
sub rsp, 384
vmovq qword ptr [rsp + 224], xmm0
mov qword ptr [rsp + 48], rdx
mov qword ptr [rsp + 144], rsi
mov qword ptr [rsp + 160], rdi
vpxor xmm0, xmm0, xmm0
vmovdqa ymmword ptr [rsp + 288], ymm0
mov rax, qword ptr fs:[0]
mov rdx, qword ptr [rax - 8]
mov qword ptr [rsp + 288], 8
mov rax, qword ptr [rdx]
mov qword ptr [rsp + 296], rax
lea rax, [rsp + 288]
mov qword ptr [rsp + 152], rdx
mov qword ptr [rdx], rax
mov rdi, qword ptr [rcx + 32]
mov qword ptr [rsp + 32], rcx
mov rcx, qword ptr [rcx + 24]
mov rax, rcx
mov qword ptr [rsp + 80], rcx
sub rdi, rcx
inc rdi
test rdi, rdi
jle L1813
mov rcx, qword ptr [rsp + 32]
mov rax, qword ptr [rcx + 8]
mov rdx, qword ptr [rcx + 16]
sub rdx, rax
inc rdx
mov rsi, rdx
sar rsi, 63
andn r11, rsi, rdx
mov rdx, rdi
sar rdx, 63
andn rdx, rdx, rdi
mov qword ptr [rsp + 216], rdx
mov r8, qword ptr [rcx]
mov rdi, qword ptr [r8 + 24]
mov r15, qword ptr [rsp + 48]
mov r9, qword ptr [r15]
mov rcx, qword ptr [r15 + 24]
dec rcx
imul rcx, qword ptr [r9 + 24]
mov rdx, qword ptr [rsp + 80]
lea r10, [rdx - 1]
mov qword ptr [rsp + 200], rdi
imul r10, rdi
lea rdi, [rax + r10]
dec rdi
shl rdi, 3
mov qword ptr [rsp + 96], rdi
lea r13, [r11 - 2]
cmp r11, 18
setl dil
cmp r13, 15
setne bl
and bl, dil
mov byte ptr [rsp + 23], bl
lea rdx, [r11 - 1]
mov r14, rdx
and r14, -16
and rdx, -8
lea ebx, [r11 + 2]
and ebx, 3
xor esi, esi
cmp r11, 1
mov edi, 3
cmovg rdi, rbx
vmovq xmm0, rdi
cmovle r14, rsi
cmovle rdx, rsi
mov qword ptr [rsp + 64], rdx
lea rdx, [rax - 1]
mov qword ptr [rsp + 184], rdx
shl rax, 3
lea rax, [rax + 8*r10]
add rax, 96
mov qword ptr [rsp + 88], rax
mov rax, qword ptr [r15 + 8]
lea rdx, [rax + rcx]
shl rax, 3
lea rax, [rax + 8*rcx]
mov qword ptr [rsp + 192], r9
mov rcx, qword ptr [r9]
lea rdx, [rcx + 8*rdx]
mov qword ptr [rsp + 24], rdx
lea rbx, [rcx + rax + 96]
movabs rax, offset .rodata.cst8
vpbroadcastq ymm1, qword ptr [rax]
movabs rax, offset .rodata.cst32
vmovdqa ymm2, ymmword ptr [rax]
vpbroadcastq ymm0, xmm0
vpxor ymm0, ymm0, ymm1
lea rax, [r11 - 6]
mov qword ptr [rsp + 56], rax
lea rax, [r11 - 10]
mov qword ptr [rsp + 176], rax
add r11, -14
mov qword ptr [rsp + 168], r11
vpcmpgtq ymm1, ymm2, ymm0
vpcmpeqd ymm0, ymm0, ymm0
vmovdqa ymmword ptr [rsp + 320], ymm1
vpxor ymm0, ymm1, ymm0
vmovdqa ymmword ptr [rsp + 256], ymm0
mov qword ptr [rsp + 208], r8
mov rax, qword ptr [r8]
mov qword ptr [rsp + 112], rax
mov r15d, 1
mov qword ptr [rsp + 72], r13
jmp L632
nop dword ptr [rax + rax]
L560:
mov rcx, qword ptr [rsp + 24]
vmovapd ymm3, ymmword ptr [rsp + 256]
vmaskmovpd ymm0, ymm3, ymmword ptr [rcx + 8*r14]
vmaskmovpd ymm1, ymm3, ymmword ptr [rax + 8*r14]
vbroadcastsd ymm2, xmm7
vfnmadd213pd ymm2, ymm0, ymm1 # ymm2 = -(ymm0 * ymm2) + ymm1
vmaskmovpd ymmword ptr [rax + 8*r14], ymm3, ymm2
L602:
mov rcx, qword ptr [rsp + 248]
lea r15, [rcx + 1]
add rsi, 8
cmp rcx, qword ptr [rsp + 216]
je L1813
L632:
mov qword ptr [rsp + 120], rsi
mov rax, qword ptr [rsp + 80]
lea rcx, [r15 + rax]
add rcx, -2
imul rcx, qword ptr [rsp + 200]
add rcx, qword ptr [rsp + 184]
mov rax, qword ptr [rsp + 112]
mov qword ptr [rsp + 40], rcx
vmovsd xmm0, qword ptr [rax + 8*rcx] # xmm0 = mem[0],zero
vmovsd qword ptr [rsp + 128], xmm0
mov rax, qword ptr [rsp + 192]
mov qword ptr [rsp + 312], rax
mov r13, qword ptr [rsp + 208]
mov qword ptr [rsp + 304], r13
movabs r12, 140286483749744
mov rdi, r12
mov rsi, qword ptr [rsp + 48]
movabs rax, offset StrideIndex
vzeroupper
call rax
mov r13, qword ptr [r13]
mov rax, qword ptr [rsp + 96]
add rax, r13
mov qword ptr [rsp + 104], rax
mov rdi, r12
mov rsi, qword ptr [rsp + 32]
movabs rax, offset StrideIndex
call rax
mov qword ptr [rsp + 248], r15
lea rsi, [8*r15 - 8]
mov rcx, rsi
imul rcx, rax
cmp byte ptr [rsp + 23], 0
mov qword ptr [rsp + 240], r13
je L880
vxorpd xmm0, xmm0, xmm0
xor eax, eax
vpxor xmm1, xmm1, xmm1
mov rdx, qword ptr [rsp + 64]
vmovsd xmm5, qword ptr [rsp + 128] # xmm5 = mem[0],zero
mov rdi, qword ptr [rsp + 104]
jmp L1017
nop word ptr cs:[rax + rax]
L880:
mov rdx, qword ptr [rsp + 88]
add rdx, r13
imul rax, qword ptr [rsp + 120]
add rax, rdx
vpxor xmm1, xmm1, xmm1
vxorpd xmm0, xmm0, xmm0
vpxor xmm2, xmm2, xmm2
vxorpd xmm3, xmm3, xmm3
xor edx, edx
nop word ptr cs:[rax + rax]
L928:
vmovupd ymm4, ymmword ptr [rbx + 8*rdx - 96]
vmovupd ymm5, ymmword ptr [rbx + 8*rdx - 64]
vmovupd ymm6, ymmword ptr [rbx + 8*rdx - 32]
vmovupd ymm7, ymmword ptr [rbx + 8*rdx]
vfmadd231pd ymm3, ymm4, ymmword ptr [rax + 8*rdx - 96] # ymm3 = (ymm4 * mem) + ymm3
vfmadd231pd ymm2, ymm5, ymmword ptr [rax + 8*rdx - 64] # ymm2 = (ymm5 * mem) + ymm2
vfmadd231pd ymm0, ymm6, ymmword ptr [rax + 8*rdx - 32] # ymm0 = (ymm6 * mem) + ymm0
vfmadd231pd ymm1, ymm7, ymmword ptr [rax + 8*rdx] # ymm1 = (ymm7 * mem) + ymm1
add rdx, 16
cmp r14, rdx
jne L928
vaddpd ymm0, ymm3, ymm0
vaddpd ymm1, ymm2, ymm1
mov rax, r14
mov rdx, qword ptr [rsp + 64]
vmovsd xmm5, qword ptr [rsp + 128] # xmm5 = mem[0],zero
mov rdi, qword ptr [rsp + 104]
L1017:
add rcx, rdi
add rcx, 8
cmp rax, rdx
jne L1040
mov rax, rdx
mov r13, qword ptr [rsp + 72]
jmp L1078
nop
L1040:
mov rdx, qword ptr [rsp + 24]
vmovupd ymm2, ymmword ptr [rdx + 8*rax]
vmovupd ymm3, ymmword ptr [rdx + 8*rax + 32]
vfmadd231pd ymm0, ymm2, ymmword ptr [rcx + 8*rax] # ymm0 = (ymm2 * mem) + ymm0
vfmadd231pd ymm1, ymm3, ymmword ptr [rcx + 8*rax + 32] # ymm1 = (ymm3 * mem) + ymm1
or rax, 8
mov r13, qword ptr [rsp + 72]
L1078:
cmp rax, r13
mov qword ptr [rsp + 232], rsi
jle L1104
mov rcx, qword ptr [rsp + 40]
jmp L1232
nop dword ptr [rax]
L1104:
cmp qword ptr [rsp + 56], rax
jge L1168
mov rdx, qword ptr [rsp + 24]
vmovapd ymm3, ymmword ptr [rsp + 256]
vmaskmovpd ymm2, ymm3, ymmword ptr [rdx + 8*rax]
vmaskmovpd ymm3, ymm3, ymmword ptr [rcx + 8*rax]
vfmadd213pd ymm3, ymm2, ymm0 # ymm3 = (ymm2 * ymm3) + ymm0
vmovapd ymm2, ymmword ptr [rsp + 320]
vblendvpd ymm0, ymm3, ymm0, ymm2
mov rcx, qword ptr [rsp + 40]
jmp L1232
nop dword ptr [rax]
L1168:
mov rdx, qword ptr [rsp + 24]
vmovapd ymm3, ymmword ptr [rsp + 256]
vmaskmovpd ymm2, ymm3, ymmword ptr [rdx + 8*rax + 32]
vmaskmovpd ymm3, ymm3, ymmword ptr [rcx + 8*rax + 32]
vmovupd ymm4, ymmword ptr [rdx + 8*rax]
vfmadd231pd ymm0, ymm4, ymmword ptr [rcx + 8*rax] # ymm0 = (ymm4 * mem) + ymm0
vfmadd213pd ymm3, ymm2, ymm1 # ymm3 = (ymm2 * ymm3) + ymm1
vmovapd ymm2, ymmword ptr [rsp + 320]
vblendvpd ymm1, ymm3, ymm1, ymm2
mov rcx, qword ptr [rsp + 40]
L1232:
vaddpd ymm0, ymm0, ymm1
vextractf128 xmm1, ymm0, 1
vaddpd xmm0, xmm0, xmm1
vpermilpd xmm1, xmm0, 1 # xmm1 = xmm0[1,0]
vaddsd xmm0, xmm0, xmm1
vaddsd xmm0, xmm5, xmm0
vmulsd xmm1, xmm0, qword ptr [rsp + 224]
vmovapd xmmword ptr [rsp + 128], xmm1
mov rax, qword ptr [rsp + 112]
vmovsd xmm0, qword ptr [rax + 8*rcx] # xmm0 = mem[0],zero
vsubsd xmm0, xmm0, xmm1
vmovsd qword ptr [rax + 8*rcx], xmm0
movabs r12, 140286483749744
mov rdi, r12
mov rsi, qword ptr [rsp + 32]
movabs rax, offset StrideIndex
vzeroupper
call rax
mov r15, rax
mov rdi, r12
mov rsi, qword ptr [rsp + 48]
movabs rax, offset StrideIndex
call rax
vmovapd xmm7, xmmword ptr [rsp + 128]
test r14, r14
mov rsi, qword ptr [rsp + 120]
mov rdx, qword ptr [rsp + 240]
je L1490
vbroadcastsd ymm0, xmm7
mov rax, qword ptr [rsp + 88]
lea rcx, [rdx + rax]
mov rax, r15
imul rax, rsi
add rax, rcx
xor ecx, ecx
nop
L1408:
vmovupd ymm1, ymmword ptr [rbx + 8*rcx - 96]
vmovupd ymm2, ymmword ptr [rbx + 8*rcx - 64]
vmovupd ymm3, ymmword ptr [rbx + 8*rcx - 32]
vmovupd ymm4, ymmword ptr [rbx + 8*rcx]
vfnmadd213pd ymm1, ymm0, ymmword ptr [rax + 8*rcx - 96] # ymm1 = -(ymm0 * ymm1) + mem
vfnmadd213pd ymm2, ymm0, ymmword ptr [rax + 8*rcx - 64] # ymm2 = -(ymm0 * ymm2) + mem
vfnmadd213pd ymm3, ymm0, ymmword ptr [rax + 8*rcx - 32] # ymm3 = -(ymm0 * ymm3) + mem
vfnmadd213pd ymm4, ymm0, ymmword ptr [rax + 8*rcx] # ymm4 = -(ymm0 * ymm4) + mem
vmovupd ymmword ptr [rax + 8*rcx - 96], ymm1
vmovupd ymmword ptr [rax + 8*rcx - 64], ymm2
vmovupd ymmword ptr [rax + 8*rcx - 32], ymm3
vmovupd ymmword ptr [rax + 8*rcx], ymm4
add rcx, 16
cmp r14, rcx
jne L1408
L1490:
cmp r14, r13
jg L602
mov rcx, qword ptr [rsp + 232]
imul rcx, r15
mov rax, qword ptr [rsp + 96]
add rax, rdx
add rax, 8
add rax, rcx
cmp qword ptr [rsp + 56], r14
jl L560
cmp qword ptr [rsp + 176], r14
jge L1615
mov rcx, qword ptr [rsp + 24]
vmovapd ymm4, ymmword ptr [rsp + 256]
vmaskmovpd ymm0, ymm4, ymmword ptr [rcx + 8*r14 + 32]
vmovupd ymm1, ymmword ptr [rcx + 8*r14]
vmaskmovpd ymm2, ymm4, ymmword ptr [rax + 8*r14 + 32]
vbroadcastsd ymm3, xmm7
vfnmadd213pd ymm1, ymm3, ymmword ptr [rax + 8*r14] # ymm1 = -(ymm3 * ymm1) + mem
vfnmadd231pd ymm2, ymm3, ymm0 # ymm2 = -(ymm3 * ymm0) + ymm2
vmovupd ymmword ptr [rax + 8*r14], ymm1
vmaskmovpd ymmword ptr [rax + 8*r14 + 32], ymm4, ymm2
jmp L602
L1615:
mov rcx, qword ptr [rsp + 24]
vmovupd ymm1, ymmword ptr [rcx + 8*r14]
vmovupd ymm0, ymmword ptr [rcx + 8*r14 + 32]
cmp qword ptr [rsp + 168], r14
jge L1720
lea rcx, [rcx + 8*r14]
add rcx, 64
vmovapd ymm5, ymmword ptr [rsp + 256]
vmaskmovpd ymm2, ymm5, ymmword ptr [rcx]
vmaskmovpd ymm3, ymm5, ymmword ptr [rax + 8*r14 + 64]
vbroadcastsd ymm4, xmm7
vfnmadd213pd ymm1, ymm4, ymmword ptr [rax + 8*r14] # ymm1 = -(ymm4 * ymm1) + mem
vfnmadd213pd ymm0, ymm4, ymmword ptr [rax + 8*r14 + 32] # ymm0 = -(ymm4 * ymm0) + mem
vfnmadd231pd ymm3, ymm4, ymm2 # ymm3 = -(ymm4 * ymm2) + ymm3
vmovupd ymmword ptr [rax + 8*r14], ymm1
vmovupd ymmword ptr [rax + 8*r14 + 32], ymm0
vmaskmovpd ymmword ptr [rax + 8*r14 + 64], ymm5, ymm3
jmp L602
L1720:
lea rcx, [rcx + 8*r14]
vmovapd ymm6, ymmword ptr [rsp + 256]
vmaskmovpd ymm2, ymm6, ymmword ptr [rcx + 96]
vmaskmovpd ymm3, ymm6, ymmword ptr [rax + 8*r14 + 96]
vbroadcastsd ymm4, xmm7
vfnmadd213pd ymm1, ymm4, ymmword ptr [rax + 8*r14] # ymm1 = -(ymm4 * ymm1) + mem
vfnmadd213pd ymm0, ymm4, ymmword ptr [rax + 8*r14 + 32] # ymm0 = -(ymm4 * ymm0) + mem
vmovupd ymm5, ymmword ptr [rcx + 64]
vfnmadd213pd ymm5, ymm4, ymmword ptr [rax + 8*r14 + 64] # ymm5 = -(ymm4 * ymm5) + mem
vfnmadd231pd ymm3, ymm4, ymm2 # ymm3 = -(ymm4 * ymm2) + ymm3
vmovupd ymmword ptr [rax + 8*r14], ymm1
vmovupd ymmword ptr [rax + 8*r14 + 32], ymm0
vmovupd ymmword ptr [rax + 8*r14 + 64], ymm5
vmaskmovpd ymmword ptr [rax + 8*r14 + 96], ymm6, ymm3
jmp L602
L1813:
mov rcx, qword ptr [rsp + 32]
mov rax, qword ptr [rcx]
mov rdx, qword ptr [rsp + 144]
mov qword ptr [rdx], rax
vmovups ymm0, ymmword ptr [rcx]
vmovups ymm1, ymmword ptr [rcx + 24]
mov rax, qword ptr [rsp + 160]
vmovups ymmword ptr [rax + 24], ymm1
vmovups ymmword ptr [rax], ymm0
mov rcx, qword ptr [rsp + 296]
mov rdx, qword ptr [rsp + 152]
mov qword ptr [rdx], rcx
lea rsp, [rbp - 40]
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper
ret
nop word ptr [rax + rax]
julia> @code_typed reflectorApply!(x, τ, y)
CodeInfo(
1 ─── %1 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %2 = Core.getfield(%1, 1)::UnitRange{Int64}
│ %3 = Core.getfield(%1, 2)::UnitRange{Int64}
│ %4 = Base.getfield(%2, :stop)::Int64
│ %5 = Base.getfield(%2, :start)::Int64
│ %6 = Base.sub_int(%4, %5)::Int64
│ %7 = Base.add_int(%6, 1)::Int64
│ %8 = Base.slt_int(%7, 0)::Bool
│ %9 = Base.ifelse(%8, 0, %7)::Int64
│ %10 = Base.getfield(%3, :stop)::Int64
│ %11 = Base.getfield(%3, :start)::Int64
│ %12 = Base.sub_int(%10, %11)::Int64
│ %13 = Base.add_int(%12, 1)::Int64
│ %14 = Base.slt_int(%13, 0)::Bool
│ %15 = Base.ifelse(%14, 0, %13)::Int64
│ %16 = Base.sle_int(1, %15)::Bool
│ %17 = Base.ifelse(%16, %15, 0)::Int64
│ %18 = Base.slt_int(%17, 1)::Bool
└──── goto #3 if not %18
2 ─── Base.nothing::Nothing
└──── goto #4
3 ─── goto #4
4 ┄── %23 = φ (#2 => true, #3 => false)::Bool
│ %24 = φ (#3 => 1)::Int64
│ %25 = φ (#3 => 1)::Int64
│ %26 = Base.not_int(%23)::Bool
└──── goto #151 if not %26
5 ┄── %28 = φ (#4 => %24, #150 => %682)::Int64
│ %29 = φ (#4 => %25, #150 => %683)::Int64
└──── goto #10 if not false
6 ─── %31 = Core.tuple(1, %28)::Tuple{Int64, Int64}
│ %32 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %33 = Core.getfield(%32, 1)::UnitRange{Int64}
│ %34 = Core.getfield(%32, 2)::UnitRange{Int64}
│ %35 = Base.getfield(%33, :stop)::Int64
│ %36 = Base.getfield(%33, :start)::Int64
│ %37 = Base.sub_int(%35, %36)::Int64
│ %38 = Base.add_int(%37, 1)::Int64
│ %39 = Base.slt_int(%38, 0)::Bool
│ %40 = Base.ifelse(%39, 0, %38)::Int64
│ %41 = Base.getfield(%34, :stop)::Int64
│ %42 = Base.getfield(%34, :start)::Int64
│ %43 = Base.sub_int(%41, %42)::Int64
│ %44 = Base.add_int(%43, 1)::Int64
│ %45 = Base.slt_int(%44, 0)::Bool
│ %46 = Base.ifelse(%45, 0, %44)::Int64
│ %47 = Base.sle_int(1, 1)::Bool
│ %48 = Base.sle_int(1, %40)::Bool
│ %49 = Base.and_int(%47, %48)::Bool
│ %50 = Base.sle_int(1, %28)::Bool
│ %51 = Base.sle_int(%28, %46)::Bool
│ %52 = Base.and_int(%50, %51)::Bool
│ %53 = Base.and_int(%52, true)::Bool
│ %54 = Base.and_int(%49, %53)::Bool
└──── goto #8 if not %54
7 ─── Base.nothing::Nothing
└──── goto #9
8 ─── invoke Base.throw_boundserror(A::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}, %31::Tuple{Int64, Int64})::Union{}
└──── unreachable
9 ─── nothing::Nothing
10 ┄─ %61 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %62 = Base.getfield(%61, 1, false)::UnitRange{Int64}
│ %63 = Base.getfield(%62, :start)::Int64
│ %64 = Base.sub_int(1, 1)::Int64
│ %65 = Base.add_int(%63, %64)::Int64
└──── goto #19 if not false
11 ── %67 = Base.slt_int(0, 1)::Bool
└──── goto #15 if not %67
12 ── %69 = Base.getfield(%62, :stop)::Int64
│ %70 = Base.sle_int(%65, %69)::Bool
└──── goto #14 if not %70
13 ── %72 = Base.getfield(%62, :start)::Int64
│ %73 = Base.sle_int(%72, %65)::Bool
└──── goto #16
14 ── goto #16
15 ── goto #16
16 ┄─ %77 = φ (#13 => %73, #14 => false, #15 => false)::Bool
└──── goto #18 if not %77
17 ── goto #19
18 ── invoke Base.throw_boundserror(%62::UnitRange{Int64}, 1::Int64)::Union{}
└──── unreachable
19 ┄─ goto #20
20 ── %83 = Core.getfield(%61, 2)::UnitRange{Int64}
│ %84 = Base.getfield(%83, :start)::Int64
│ %85 = Base.sub_int(%28, 1)::Int64
│ %86 = Base.add_int(%84, %85)::Int64
└──── goto #29 if not false
21 ── %88 = Base.slt_int(0, %28)::Bool
└──── goto #25 if not %88
22 ── %90 = Base.getfield(%83, :stop)::Int64
│ %91 = Base.sle_int(%86, %90)::Bool
└──── goto #24 if not %91
23 ── %93 = Base.getfield(%83, :start)::Int64
│ %94 = Base.sle_int(%93, %86)::Bool
└──── goto #26
24 ── goto #26
25 ── goto #26
26 ┄─ %98 = φ (#23 => %94, #24 => false, #25 => false)::Bool
└──── goto #28 if not %98
27 ── goto #29
28 ── invoke Base.throw_boundserror(%83::UnitRange{Int64}, %28::Int64)::Union{}
└──── unreachable
29 ┄─ goto #30
30 ── goto #31
31 ── goto #32
32 ── %106 = Base.getfield(A, :parent)::Matrix{Float64}
│ %107 = Base.arrayref(false, %106, %65, %86)::Float64
└──── goto #33
33 ── %109 = Base.slt_int(%9, 2)::Bool
└──── goto #35 if not %109
34 ── goto #36
35 ── goto #36
36 ┄─ goto #37
37 ── goto #38
38 ── %115 = Base.getfield(x, :parent)::Matrix{Float64}
│ %116 = $(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(%115)))::Ptr{Float64}
│ %117 = Base.getfield(x, :parent)::Matrix{Float64}
│ %118 = Base.getfield(x, :indices)::Tuple{UnitRange{Int64}, Int64}
│ %119 = LayoutPointers.getfield(%118, 1, false)::UnitRange{Int64}
│ %120 = Base.getfield(%119, :start)::Int64
│ %121 = Base.sub_int(%120, 1)::Int64
│ %122 = Core.getfield(%118, 2)::Int64
│ %123 = Base.sub_int(%122, 1)::Int64
│ %124 = Base.arraysize(%117, 1)::Int64
│ Base.arraysize(%117, 2)::Int64
│ %126 = Base.mul_int(1, %124)::Int64
│ %127 = Base.mul_int(%121, 1)::Int64
│ %128 = Base.mul_int(%123, %126)::Int64
│ %129 = Base.add_int(%127, %128)::Int64
│ %130 = Base.mul_int(8, %129)::Int64
│ %131 = Core.bitcast(Core.UInt, %116)::UInt64
│ %132 = Base.bitcast(UInt64, %130)::UInt64
│ %133 = Base.add_ptr(%131, %132)::UInt64
│ %134 = Core.bitcast(Ptr{Float64}, %133)::Ptr{Float64}
│ invoke LayoutPointers.StrideIndex(x::SubArray{Float64, 1, Matrix{Float64}, Tuple{UnitRange{Int64}, Int64}, true})::ArrayInterface.StrideIndex{1, (1,), 1, Tuple{Static.StaticInt{1}}, Tuple{Static.StaticInt{1}}}
│ %136 = Base.getfield(A, :parent)::Matrix{Float64}
│ %137 = $(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(%136)))::Ptr{Float64}
│ %138 = Base.getfield(A, :parent)::Matrix{Float64}
│ %139 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %140 = LayoutPointers.getfield(%139, 1, false)::UnitRange{Int64}
│ %141 = Base.getfield(%140, :start)::Int64
│ %142 = Base.sub_int(%141, 1)::Int64
│ %143 = Core.getfield(%139, 2)::UnitRange{Int64}
│ %144 = Base.getfield(%143, :start)::Int64
│ %145 = Base.sub_int(%144, 1)::Int64
│ %146 = Base.arraysize(%138, 1)::Int64
│ Base.arraysize(%138, 2)::Int64
│ %148 = Base.mul_int(1, %146)::Int64
│ %149 = Base.mul_int(%142, 1)::Int64
│ %150 = Base.mul_int(%145, %148)::Int64
│ %151 = Base.add_int(%149, %150)::Int64
│ %152 = Base.mul_int(8, %151)::Int64
│ %153 = Core.bitcast(Core.UInt, %137)::UInt64
│ %154 = Base.bitcast(UInt64, %152)::UInt64
│ %155 = Base.add_ptr(%153, %154)::UInt64
│ %156 = Core.bitcast(Ptr{Float64}, %155)::Ptr{Float64}
│ %157 = invoke LayoutPointers.StrideIndex(A::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false})::ArrayInterface.StrideIndex{2, (1, 2), 1, Tuple{Static.StaticInt{1}, Int64}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}}
│ %158 = Base.getfield(%157, :strides)::Tuple{Static.StaticInt{1}, Int64}
│ %159 = Core.getfield(%158, 2)::Int64
│ %160 = Base.mul_int(8, %159)::Int64
│ %161 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %28, 1)::Int64
│ %162 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %161, %160)::Int64
│ %163 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = ptrtoint i8* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %156, %162)::Ptr{Float64}
│ %164 = Base.llvmcall((" \n\n define i64 @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}}, %134)::Ptr{Float64}
│ %165 = Base.llvmcall((" \n\n define i64 @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}}, %163)::Ptr{Float64}
│ %166 = $(Expr(:gc_preserve_begin, :(%115), :(%136)))
│ %167 = Base.sub_int(%9, 2)::Int64
│ %168 = Base.slt_int(%167, 0)::Bool
└──── goto #40 if not %168
39 ── goto #41
40 ── %171 = Base.sub_int(%167, 0)::Int64
│ %172 = Base.add_int(%171, 1)::Int64
└──── goto #41
41 ┄─ %174 = φ (#39 => 0, #40 => %172)::Int64
└──── goto #42
42 ── goto #43
43 ── %177 = Base.slt_int(0, %167)::Bool
│ %178 = (0 === %167)::Bool
│ %179 = Base.or_int(%177, %178)::Bool
│ Base.llvmcall((" declare void @llvm.assume(i1)\n\n define void @entry(i8) alwaysinline {\n top:\n %b = trunc i8 %0 to i1\ncall void @llvm.assume(i1 %b)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Bool}, %179)::Nothing
│ %181 = Base.slt_int(15, %167)::Bool
│ %182 = (15 === %167)::Bool
│ %183 = Base.or_int(%181, %182)::Bool
└──── goto #52 if not %183
44 ── %185 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
│ %186 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
│ %187 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
└──── %188 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
45 ┄─ %189 = φ (#44 => 0, #50 => %214)::Int64
│ %190 = φ (#44 => true, #50 => %224)::Bool
│ %191 = φ (#44 => %185, #50 => %210)::NTuple{4, VecElement{Float64}}
│ %192 = φ (#44 => %186, #50 => %211)::NTuple{4, VecElement{Float64}}
│ %193 = φ (#44 => %187, #50 => %212)::NTuple{4, VecElement{Float64}}
│ %194 = φ (#44 => %188, #50 => %213)::NTuple{4, VecElement{Float64}}
│ %195 = φ (#44 => %185, #50 => %210)::NTuple{4, VecElement{Float64}}
│ %196 = φ (#44 => %187, #50 => %212)::NTuple{4, VecElement{Float64}}
│ %197 = φ (#44 => %186, #50 => %211)::NTuple{4, VecElement{Float64}}
│ %198 = φ (#44 => %188, #50 => %213)::NTuple{4, VecElement{Float64}}
└──── goto #51 if not %190
46 ── %200 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %164, %189)::Ptr{Float64}
│ %201 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %200)::NTuple{4, VecElement{Float64}}
│ %202 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %200)::NTuple{4, VecElement{Float64}}
│ %203 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %200)::NTuple{4, VecElement{Float64}}
│ %204 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %200)::NTuple{4, VecElement{Float64}}
│ %205 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %165, %189)::Ptr{Float64}
│ %206 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %205)::NTuple{4, VecElement{Float64}}
│ %207 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %205)::NTuple{4, VecElement{Float64}}
│ %208 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %205)::NTuple{4, VecElement{Float64}}
│ %209 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %205)::NTuple{4, VecElement{Float64}}
│ %210 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %201, %206, %191)::NTuple{4, VecElement{Float64}}
│ %211 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %202, %207, %192)::NTuple{4, VecElement{Float64}}
│ %212 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %203, %208, %193)::NTuple{4, VecElement{Float64}}
│ %213 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %204, %209, %194)::NTuple{4, VecElement{Float64}}
│ %214 = Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 16, %189)::Int64
│ %215 = Base.slt_int(%167, 0)::Bool
└──── goto #48 if not %215
47 ── goto #49
48 ── %218 = Base.sub_int(%167, 0)::Int64
│ %219 = Base.add_int(%218, 1)::Int64
└──── goto #49
49 ┄─ %221 = φ (#47 => 0, #48 => %219)::Int64
│ %222 = Base.and_int(%221, -16)::Int64
│ %223 = (%214 === %222)::Bool
│ %224 = Base.not_int(%223)::Bool
└──── goto #50
50 ── goto #45
51 ── %227 = Base.llvmcall("%res = fadd nsz contract <4 x double> %0, %1\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %195, %196)::NTuple{4, VecElement{Float64}}
│ %228 = Base.llvmcall("%res = fadd nsz contract <4 x double> %0, %1\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %197, %198)::NTuple{4, VecElement{Float64}}
└──── goto #53
52 ── %230 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
└──── %231 = Base.llvmcall("ret <4 x double> zeroinitializer", NTuple{4, VecElement{Float64}}, Tuple{})::NTuple{4, VecElement{Float64}}
53 ┄─ %232 = φ (#51 => %189, #52 => 0)::Int64
│ %233 = φ (#51 => %227, #52 => %230)::NTuple{4, VecElement{Float64}}
│ %234 = φ (#51 => %228, #52 => %231)::NTuple{4, VecElement{Float64}}
│ %235 = φ (#51 => %227, #52 => %230)::NTuple{4, VecElement{Float64}}
│ %236 = φ (#51 => %227, #52 => %230)::NTuple{4, VecElement{Float64}}
│ %237 = φ (#51 => %227, #52 => %230)::NTuple{4, VecElement{Float64}}
│ %238 = φ (#51 => %228, #52 => %231)::NTuple{4, VecElement{Float64}}
│ %239 = φ (#51 => %228, #52 => %231)::NTuple{4, VecElement{Float64}}
│ %240 = φ (#51 => %227, #52 => %230)::NTuple{4, VecElement{Float64}}
│ %241 = φ (#51 => %228, #52 => %231)::NTuple{4, VecElement{Float64}}
│ %242 = φ (#51 => %228, #52 => %231)::NTuple{4, VecElement{Float64}}
│ %243 = Base.slt_int(%167, 0)::Bool
└──── goto #55 if not %243
54 ── goto #56
55 ── %246 = Base.sub_int(%167, 0)::Int64
│ %247 = Base.add_int(%246, 1)::Int64
└──── goto #56
56 ┄─ %249 = φ (#54 => 0, #55 => %247)::Int64
│ %250 = Base.and_int(%249, -8)::Int64
│ %251 = (%232 === %250)::Bool
│ %252 = Base.not_int(%251)::Bool
└──── goto #57
57 ── goto #59 if not %252
58 ── %255 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %164, %232)::Ptr{Float64}
│ %256 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %255)::NTuple{4, VecElement{Float64}}
│ %257 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %255)::NTuple{4, VecElement{Float64}}
│ %258 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %165, %232)::Ptr{Float64}
│ %259 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %258)::NTuple{4, VecElement{Float64}}
│ %260 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %258)::NTuple{4, VecElement{Float64}}
│ %261 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %256, %259, %233)::NTuple{4, VecElement{Float64}}
│ %262 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %257, %260, %234)::NTuple{4, VecElement{Float64}}
└──── %263 = Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %232)::Int64
59 ┄─ %264 = φ (#58 => %261, #57 => %235)::NTuple{4, VecElement{Float64}}
│ %265 = φ (#58 => %261, #57 => %236)::NTuple{4, VecElement{Float64}}
│ %266 = φ (#58 => %261, #57 => %237)::NTuple{4, VecElement{Float64}}
│ %267 = φ (#58 => %262, #57 => %238)::NTuple{4, VecElement{Float64}}
│ %268 = φ (#58 => %262, #57 => %239)::NTuple{4, VecElement{Float64}}
│ %269 = φ (#58 => %261, #57 => %240)::NTuple{4, VecElement{Float64}}
│ %270 = φ (#58 => %262, #57 => %241)::NTuple{4, VecElement{Float64}}
│ %271 = φ (#58 => %262, #57 => %242)::NTuple{4, VecElement{Float64}}
│ %272 = φ (#58 => %263, #57 => %232)::Int64
│ %273 = Base.sle_int(%272, %167)::Bool
└──── goto #63 if not %273
60 ── %275 = Base.bitcast(UInt64, %174)::UInt64
│ %276 = Base.llvmcall("%res = sub nsw nuw i64 %0, %1\nret i64 %res", UInt64, Tuple{UInt64, UInt64}, %275, 0x0000000000000001)::UInt64
│ %277 = Base.and_int(%276, 0x0000000000000003)::UInt64
│ %278 = Base.llvmcall(" %ie = insertelement <4 x i64> undef, i64 %0, i32 0\n %v = shufflevector <4 x i64> %ie, <4 x i64> undef, <4 x i32> zeroinitializer\n ret <4 x i64> %v\n", NTuple{4, VecElement{UInt64}}, Tuple{UInt64}, %277)::NTuple{4, VecElement{UInt64}}
│ %279 = Base.llvmcall(" %ie = insertelement <4 x i64> undef, i64 %0, i32 0\n %v = shufflevector <4 x i64> %ie, <4 x i64> undef, <4 x i32> zeroinitializer\n %res = add nsw <4 x i64> %v, <i64 0, i64 1, i64 2, i64 3>\n ret <4 x i64> %res\n", NTuple{4, VecElement{UInt64}}, Tuple{UInt64}, 0x0000000000000000)::NTuple{4, VecElement{UInt64}}
│ %280 = Base.llvmcall("%m = icmp uge <4 x i64> %0, %1\n%restrunc.0 = bitcast <4 x i1> %m to i4\n%res.0 = zext i4 %restrunc.0 to i8\nret i8 %res.0", VectorizationBase.UInt8, Tuple{NTuple{4, VecElement{UInt64}}, NTuple{4, VecElement{UInt64}}}, %278, %279)::UInt8
│ Base.llvmcall("%res = add nsw nuw i64 %0, %1\nret i64 %res", UInt64, Tuple{UInt64, UInt64}, %277, 0x0000000000000001)::UInt64
│ %282 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %167, 4)::Int64
│ %283 = Base.slt_int(%282, %272)::Bool
└──── goto #62 if not %283
61 ── %285 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %272)::Int64
│ %286 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = bitcast i8* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, Int64, UInt8}, %164, %285, %280)::NTuple{4, VecElement{Float64}}
│ %287 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %272)::Int64
│ %288 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = bitcast i8* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, Int64, UInt8}, %165, %287, %280)::NTuple{4, VecElement{Float64}}
│ %289 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %286, %288, %264)::NTuple{4, VecElement{Float64}}
│ %290 = Base.llvmcall("%masktrunc.0 = trunc i8 %0 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = select nsz arcp contract reassoc <4 x i1> %mask.0, <4 x double> %1, <4 x double> %2\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{UInt8, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %280, %289, %265)::NTuple{4, VecElement{Float64}}
│ Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 4, %272)::Int64
└──── goto #63
62 ── %293 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %164, %272)::Ptr{Float64}
│ %294 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %293)::NTuple{4, VecElement{Float64}}
│ %295 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %293, %280)::NTuple{4, VecElement{Float64}}
│ %296 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %165, %272)::Ptr{Float64}
│ %297 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %296)::NTuple{4, VecElement{Float64}}
│ %298 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %296, %280)::NTuple{4, VecElement{Float64}}
│ %299 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %294, %297, %266)::NTuple{4, VecElement{Float64}}
│ %300 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %295, %298, %267)::NTuple{4, VecElement{Float64}}
│ %301 = Base.llvmcall("%masktrunc.0 = trunc i8 %0 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = select nsz arcp contract reassoc <4 x i1> %mask.0, <4 x double> %1, <4 x double> %2\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{UInt8, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %280, %300, %268)::NTuple{4, VecElement{Float64}}
└──── Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %272)::Int64
63 ┄─ %303 = φ (#61 => %290, #62 => %299, #59 => %269)::NTuple{4, VecElement{Float64}}
│ %304 = φ (#61 => %271, #62 => %301, #59 => %270)::NTuple{4, VecElement{Float64}}
│ %305 = Base.llvmcall("%res = fadd nsz contract <4 x double> %0, %1\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %303, %304)::NTuple{4, VecElement{Float64}}
└──── goto #64
64 ── $(Expr(:gc_preserve_end, :(%166)))
│ %308 = Base.llvmcall((" declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)\n\n define double @entry(double, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc double @llvm.vector.reduce.fadd.v4f64(double %0, <4 x double> %1)\nret double %res\n }\n", "entry"), VectorizationBase.Float64, Tuple{Float64, NTuple{4, VecElement{Float64}}}, %107, %305)::Float64
│ %309 = Base.mul_float(τ, %308)::Float64
└──── goto #69 if not false
65 ── %311 = Core.tuple(1, %28)::Tuple{Int64, Int64}
│ %312 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %313 = Core.getfield(%312, 1)::UnitRange{Int64}
│ %314 = Core.getfield(%312, 2)::UnitRange{Int64}
│ %315 = Base.getfield(%313, :stop)::Int64
│ %316 = Base.getfield(%313, :start)::Int64
│ %317 = Base.sub_int(%315, %316)::Int64
│ %318 = Base.add_int(%317, 1)::Int64
│ %319 = Base.slt_int(%318, 0)::Bool
│ %320 = Base.ifelse(%319, 0, %318)::Int64
│ %321 = Base.getfield(%314, :stop)::Int64
│ %322 = Base.getfield(%314, :start)::Int64
│ %323 = Base.sub_int(%321, %322)::Int64
│ %324 = Base.add_int(%323, 1)::Int64
│ %325 = Base.slt_int(%324, 0)::Bool
│ %326 = Base.ifelse(%325, 0, %324)::Int64
│ %327 = Base.sle_int(1, 1)::Bool
│ %328 = Base.sle_int(1, %320)::Bool
│ %329 = Base.and_int(%327, %328)::Bool
│ %330 = Base.sle_int(1, %28)::Bool
│ %331 = Base.sle_int(%28, %326)::Bool
│ %332 = Base.and_int(%330, %331)::Bool
│ %333 = Base.and_int(%332, true)::Bool
│ %334 = Base.and_int(%329, %333)::Bool
└──── goto #67 if not %334
66 ── Base.nothing::Nothing
└──── goto #68
67 ── invoke Base.throw_boundserror(A::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}, %311::Tuple{Int64, Int64})::Union{}
└──── unreachable
68 ── nothing::Nothing
69 ┄─ %341 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %342 = Base.getfield(%341, 1, false)::UnitRange{Int64}
│ %343 = Base.getfield(%342, :start)::Int64
│ %344 = Base.sub_int(1, 1)::Int64
│ %345 = Base.add_int(%343, %344)::Int64
└──── goto #78 if not false
70 ── %347 = Base.slt_int(0, 1)::Bool
└──── goto #74 if not %347
71 ── %349 = Base.getfield(%342, :stop)::Int64
│ %350 = Base.sle_int(%345, %349)::Bool
└──── goto #73 if not %350
72 ── %352 = Base.getfield(%342, :start)::Int64
│ %353 = Base.sle_int(%352, %345)::Bool
└──── goto #75
73 ── goto #75
74 ── goto #75
75 ┄─ %357 = φ (#72 => %353, #73 => false, #74 => false)::Bool
└──── goto #77 if not %357
76 ── goto #78
77 ── invoke Base.throw_boundserror(%342::UnitRange{Int64}, 1::Int64)::Union{}
└──── unreachable
78 ┄─ goto #79
79 ── %363 = Core.getfield(%341, 2)::UnitRange{Int64}
│ %364 = Base.getfield(%363, :start)::Int64
│ %365 = Base.sub_int(%28, 1)::Int64
│ %366 = Base.add_int(%364, %365)::Int64
└──── goto #88 if not false
80 ── %368 = Base.slt_int(0, %28)::Bool
└──── goto #84 if not %368
81 ── %370 = Base.getfield(%363, :stop)::Int64
│ %371 = Base.sle_int(%366, %370)::Bool
└──── goto #83 if not %371
82 ── %373 = Base.getfield(%363, :start)::Int64
│ %374 = Base.sle_int(%373, %366)::Bool
└──── goto #85
83 ── goto #85
84 ── goto #85
85 ┄─ %378 = φ (#82 => %374, #83 => false, #84 => false)::Bool
└──── goto #87 if not %378
86 ── goto #88
87 ── invoke Base.throw_boundserror(%363::UnitRange{Int64}, %28::Int64)::Union{}
└──── unreachable
88 ┄─ goto #89
89 ── goto #90
90 ── goto #91
91 ── %386 = Base.getfield(A, :parent)::Matrix{Float64}
│ %387 = Base.arrayref(false, %386, %345, %366)::Float64
└──── goto #92
92 ── %389 = Base.sub_float(%387, %309)::Float64
└──── goto #97 if not false
93 ── %391 = Core.tuple(1, %28)::Tuple{Int64, Int64}
│ %392 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %393 = Core.getfield(%392, 1)::UnitRange{Int64}
│ %394 = Core.getfield(%392, 2)::UnitRange{Int64}
│ %395 = Base.getfield(%393, :stop)::Int64
│ %396 = Base.getfield(%393, :start)::Int64
│ %397 = Base.sub_int(%395, %396)::Int64
│ %398 = Base.add_int(%397, 1)::Int64
│ %399 = Base.slt_int(%398, 0)::Bool
│ %400 = Base.ifelse(%399, 0, %398)::Int64
│ %401 = Base.getfield(%394, :stop)::Int64
│ %402 = Base.getfield(%394, :start)::Int64
│ %403 = Base.sub_int(%401, %402)::Int64
│ %404 = Base.add_int(%403, 1)::Int64
│ %405 = Base.slt_int(%404, 0)::Bool
│ %406 = Base.ifelse(%405, 0, %404)::Int64
│ %407 = Base.sle_int(1, 1)::Bool
│ %408 = Base.sle_int(1, %400)::Bool
│ %409 = Base.and_int(%407, %408)::Bool
│ %410 = Base.sle_int(1, %28)::Bool
│ %411 = Base.sle_int(%28, %406)::Bool
│ %412 = Base.and_int(%410, %411)::Bool
│ %413 = Base.and_int(%412, true)::Bool
│ %414 = Base.and_int(%409, %413)::Bool
└──── goto #95 if not %414
94 ── Base.nothing::Nothing
└──── goto #96
95 ── invoke Base.throw_boundserror(A::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}, %391::Tuple{Int64, Int64})::Union{}
└──── unreachable
96 ── nothing::Nothing
97 ┄─ %421 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %422 = Base.getfield(%421, 1, false)::UnitRange{Int64}
│ %423 = Base.getfield(%422, :start)::Int64
│ %424 = Base.sub_int(1, 1)::Int64
│ %425 = Base.add_int(%423, %424)::Int64
└──── goto #106 if not false
98 ── %427 = Base.slt_int(0, 1)::Bool
└──── goto #102 if not %427
99 ── %429 = Base.getfield(%422, :stop)::Int64
│ %430 = Base.sle_int(%425, %429)::Bool
└──── goto #101 if not %430
100 ─ %432 = Base.getfield(%422, :start)::Int64
│ %433 = Base.sle_int(%432, %425)::Bool
└──── goto #103
101 ─ goto #103
102 ─ goto #103
103 ┄ %437 = φ (#100 => %433, #101 => false, #102 => false)::Bool
└──── goto #105 if not %437
104 ─ goto #106
105 ─ invoke Base.throw_boundserror(%422::UnitRange{Int64}, 1::Int64)::Union{}
└──── unreachable
106 ┄ goto #107
107 ─ %443 = Core.getfield(%421, 2)::UnitRange{Int64}
│ %444 = Base.getfield(%443, :start)::Int64
│ %445 = Base.sub_int(%28, 1)::Int64
│ %446 = Base.add_int(%444, %445)::Int64
└──── goto #116 if not false
108 ─ %448 = Base.slt_int(0, %28)::Bool
└──── goto #112 if not %448
109 ─ %450 = Base.getfield(%443, :stop)::Int64
│ %451 = Base.sle_int(%446, %450)::Bool
└──── goto #111 if not %451
110 ─ %453 = Base.getfield(%443, :start)::Int64
│ %454 = Base.sle_int(%453, %446)::Bool
└──── goto #113
111 ─ goto #113
112 ─ goto #113
113 ┄ %458 = φ (#110 => %454, #111 => false, #112 => false)::Bool
└──── goto #115 if not %458
114 ─ goto #116
115 ─ invoke Base.throw_boundserror(%443::UnitRange{Int64}, %28::Int64)::Union{}
└──── unreachable
116 ┄ goto #117
117 ─ goto #118
118 ─ goto #119
119 ─ %466 = Base.getfield(A, :parent)::Matrix{Float64}
│ Base.arrayset(false, %466, %389, %425, %446)::Matrix{Float64}
└──── goto #120
120 ─ %469 = Base.slt_int(%9, 2)::Bool
└──── goto #122 if not %469
121 ─ goto #123
122 ─ goto #123
123 ┄ goto #124
124 ─ goto #125
125 ─ %475 = Base.getfield(A, :parent)::Matrix{Float64}
│ %476 = $(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(%475)))::Ptr{Float64}
│ %477 = Base.getfield(A, :parent)::Matrix{Float64}
│ %478 = Base.getfield(A, :indices)::Tuple{UnitRange{Int64}, UnitRange{Int64}}
│ %479 = LayoutPointers.getfield(%478, 1, false)::UnitRange{Int64}
│ %480 = Base.getfield(%479, :start)::Int64
│ %481 = Base.sub_int(%480, 1)::Int64
│ %482 = Core.getfield(%478, 2)::UnitRange{Int64}
│ %483 = Base.getfield(%482, :start)::Int64
│ %484 = Base.sub_int(%483, 1)::Int64
│ %485 = Base.arraysize(%477, 1)::Int64
│ Base.arraysize(%477, 2)::Int64
│ %487 = Base.mul_int(1, %485)::Int64
│ %488 = Base.mul_int(%481, 1)::Int64
│ %489 = Base.mul_int(%484, %487)::Int64
│ %490 = Base.add_int(%488, %489)::Int64
│ %491 = Base.mul_int(8, %490)::Int64
│ %492 = Core.bitcast(Core.UInt, %476)::UInt64
│ %493 = Base.bitcast(UInt64, %491)::UInt64
│ %494 = Base.add_ptr(%492, %493)::UInt64
│ %495 = Core.bitcast(Ptr{Float64}, %494)::Ptr{Float64}
│ %496 = invoke LayoutPointers.StrideIndex(A::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false})::ArrayInterface.StrideIndex{2, (1, 2), 1, Tuple{Static.StaticInt{1}, Int64}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}}
│ %497 = Base.getfield(%496, :strides)::Tuple{Static.StaticInt{1}, Int64}
│ %498 = Core.getfield(%497, 2)::Int64
│ %499 = Base.mul_int(8, %498)::Int64
│ %500 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %28, 1)::Int64
│ %501 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %500, %499)::Int64
│ Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = ptrtoint i8* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %495, %501)::Ptr{Float64}
│ %503 = Base.getfield(x, :parent)::Matrix{Float64}
│ %504 = $(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(%503)))::Ptr{Float64}
│ %505 = Base.getfield(x, :parent)::Matrix{Float64}
│ %506 = Base.getfield(x, :indices)::Tuple{UnitRange{Int64}, Int64}
│ %507 = LayoutPointers.getfield(%506, 1, false)::UnitRange{Int64}
│ %508 = Base.getfield(%507, :start)::Int64
│ %509 = Base.sub_int(%508, 1)::Int64
│ %510 = Core.getfield(%506, 2)::Int64
│ %511 = Base.sub_int(%510, 1)::Int64
│ %512 = Base.arraysize(%505, 1)::Int64
│ Base.arraysize(%505, 2)::Int64
│ %514 = Base.mul_int(1, %512)::Int64
│ %515 = Base.mul_int(%509, 1)::Int64
│ %516 = Base.mul_int(%511, %514)::Int64
│ %517 = Base.add_int(%515, %516)::Int64
│ %518 = Base.mul_int(8, %517)::Int64
│ %519 = Core.bitcast(Core.UInt, %504)::UInt64
│ %520 = Base.bitcast(UInt64, %518)::UInt64
│ %521 = Base.add_ptr(%519, %520)::UInt64
│ %522 = Core.bitcast(Ptr{Float64}, %521)::Ptr{Float64}
│ invoke LayoutPointers.StrideIndex(x::SubArray{Float64, 1, Matrix{Float64}, Tuple{UnitRange{Int64}, Int64}, true})::ArrayInterface.StrideIndex{1, (1,), 1, Tuple{Static.StaticInt{1}}, Tuple{Static.StaticInt{1}}}
│ %524 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %28, 1)::Int64
│ %525 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %524, %499)::Int64
│ %526 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = ptrtoint i8* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %495, %525)::Ptr{Float64}
│ %527 = Base.llvmcall((" \n\n define i64 @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}}, %522)::Ptr{Float64}
│ %528 = Base.llvmcall((" \n\n define i64 @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}}, %526)::Ptr{Float64}
│ %529 = $(Expr(:gc_preserve_begin, :(%503), :(%475)))
│ %530 = Base.sub_int(%9, 2)::Int64
│ %531 = Base.slt_int(%530, 0)::Bool
└──── goto #127 if not %531
126 ─ goto #128
127 ─ %534 = Base.sub_int(%530, 0)::Int64
│ %535 = Base.add_int(%534, 1)::Int64
└──── goto #128
128 ┄ %537 = φ (#126 => 0, #127 => %535)::Int64
└──── goto #129
129 ─ goto #130
130 ─ %540 = Base.slt_int(0, %530)::Bool
│ %541 = (0 === %530)::Bool
│ %542 = Base.or_int(%540, %541)::Bool
└──── Base.llvmcall((" declare void @llvm.assume(i1)\n\n define void @entry(i8) alwaysinline {\n top:\n %b = trunc i8 %0 to i1\ncall void @llvm.assume(i1 %b)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Bool}, %542)::Nothing
131 ┄ %544 = φ (#130 => 0, #136 => %581)::Int64
│ %545 = Base.slt_int(%530, 0)::Bool
└──── goto #133 if not %545
132 ─ goto #134
133 ─ %548 = Base.sub_int(%530, 0)::Int64
│ %549 = Base.add_int(%548, 1)::Int64
└──── goto #134
134 ┄ %551 = φ (#132 => 0, #133 => %549)::Int64
│ %552 = Base.and_int(%551, -16)::Int64
│ %553 = (%544 === %552)::Bool
│ %554 = Base.not_int(%553)::Bool
└──── goto #135
135 ─ goto #137 if not %554
136 ─ %557 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %527, %544)::Ptr{Float64}
│ %558 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %557)::NTuple{4, VecElement{Float64}}
│ %559 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %557)::NTuple{4, VecElement{Float64}}
│ %560 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %557)::NTuple{4, VecElement{Float64}}
│ %561 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %557)::NTuple{4, VecElement{Float64}}
│ %562 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ %563 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %562)::NTuple{4, VecElement{Float64}}
│ %564 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %562)::NTuple{4, VecElement{Float64}}
│ %565 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %562)::NTuple{4, VecElement{Float64}}
│ %566 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %562)::NTuple{4, VecElement{Float64}}
│ %567 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %558)::NTuple{4, VecElement{Float64}}
│ %568 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %559)::NTuple{4, VecElement{Float64}}
│ %569 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %560)::NTuple{4, VecElement{Float64}}
│ %570 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %561)::NTuple{4, VecElement{Float64}}
│ %571 = Base.llvmcall(" %ie = insertelement <4 x double> undef, double %0, i32 0\n %v = shufflevector <4 x double> %ie, <4 x double> undef, <4 x i32> zeroinitializer\n ret <4 x double> %v\n", NTuple{4, VecElement{Float64}}, Tuple{Float64}, %309)::NTuple{4, VecElement{Float64}}
│ %572 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %567, %571, %563)::NTuple{4, VecElement{Float64}}
│ %573 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %568, %571, %564)::NTuple{4, VecElement{Float64}}
│ %574 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %569, %571, %565)::NTuple{4, VecElement{Float64}}
│ %575 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %570, %571, %566)::NTuple{4, VecElement{Float64}}
│ %576 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.1, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %576, %572)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %576, %573)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %576, %574)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %576, %575)::Nothing
│ %581 = Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 16, %544)::Int64
└──── goto #131
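# blocks #137–#145: remainder handling — %590 is a 4-lane tail mask built from
# ((%537 - 1) & 3); depending on how many elements remain (≤4, ≤8, ≤12, ≤16),
# zero to three full <4 x double> operations run, followed by one masked
# load / fmuladd / store for the final partial vector.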
137 ─ %583 = Base.sle_int(%544, %530)::Bool
└──── goto #145 if not %583
138 ─ %585 = Base.bitcast(UInt64, %537)::UInt64
│ %586 = Base.llvmcall("%res = sub nsw nuw i64 %0, %1\nret i64 %res", UInt64, Tuple{UInt64, UInt64}, %585, 0x0000000000000001)::UInt64
│ %587 = Base.and_int(%586, 0x0000000000000003)::UInt64
│ %588 = Base.llvmcall(" %ie = insertelement <4 x i64> undef, i64 %0, i32 0\n %v = shufflevector <4 x i64> %ie, <4 x i64> undef, <4 x i32> zeroinitializer\n ret <4 x i64> %v\n", NTuple{4, VecElement{UInt64}}, Tuple{UInt64}, %587)::NTuple{4, VecElement{UInt64}}
│ %589 = Base.llvmcall(" %ie = insertelement <4 x i64> undef, i64 %0, i32 0\n %v = shufflevector <4 x i64> %ie, <4 x i64> undef, <4 x i32> zeroinitializer\n %res = add nsw <4 x i64> %v, <i64 0, i64 1, i64 2, i64 3>\n ret <4 x i64> %res\n", NTuple{4, VecElement{UInt64}}, Tuple{UInt64}, 0x0000000000000000)::NTuple{4, VecElement{UInt64}}
│ %590 = Base.llvmcall("%m = icmp uge <4 x i64> %0, %1\n%restrunc.0 = bitcast <4 x i1> %m to i4\n%res.0 = zext i4 %restrunc.0 to i8\nret i8 %res.0", VectorizationBase.UInt8, Tuple{NTuple{4, VecElement{UInt64}}, NTuple{4, VecElement{UInt64}}}, %588, %589)::UInt8
│ Base.llvmcall("%res = add nsw nuw i64 %0, %1\nret i64 %res", UInt64, Tuple{UInt64, UInt64}, %587, 0x0000000000000001)::UInt64
│ %592 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %530, 4)::Int64
│ %593 = Base.slt_int(%592, %544)::Bool
└──── goto #140 if not %593
139 ─ %595 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %544)::Int64
│ %596 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = bitcast i8* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, Int64, UInt8}, %527, %595, %590)::NTuple{4, VecElement{Float64}}
│ %597 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %544)::Int64
│ %598 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %1\n%ptr.2 = bitcast i8* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, Int64, UInt8}, %528, %597, %590)::NTuple{4, VecElement{Float64}}
│ %599 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %596)::NTuple{4, VecElement{Float64}}
│ %600 = Base.llvmcall(" %ie = insertelement <4 x double> undef, double %0, i32 0\n %v = shufflevector <4 x double> %ie, <4 x double> undef, <4 x i32> zeroinitializer\n ret <4 x double> %v\n", NTuple{4, VecElement{Float64}}, Tuple{Float64}, %309)::NTuple{4, VecElement{Float64}}
│ %601 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %599, %600, %598)::NTuple{4, VecElement{Float64}}
│ %602 = Base.llvmcall("%res = mul nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %544)::Int64
│ Base.llvmcall((" declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)\n\n define void @entry(i64, <4 x double>, i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to i8*\n%ptr.1 = getelementptr inbounds i8, i8* %ptr.0, i64 %2\n%ptr.2 = bitcast i8* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %3 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\ncall void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %1, <4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}, Int64, UInt8}, %528, %601, %602, %590)::Nothing
│ Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 4, %544)::Int64
└──── goto #145
140 ─ %606 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %530, 8)::Int64
│ %607 = Base.slt_int(%606, %544)::Bool
└──── goto #142 if not %607
141 ─ %609 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %527, %544)::Ptr{Float64}
│ %610 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %609)::NTuple{4, VecElement{Float64}}
│ %611 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %609, %590)::NTuple{4, VecElement{Float64}}
│ %612 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ %613 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %612)::NTuple{4, VecElement{Float64}}
│ %614 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %612, %590)::NTuple{4, VecElement{Float64}}
│ %615 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %610)::NTuple{4, VecElement{Float64}}
│ %616 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %611)::NTuple{4, VecElement{Float64}}
│ %617 = Base.llvmcall(" %ie = insertelement <4 x double> undef, double %0, i32 0\n %v = shufflevector <4 x double> %ie, <4 x double> undef, <4 x i32> zeroinitializer\n ret <4 x double> %v\n", NTuple{4, VecElement{Float64}}, Tuple{Float64}, %309)::NTuple{4, VecElement{Float64}}
│ %618 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %615, %617, %613)::NTuple{4, VecElement{Float64}}
│ %619 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %616, %617, %614)::NTuple{4, VecElement{Float64}}
│ %620 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.1, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %620, %618)::Nothing
│ Base.llvmcall((" declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)\n\n define void @entry(i64, <4 x double>, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\ncall void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %1, <4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}, UInt8}, %620, %619, %590)::Nothing
│ Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 8, %544)::Int64
└──── goto #145
142 ─ %625 = Base.llvmcall("%res = sub nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, %530, 12)::Int64
│ %626 = Base.slt_int(%625, %544)::Bool
└──── goto #144 if not %626
143 ─ %628 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %527, %544)::Ptr{Float64}
│ %629 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %628)::NTuple{4, VecElement{Float64}}
│ %630 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %628)::NTuple{4, VecElement{Float64}}
│ %631 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %628, %590)::NTuple{4, VecElement{Float64}}
│ %632 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ %633 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %632)::NTuple{4, VecElement{Float64}}
│ %634 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %632)::NTuple{4, VecElement{Float64}}
│ %635 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %632, %590)::NTuple{4, VecElement{Float64}}
│ %636 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %629)::NTuple{4, VecElement{Float64}}
│ %637 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %630)::NTuple{4, VecElement{Float64}}
│ %638 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %631)::NTuple{4, VecElement{Float64}}
│ %639 = Base.llvmcall(" %ie = insertelement <4 x double> undef, double %0, i32 0\n %v = shufflevector <4 x double> %ie, <4 x double> undef, <4 x i32> zeroinitializer\n ret <4 x double> %v\n", NTuple{4, VecElement{Float64}}, Tuple{Float64}, %309)::NTuple{4, VecElement{Float64}}
│ %640 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %636, %639, %633)::NTuple{4, VecElement{Float64}}
│ %641 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %637, %639, %634)::NTuple{4, VecElement{Float64}}
│ %642 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %638, %639, %635)::NTuple{4, VecElement{Float64}}
│ %643 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.1, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %643, %640)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %643, %641)::Nothing
│ Base.llvmcall((" declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)\n\n define void @entry(i64, <4 x double>, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\ncall void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %1, <4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}, UInt8}, %643, %642, %590)::Nothing
│ Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 12, %544)::Int64
└──── goto #145
144 ─ %649 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %527, %544)::Ptr{Float64}
│ %650 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %649)::NTuple{4, VecElement{Float64}}
│ %651 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %649)::NTuple{4, VecElement{Float64}}
│ %652 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %649)::NTuple{4, VecElement{Float64}}
│ %653 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %649, %590)::NTuple{4, VecElement{Float64}}
│ %654 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ %655 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.1, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %654)::NTuple{4, VecElement{Float64}}
│ %656 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %654)::NTuple{4, VecElement{Float64}}
│ %657 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\n\n\n define <4 x double> @entry(i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%res = load <4 x double>, <4 x double>* %ptr.2, align 8, !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}}, %654)::NTuple{4, VecElement{Float64}}
│ %658 = Base.llvmcall((" !1 = !{!\"noaliasdomain\"}\n!2 = !{!\"noaliasscope\", !1}\n!3 = !{!2}\ndeclare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)\n\n define <4 x double> @entry(i64, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %1 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\n%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0, <4 x double> zeroinitializer), !alias.scope !3\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{Ptr{Float64}, UInt8}, %654, %590)::NTuple{4, VecElement{Float64}}
│ %659 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %650)::NTuple{4, VecElement{Float64}}
│ %660 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %651)::NTuple{4, VecElement{Float64}}
│ %661 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %652)::NTuple{4, VecElement{Float64}}
│ %662 = Base.llvmcall("%res = fneg nsz arcp contract afn reassoc <4 x double> %0\nret <4 x double> %res", NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}}, %653)::NTuple{4, VecElement{Float64}}
│ %663 = Base.llvmcall(" %ie = insertelement <4 x double> undef, double %0, i32 0\n %v = shufflevector <4 x double> %ie, <4 x double> undef, <4 x i32> zeroinitializer\n ret <4 x double> %v\n", NTuple{4, VecElement{Float64}}, Tuple{Float64}, %309)::NTuple{4, VecElement{Float64}}
│ %664 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %659, %663, %655)::NTuple{4, VecElement{Float64}}
│ %665 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %660, %663, %656)::NTuple{4, VecElement{Float64}}
│ %666 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %661, %663, %657)::NTuple{4, VecElement{Float64}}
│ %667 = Base.llvmcall((" declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)\n\n define <4 x double> @entry(<4 x double>, <4 x double>, <4 x double>) alwaysinline {\n top:\n %res = call nsz arcp contract afn reassoc <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)\nret <4 x double> %res\n }\n", "entry"), NTuple{4, VecElement{Float64}}, Tuple{NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}, NTuple{4, VecElement{Float64}}}, %662, %663, %658)::NTuple{4, VecElement{Float64}}
│ %668 = Base.llvmcall((" \n\n define i64 @entry(i64, i64) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i64 %1\n%ptr.2 = ptrtoint double* %ptr.1 to i64\nret i64 %ptr.2\n }\n", "entry"), Ptr{Float64}, Tuple{Ptr{Float64}, Int64}, %528, %544)::Ptr{Float64}
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = bitcast double* %ptr.0 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.1, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %668, %664)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 4\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %668, %665)::Nothing
│ Base.llvmcall((" \n\n define void @entry(i64, <4 x double>) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 8\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\nstore <4 x double> %1, <4 x double>* %ptr.2, align 8\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}}, %668, %666)::Nothing
│ Base.llvmcall((" declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)\n\n define void @entry(i64, <4 x double>, i8) alwaysinline {\n top:\n %ptr.0 = inttoptr i64 %0 to double*\n%ptr.1 = getelementptr inbounds double, double* %ptr.0, i32 12\n%ptr.2 = bitcast double* %ptr.1 to <4 x double>*\n%masktrunc.0 = trunc i8 %2 to i4\n%mask.0 = bitcast i4 %masktrunc.0 to <4 x i1>\ncall void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %1, <4 x double>* %ptr.2, i32 8, <4 x i1> %mask.0)\nret void\n }\n", "entry"), VectorizationBase.Cvoid, Tuple{Ptr{Float64}, NTuple{4, VecElement{Float64}}, UInt8}, %668, %667, %590)::Nothing
└──── Base.llvmcall("%res = add nsw i64 %0, %1\nret i64 %res", Int64, Tuple{Int64, Int64}, 16, %544)::Int64
145 ┄ goto #146
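# blocks #146–#151: close the gc-preserve region, then either increment the
# outer (column) counter (%680 = %29 + 1) and jump back to #5, or return A
# once %29 === %17.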
146 ─ $(Expr(:gc_preserve_end, :(%529)))
│ %676 = (%29 === %17)::Bool
└──── goto #148 if not %676
147 ─ Base.nothing::Nothing
└──── goto #149
148 ─ %680 = Base.add_int(%29, 1)::Int64
└──── goto #149
149 ┄ %682 = φ (#148 => %680)::Int64
│ %683 = φ (#148 => %680)::Int64
│ %684 = φ (#147 => true, #148 => false)::Bool
│ %685 = Base.not_int(%684)::Bool
└──── goto #151 if not %685
150 ─ goto #5
151 ┄ return A
) => SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}
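For reference, a hedged sketch (not the gist's original source) of a plain-Julia kernel whose vectorization would plausibly produce IR like the above: the fneg + fmuladd pattern in blocks #131–#145 matches an axpy-style column update A[i,j] -= x[i] * v, unrolled four <4 x double> vectors wide with a masked tail. The function name, argument layout, and the `@simd` annotation below are illustrative assumptions.

# Hedged reconstruction for illustration; names and signature are assumptions.
function apply_reflector_column!(A::AbstractMatrix{Float64},
                                 x::AbstractVector{Float64},
                                 v::Float64, j::Integer)
    m = size(A, 1)
    @inbounds @simd for i in 2:m
        # matches the fneg + fmuladd pattern in the IR: A[i,j] = -(x[i]*v) + A[i,j]
        A[i, j] = muladd(-x[i], v, A[i, j])
    end
    return A
end

# Usage (hypothetical values):
A = rand(10, 3); x = rand(10)
apply_reflector_column!(A, x, 0.25, 2)

With `@simd` (or LoopVectorization's `@turbo`), the compiler is free to emit exactly this shape: a 16-element unrolled body plus a masked remainder, as seen in the typed IR above.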