Skip to content

Instantly share code, notes, and snippets.

@Seelengrab
Created March 27, 2024 08:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Seelengrab/ab9285de8ee1181a6a2e6e41192c3415 to your computer and use it in GitHub Desktop.
Save Seelengrab/ab9285de8ee1181a6a2e6e41192c3415 to your computer and use it in GitHub Desktop.
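Simple Vector investigation for the Julia Discourse thread https://discourse.julialang.org/t/why-is-push-still-so-slow/112170 — `@code_llvm` output for four loop variants (`length_separate_loop`, `push_loop`, `simple_mutable_vector_loop`, `simple_vector_loop`).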
julia> @code_llvm length_separate_loop(Vector{Int}(undef, 1000))
; Function Signature: length_separate_loop(Array{Int64, 1})
; @ REPL[19]:1 within `length_separate_loop`
;; Reviewer notes (lines starting with `;;`) were added by hand; lines starting with a
;; single `;` are Julia's own debug-location annotations.
;;
;; Overall shape of this function, as visible in the IR below:
;;   * `top` reads three words from the array argument and derives %7, the iteration
;;     limit for the copy of the loop that has no per-iteration bounds check.
;;   * `vector.body` plus the scalar `L6`/`L27` loop form that check-free main loop;
;;     slot i (0-based) receives the value i+1.
;;   * The `*.postloop` blocks are a second copy of the loop that still performs the
;;     bounds check and can reach `L24` (throw_boundserror).
;;   * `L33` calls an error function and never returns; `L35` is the normal return.
;; The main/postloop split and the `*.exit.selector` / `*.pseudo.exit` block names look
;; like LLVM's Inductive Range Check Elimination output — presumably that pass; confirm.
define void @julia_length_separate_loop_7603(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array") #0 {
top:
;; Stack slot used only by L24 to hand the failing index to the bounds-error call.
%"new::Tuple" = alloca [1 x i64], align 8
;; Word at offset 8 of the array is a pointer; the first i64 behind it goes into %.unbox.
;; NOTE(review): presumably the length of the backing Memory — confirm against Julia's Array layout.
%0 = getelementptr inbounds { ptr, ptr }, ptr %"a::Array", i64 0, i32 1
%1 = load ptr, ptr %0, align 8
%.unbox = load i64, ptr %1, align 8
;; Word at offset 16 is read as the array size.
%2 = getelementptr inbounds i8, ptr %"a::Array", i64 16
%.size.sroa.0.0.copyload = load i64, ptr %2, align 8
;; Word at offset 0 is the pointer every store below is based on.
%3 = load ptr, ptr %"a::Array", align 8
; @ REPL[19]:4 within `length_separate_loop`
;; %7 ends up as the size clamped into [0, 1000]; the main loop runs while counter < %7.
;; The main loop is skipped entirely when the size is negative or the clamped value is < 1.
%4 = add i64 %.size.sroa.0.0.copyload, -9223372036854775807
%smax = call i64 @llvm.smax.i64(i64 %4, i64 0)
%5 = sub i64 %.size.sroa.0.0.copyload, %smax
%isnotneg.inv = icmp slt i64 %.size.sroa.0.0.copyload, 0
%6 = call i64 @llvm.smin.i64(i64 %5, i64 1000)
%7 = call i64 @llvm.smax.i64(i64 %6, i64 0)
%.not30 = icmp slt i64 %6, 1
%.not = select i1 %isnotneg.inv, i1 true, i1 %.not30
br i1 %.not, label %main.pseudo.exit, label %L6.preheader
L6.preheader: ; preds = %top
; @ REPL[19]:6 within `length_separate_loop`
;; %umin = min(max(%.unbox, 0), %7 - 1); fewer than 32 means the vector loop is not worth entering.
%smax25 = call i64 @llvm.smax.i64(i64 %.unbox, i64 0)
%8 = add nsw i64 %7, -1
%umin = call i64 @llvm.umin.i64(i64 %smax25, i64 %8)
%min.iters.check = icmp ult i64 %umin, 32
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %L6.preheader
;; %n.vec = (%umin + 1) rounded down to a multiple of 32, except that an exact multiple
;; gives up a whole group of 32, so the scalar loop always has at least one iteration left.
%9 = add nuw i64 %umin, 1
%n.mod.vf = and i64 %9, 31
%10 = icmp eq i64 %n.mod.vf, 0
%11 = select i1 %10, i64 32, i64 %n.mod.vf
%n.vec = sub i64 %9, %11
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
; @ REPL[19]:7 within `length_separate_loop`
; ┌ @ int.jl:87 within `+`
;; 32 elements per trip: four <8 x i64> vectors holding index+1 for consecutive indices.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%12 = add nuw nsw <8 x i64> %vec.ind, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%13 = add <8 x i64> %vec.ind, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
%14 = add <8 x i64> %vec.ind, <i64 17, i64 17, i64 17, i64 17, i64 17, i64 17, i64 17, i64 17>
%15 = add <8 x i64> %vec.ind, <i64 25, i64 25, i64 25, i64 25, i64 25, i64 25, i64 25, i64 25>
; └
; @ REPL[19]:8 within `length_separate_loop`
; ┌ @ array.jl:976 within `setindex!`
;; Plain vector stores relative to %3; no bounds check and no error branch inside this block.
%16 = getelementptr inbounds i64, ptr %3, i64 %index
store <8 x i64> %12, ptr %16, align 8
%17 = getelementptr inbounds i64, ptr %16, i64 8
store <8 x i64> %13, ptr %17, align 8
%18 = getelementptr inbounds i64, ptr %16, i64 16
store <8 x i64> %14, ptr %18, align 8
%19 = getelementptr inbounds i64, ptr %16, i64 24
store <8 x i64> %15, ptr %19, align 8
; └
; @ REPL[19]:7 within `length_separate_loop`
; ┌ @ int.jl:87 within `+`
%index.next = add nuw i64 %index, 32
%vec.ind.next = add <8 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%20 = icmp eq i64 %index.next, %n.vec
br i1 %20, label %scalar.ph, label %vector.body
scalar.ph: ; preds = %vector.body, %L6.preheader
;; Scalar loop resumes at 0 (vector loop skipped) or at %n.vec (vector loop finished).
%bc.resume.val = phi i64 [ 0, %L6.preheader ], [ %n.vec, %vector.body ]
; └
; @ REPL[19]:6 within `length_separate_loop`
br label %L6
L6: ; preds = %L27, %scalar.ph
;; Scalar main loop head: reaching max(%.unbox, 0) diverts to the error call in L33.
%value_phi116 = phi i64 [ %21, %L27 ], [ %bc.resume.val, %scalar.ph ]
; ┌ @ int.jl:514 within `<=`
%exitcond26 = icmp eq i64 %value_phi116, %smax25
; └
br i1 %exitcond26, label %L33, label %L27
L24: ; preds = %L12.postloop
; @ REPL[19]:8 within `length_separate_loop`
; ┌ @ array.jl:975 within `setindex!`
;; Bounds-error exit, reachable only from the postloop copy; passes the array and the index tuple.
store i64 %25, ptr %"new::Tuple", align 8
call void @j_throw_boundserror_7616(ptr nonnull %"a::Array", ptr nocapture nonnull readonly %"new::Tuple") #9
unreachable
L27: ; preds = %L6
; └
; @ REPL[19]:7 within `length_separate_loop`
; ┌ @ int.jl:87 within `+`
%21 = add nuw nsw i64 %value_phi116, 1
; └
; @ REPL[19]:8 within `length_separate_loop`
; ┌ @ array.jl:976 within `setindex!`
;; Scalar store of counter+1 into slot `counter`; again no bounds check on this path.
%22 = getelementptr inbounds i64, ptr %3, i64 %value_phi116
store i64 %21, ptr %22, align 8
; └
; @ REPL[19]:4 within `length_separate_loop`
%.not31 = icmp ult i64 %21, %7
br i1 %.not31, label %L6, label %main.exit.selector
main.exit.selector: ; preds = %L27
;; Counter >= 999 means the function is done (L35); otherwise the remaining iterations
;; continue in the bounds-checked postloop.
%23 = icmp ult i64 %value_phi116, 999
br i1 %23, label %main.pseudo.exit, label %L35
main.pseudo.exit: ; preds = %main.exit.selector, %top
;; %umax = max(resume counter, size) is the index at which the postloop's bounds check fails.
%value_phi116.copy = phi i64 [ 0, %top ], [ %21, %main.exit.selector ]
%umax = call i64 @llvm.umax.i64(i64 %value_phi116.copy, i64 %.size.sroa.0.0.copyload)
br label %L6.postloop
L33: ; preds = %L6.postloop, %L6
; @ REPL[19]:10 within `length_separate_loop`
;; Error exit: the callee receives a global object and control never returns here.
call void @j_error_7618(ptr nonnull @"jl_global#7619.jit") #9
unreachable
L35: ; preds = %L27.postloop, %main.exit.selector
ret void
L6.postloop: ; preds = %L27.postloop, %main.pseudo.exit
;; Postloop: same loop body, but with both the %.unbox comparison and the bounds check kept.
%value_phi116.postloop = phi i64 [ %value_phi116.copy, %main.pseudo.exit ], [ %25, %L27.postloop ]
%value_phi15.postloop = phi i64 [ %value_phi116.copy, %main.pseudo.exit ], [ %24, %L27.postloop ]
; @ REPL[19]:5 within `length_separate_loop`
; ┌ @ int.jl:87 within `+`
%24 = add nuw nsw i64 %value_phi15.postloop, 1
; └
; @ REPL[19]:6 within `length_separate_loop`
; ┌ @ int.jl:514 within `<=`
%.not.postloop.not = icmp slt i64 %value_phi15.postloop, %.unbox
; └
br i1 %.not.postloop.not, label %L12.postloop, label %L33
L12.postloop: ; preds = %L6.postloop
; @ REPL[19]:7 within `length_separate_loop`
; ┌ @ int.jl:87 within `+`
%25 = add i64 %value_phi116.postloop, 1
; └
; @ REPL[19]:8 within `length_separate_loop`
; ┌ @ array.jl:975 within `setindex!`
; │┌ @ int.jl:513 within `<`
%exitcond.not = icmp eq i64 %value_phi116.postloop, %umax
; │└
br i1 %exitcond.not, label %L24, label %L27.postloop
L27.postloop: ; preds = %L12.postloop
; │ @ array.jl:976 within `setindex!`
%26 = getelementptr inbounds i64, ptr %3, i64 %value_phi116.postloop
store i64 %24, ptr %26, align 8
; └
; @ REPL[19]:4 within `length_separate_loop`
; ┌ @ int.jl:83 within `<`
;; Loop ends once the pre-increment counter exceeds 998.
%27 = icmp sgt i64 %value_phi15.postloop, 998
; └
br i1 %27, label %L35, label %L6.postloop
}
julia> @code_llvm push_loop(Vector{Int}(undef, 1000))
; Function Signature: push_loop(Array{Int64, 1})
; @ REPL[3]:1 within `push_loop`
;; Reviewer notes (lines starting with `;;`) were added by hand; lines starting with a
;; single `;` are Julia's own debug-location annotations.
;;
;; Overall shape of this function, as visible in the IR below:
;;   * `top` builds a 6-word GC frame, links it into the task's frame list, and preloads
;;     the array's three words (offset 0, offset 8, offset 16).
;;   * `L5` -> (`L20`) -> `L23` is a single scalar loop that runs until the counter reaches
;;     1000. Each trip stores the new size into the array, compares the needed extent with
;;     the value loaded from the memory object, and calls out of line (`L20`) when the
;;     needed extent is larger.
;;   * `L31` unlinks the GC frame and returns.
;; There is no `vector.body` block anywhere in this function.
define void @julia_push_loop_7591(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array") #0 {
top:
;; GC frame: 6 pointer-sized words, zeroed. Word 0 is a header, word 1 the link to the
;; previous frame, words 2..5 are slots (%0, %gc_slot_addr1..3).
%gcframe1 = alloca [6 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 48, i1 true)
%gc_slot_addr3 = getelementptr inbounds ptr, ptr %gcframe1, i64 5
%gc_slot_addr2 = getelementptr inbounds ptr, ptr %gcframe1, i64 4
%gc_slot_addr1 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
%0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
;; %1 is the sret return buffer and %2 the argument record for the out-of-line call in L20.
%1 = alloca { ptr, ptr }, align 8
%2 = alloca { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, align 8
;; Locate the GC-stack pointer through thread-local storage (x86-64 %fs-relative read).
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
;; Header word is 16. NOTE(review): presumably encodes the four slots — confirm against Julia's GC frame layout.
store i64 16, ptr %gcframe1, align 16
;; Push: remember the previous frame, then make this frame the current one.
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
;; %3 -> array word at offset 8 (pointer to the memory object); %4 -> array word at offset 16 (size).
%3 = getelementptr inbounds { ptr, ptr }, ptr %"a::Array", i64 0, i32 1
%4 = getelementptr inbounds i8, ptr %"a::Array", i64 16
;; Field addresses of the argument record %2, computed once and reused by every L20 visit.
%.fca.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 1
%.fca.2.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 2
%.fca.3.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 3
%.fca.4.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 4
%.fca.5.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 5
%.fca.6.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 6
%.fca.7.0.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 7, i32 0
%.fca.7.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %2, i64 0, i32 7, i32 1
; @ REPL[3]:5 within `push_loop`
; ┌ @ array.jl:1245 within `push!`
; │┌ @ array.jl:1091 within `_growend!`
; ││┌ @ Base.jl:49 within `getproperty`
;; Initial values for the loop-carried phis: element pointer, memory object, size.
%.pre = load ptr, ptr %"a::Array", align 8
%.pre22 = load ptr, ptr %3, align 8
; ││└
; ││ @ array.jl:1094 within `_growend!`
; ││┌ @ essentials.jl:11 within `length`
%.size.sroa.0.0.copyload.pre = load i64, ptr %4, align 8
; └└└
; @ REPL[3]:3 within `push_loop`
br label %L5
L5: ; preds = %L23, %top
;; Loop-carried state: %5 = memory object, %6 = element pointer, size, and the counter.
%5 = phi ptr [ %.pre22, %top ], [ %16, %L23 ]
%6 = phi ptr [ %.pre, %top ], [ %17, %L23 ]
; @ REPL[3]:5 within `push_loop`
; ┌ @ array.jl:1245 within `push!`
; │┌ @ array.jl:1094 within `_growend!`
; ││┌ @ essentials.jl:11 within `length`
%.size.sroa.0.0.copyload = phi i64 [ %.size.sroa.0.0.copyload.pre, %top ], [ %.size11.sroa.0.0.copyload, %L23 ]
%value_phi21 = phi i64 [ 0, %top ], [ %7, %L23 ]
; └└└
; @ REPL[3]:4 within `push_loop`
; ┌ @ int.jl:87 within `+`
%7 = add nuw nsw i64 %value_phi21, 1
; └
; @ REPL[3]:5 within `push_loop`
; ┌ @ array.jl:1245 within `push!`
; │┌ @ array.jl:1095 within `_growend!`
; ││┌ @ int.jl:87 within `+`
%8 = add i64 %.size.sroa.0.0.copyload, 1
; ││└
; ││ @ array.jl:1096 within `_growend!`
;; %13 = (element pointer - memory object's data pointer) / 8, i.e. the element offset of
;; the array's first element inside the memory; %14 = that offset + 1.
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %5, i64 0, i32 1
%9 = load ptr, ptr %.data_ptr, align 8
%10 = ptrtoint ptr %9 to i64
%11 = ptrtoint ptr %6 to i64
%12 = sub i64 %11, %10
%13 = lshr exact i64 %12, 3
%14 = add nuw nsw i64 %13, 1
; ││ @ array.jl:1097 within `_growend!`
;; The incremented size is written back to the array on every iteration.
store i64 %8, ptr %4, align 8
; ││ @ array.jl:1098 within `_growend!`
; ││┌ @ int.jl:86 within `-`
%15 = add i64 %14, %.size.sroa.0.0.copyload
; ││└
; ││ @ array.jl:1099 within `_growend!`
; ││┌ @ int.jl:83 within `<`
;; First i64 of the memory object is reloaded every iteration and compared with the needed extent %15.
%.unbox = load i64, ptr %5, align 8
%.not = icmp slt i64 %.unbox, %15
; ││└
br i1 %.not, label %L20, label %L23
L20: ; preds = %L5
;; Slow path: park the live pointers in GC slots, fill the argument record, call out of line.
store ptr %"a::Array", ptr %gc_slot_addr3, align 8
; ││ @ array.jl:1100 within `_growend!`
store ptr %"a::Array", ptr %2, align 8
store i64 %15, ptr %.fca.1.gep, align 8
store i64 %14, ptr %.fca.2.gep, align 8
store i64 %8, ptr %.fca.3.gep, align 8
store i64 %.size.sroa.0.0.copyload, ptr %.fca.4.gep, align 8
store i64 %.unbox, ptr %.fca.5.gep, align 8
store ptr %5, ptr %gc_slot_addr2, align 16
store ptr %5, ptr %.fca.6.gep, align 8
store ptr %6, ptr %.fca.7.0.gep, align 8
store ptr %5, ptr %gc_slot_addr1, align 8
store ptr %5, ptr %.fca.7.1.gep, align 8
;; NOTE(review): `#131` is presumably the closure that reallocates the backing memory — confirm in array.jl.
call void @"j_#131_7600"(ptr noalias nocapture noundef nonnull sret({ ptr, ptr }) %1, ptr noalias nocapture noundef nonnull %0, ptr nocapture nonnull readonly %2)
; │└
; │ @ array.jl:1246 within `push!`
; │┌ @ essentials.jl:11 within `length`
;; After the call the size and both array pointers are re-read from the array object.
%.size11.sroa.0.0.copyload.pre = load i64, ptr %4, align 8
; │└
; │┌ @ array.jl:990 within `__safe_setindex!`
; ││┌ @ Base.jl:49 within `getproperty`
%.pre25 = load ptr, ptr %"a::Array", align 8
%.pre26 = load ptr, ptr %3, align 8
; │└└
; │ @ array.jl:1245 within `push!`
; │┌ @ array.jl:1100 within `_growend!`
br label %L23
L23: ; preds = %L20, %L5
; │└
; │ @ array.jl:1246 within `push!`
; │┌ @ array.jl:990 within `__safe_setindex!`
; ││┌ @ Base.jl:49 within `getproperty`
;; Merge fast and slow path, then store the counter value into slot (size - 1).
%16 = phi ptr [ %5, %L5 ], [ %.pre26, %L20 ]
%17 = phi ptr [ %6, %L5 ], [ %.pre25, %L20 ]
; │└└
; │┌ @ essentials.jl:11 within `length`
%.size11.sroa.0.0.copyload = phi i64 [ %8, %L5 ], [ %.size11.sroa.0.0.copyload.pre, %L20 ]
; │└
; │┌ @ array.jl:990 within `__safe_setindex!`
%18 = add i64 %.size11.sroa.0.0.copyload, -1
%19 = getelementptr inbounds i64, ptr %17, i64 %18
store i64 %7, ptr %19, align 8
; └└
; @ REPL[3]:3 within `push_loop`
; ┌ @ int.jl:83 within `<`
%exitcond = icmp eq i64 %7, 1000
; └
br i1 %exitcond, label %L31, label %L5
L31: ; preds = %L23
;; Pop the GC frame by restoring the previous frame pointer.
%frame.prev32 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev32, ptr %tls_pgcstack, align 8
; @ REPL[3]:6 within `push_loop`
ret void
}
julia> @code_llvm simple_mutable_vector_loop(SimpleMutableVector{Int}(0, Memory{Int}(undef, 1000)))
; Function Signature: simple_mutable_vector_loop(Main.SimpleMutableVector{Int64})
; @ REPL[7]:1 within `simple_mutable_vector_loop`
;; Reviewer notes (lines starting with `;;`) were added by hand; lines starting with a
;; single `;` are Julia's own debug-location annotations.
;;
;; Overall shape of this function, as visible in the IR below:
;;   * `top` pushes a 3-word GC frame, stores 0 into the first word of the argument, loads
;;     the memory object from offset 8 and its first i64 (%.unbox), and derives the two
;;     split points %exit.preloop.at and %exit.mainloop.at.
;;   * The loop exists in three copies: `*.preloop`, the main copy (`L5`/`L10`/`idxend`/`load`)
;;     and `*.postloop`. The main copy keeps only the byte-offset comparison; the other two
;;     also keep the `%.unbox + index < 2 * %.unbox` comparison.
;;   * Every copy stores the incremented index back into the argument and reloads the data
;;     pointer on each iteration; there is no `vector.body` block in this function.
;;   * `L23` loads the element at index %.unbox - 1 and returns it; `L21`, `oob`, `oob13`
;;     and `oob23` are non-returning error exits.
define i64 @julia_simple_mutable_vector_loop_7623(ptr noundef nonnull align 8 dereferenceable(16) %"a::SimpleMutableVector") #0 {
top:
;; GC frame: header, previous-frame link, one slot (used only on the error paths).
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
;; Header word is 4. NOTE(review): presumably encodes the single slot — confirm against Julia's GC frame layout.
store i64 4, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
; @ REPL[7]:2 within `simple_mutable_vector_loop`
; ┌ @ Base.jl:53 within `setproperty!`
;; First word of the argument is reset to 0; the second word is the memory object pointer.
store i64 0, ptr %"a::SimpleMutableVector", align 8
%.mem_ptr = getelementptr inbounds i8, ptr %"a::SimpleMutableVector", i64 8
%.mem = load atomic ptr, ptr %.mem_ptr unordered, align 8
;; %.unbox = first i64 of the memory object; %.data_ptr = address of its second word.
%.unbox = load i64, ptr %.mem, align 8
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %.mem, i64 0, i32 1
; └
; @ REPL[7]:4 within `simple_mutable_vector_loop`
;; Split points for the three loop copies, both capped at 1000. The preloop is skipped
;; when %exit.preloop.at is 0.
%smax = call i64 @llvm.smax.i64(i64 %.unbox, i64 0)
%smin = call i64 @llvm.smin.i64(i64 %.unbox, i64 0)
%0 = shl i64 %.unbox, 1
%isnotneg = icmp sgt i64 %0, -1
%1 = select i1 %isnotneg, i64 %smin, i64 0
%2 = sub i64 0, %1
%exit.preloop.at = call i64 @llvm.umin.i64(i64 %2, i64 1000)
%smin64 = call i64 @llvm.smin.i64(i64 %.unbox, i64 %0)
%3 = sub i64 %0, %smin64
%4 = call i64 @llvm.umin.i64(i64 %3, i64 1000)
%exit.mainloop.at = select i1 %isnotneg, i64 %4, i64 0
%.not = icmp eq i64 %exit.preloop.at, 0
br i1 %.not, label %mainloop, label %L5.preloop
mainloop: ; preds = %preloop.exit.selector, %top
;; Entry to the main copy: start from 0 or from where the preloop stopped.
%.copy = phi i64 [ 0, %top ], [ %23, %preloop.exit.selector ]
%indvar.end = phi i64 [ 0, %top ], [ %22, %preloop.exit.selector ]
%5 = icmp ult i64 %indvar.end, %exit.mainloop.at
br i1 %5, label %L5, label %postloop
L5: ; preds = %load, %mainloop
;; %6 = value last stored into the argument's first word; %value_phi45 = loop counter.
%6 = phi i64 [ %8, %load ], [ %.copy, %mainloop ]
%value_phi45 = phi i64 [ %7, %load ], [ %indvar.end, %mainloop ]
; @ REPL[7]:5 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%7 = add nuw nsw i64 %value_phi45, 1
; └
; @ REPL[7]:6 within `simple_mutable_vector_loop`
; ┌ @ int.jl:514 within `<=`
;; Counter reaching max(%.unbox, 0) diverts to the error call in L21.
%exitcond = icmp eq i64 %value_phi45, %smax
; └
br i1 %exitcond, label %L21, label %L10
L10: ; preds = %L5
; @ REPL[7]:7 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%8 = add i64 %6, 1
; └
; ┌ @ Base.jl:53 within `setproperty!`
;; Store to the argument object inside the loop body.
store i64 %8, ptr %"a::SimpleMutableVector", align 8
; └
; @ REPL[7]:8 within `simple_mutable_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
;; Data pointer is reloaded from the memory object on every iteration.
%9 = load ptr, ptr %.data_ptr, align 8
;; Remaining check in the main copy: byte offset (index * 8) below %.unbox * 8.
%.idx90 = shl nsw i64 %6, 3
%10 = shl nuw nsw i64 %.unbox, 3
%11 = icmp ult i64 %.idx90, %10
br i1 %11, label %idxend, label %oob
L21: ; preds = %L5.postloop, %L5.preloop, %L5
; └
; @ REPL[7]:10 within `simple_mutable_vector_loop`
;; Error exit shared by all three loop copies; never returns.
call void @j_error_7634(ptr nonnull @"jl_global#7635.jit") #4
unreachable
L23: ; preds = %load.postloop, %preloop.exit.selector, %main.exit.selector
; @ REPL[7]:8 within `simple_mutable_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
;; Loop finished: pick up the data pointer and the byte size from whichever copy exited.
%.lcssa57 = phi ptr [ %9, %main.exit.selector ], [ %24, %preloop.exit.selector ], [ %35, %load.postloop ]
%.lcssa48 = phi i64 [ %10, %main.exit.selector ], [ %26, %preloop.exit.selector ], [ %37, %load.postloop ]
; └
; @ REPL[7]:13 within `simple_mutable_vector_loop`
; ┌ @ essentials.jl:375 within `getindex`
;; Bounds check for reading the element at index %.unbox - 1.
%12 = add i64 %.unbox, -1
%13 = add i64 %12, %.unbox
%.not38 = icmp ult i64 %13, %0
%.idx94 = shl nsw i64 %12, 3
%14 = icmp ult i64 %.idx94, %.lcssa48
%15 = and i1 %.not38, %14
br i1 %15, label %idxend28, label %oob23
oob: ; preds = %L10.postloop, %L10.preloop, %L10
; └
; @ REPL[7]:7 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
;; Bounds-error exit for the store: allocate a 32-byte box, fill it with the data pointer
;; and the memory object, and pass it with the offending index to the runtime.
%.lcssa58 = phi i64 [ %34, %L10.postloop ], [ %8, %L10 ], [ %23, %L10.preloop ]
; └
; @ REPL[7]:8 within `simple_mutable_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%.lcssa55 = phi ptr [ %35, %L10.postloop ], [ %9, %L10 ], [ %24, %L10.preloop ]
;; The memory object is placed in the GC slot before the allocation call.
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %.mem, ptr %gc_slot_addr_0, align 16
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"box::GenericMemoryRef" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 133184687382352) #10
%"box::GenericMemoryRef.tag_addr" = getelementptr inbounds i64, ptr %"box::GenericMemoryRef", i64 -1
store atomic i64 133184687382352, ptr %"box::GenericMemoryRef.tag_addr" unordered, align 8
store ptr %.lcssa55, ptr %"box::GenericMemoryRef", align 8
%"box::GenericMemoryRef.repack36" = getelementptr inbounds { ptr, ptr }, ptr %"box::GenericMemoryRef", i64 0, i32 1
store ptr %.mem, ptr %"box::GenericMemoryRef.repack36", align 8
call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef", i64 %.lcssa58)
unreachable
idxend: ; preds = %L10
; │ @ genericmemory.jl:213 within `setindex!`
;; Second per-iteration test in the main copy: %.unbox == 0 goes to oob13.
%16 = icmp eq i64 %.unbox, 0
br i1 %16, label %oob13, label %load
oob13: ; preds = %idxend.postloop, %idxend.preloop, %idxend
;; Bounds-error exit for %.unbox == 0: reports index 1 on the memory object itself.
call void @ijl_bounds_error_int(ptr nonnull %.mem, i64 1)
unreachable
load: ; preds = %idxend
;; The actual element store: counter + 1 goes to data[%6].
%17 = getelementptr inbounds i64, ptr %9, i64 %6
store i64 %7, ptr %17, align 8
; └
; @ REPL[7]:4 within `simple_mutable_vector_loop`
%.not91 = icmp ult i64 %7, %exit.mainloop.at
br i1 %.not91, label %L5, label %main.exit.selector
main.exit.selector: ; preds = %load
;; Counter >= 999 means the loop is complete (L23); otherwise continue in the postloop.
%18 = icmp ult i64 %value_phi45, 999
br i1 %18, label %postloop, label %L23
oob23: ; preds = %L23
;; Bounds-error exit for the final read; same boxing sequence as `oob`, index = %.unbox.
%gc_slot_addr_0210 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %.mem, ptr %gc_slot_addr_0210, align 16
; @ REPL[7]:13 within `simple_mutable_vector_loop`
; ┌ @ essentials.jl:375 within `getindex`
%ptls_field219 = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load220 = load ptr, ptr %ptls_field219, align 8
%"box::GenericMemoryRef27" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load220, i32 800, i32 32, i64 133184687382352) #10
%"box::GenericMemoryRef27.tag_addr" = getelementptr inbounds i64, ptr %"box::GenericMemoryRef27", i64 -1
store atomic i64 133184687382352, ptr %"box::GenericMemoryRef27.tag_addr" unordered, align 8
store ptr %.lcssa57, ptr %"box::GenericMemoryRef27", align 8
%"box::GenericMemoryRef27.repack39" = getelementptr inbounds { ptr, ptr }, ptr %"box::GenericMemoryRef27", i64 0, i32 1
store ptr %.mem, ptr %"box::GenericMemoryRef27.repack39", align 8
call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef27", i64 %.unbox)
unreachable
idxend28: ; preds = %L23
;; Normal exit: load the element, pop the GC frame, return the loaded value.
%19 = getelementptr inbounds i64, ptr %.lcssa57, i64 %12
%20 = load i64, ptr %19, align 8
%frame.prev221 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev221, ptr %tls_pgcstack, align 8
; └
ret i64 %20
L5.preloop: ; preds = %load.preloop, %top
;; Preloop copy: same body as the main copy with the full two-part bounds check.
%21 = phi i64 [ %23, %load.preloop ], [ 0, %top ]
%value_phi45.preloop = phi i64 [ %22, %load.preloop ], [ 0, %top ]
; @ REPL[7]:5 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%22 = add nuw nsw i64 %value_phi45.preloop, 1
; └
; @ REPL[7]:6 within `simple_mutable_vector_loop`
; ┌ @ int.jl:514 within `<=`
%exitcond.preloop = icmp eq i64 %value_phi45.preloop, %smax
; └
br i1 %exitcond.preloop, label %L21, label %L10.preloop
L10.preloop: ; preds = %L5.preloop
; @ REPL[7]:7 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%23 = add i64 %21, 1
; └
; ┌ @ Base.jl:53 within `setproperty!`
store i64 %23, ptr %"a::SimpleMutableVector", align 8
; └
; @ REPL[7]:8 within `simple_mutable_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%24 = load ptr, ptr %.data_ptr, align 8
%25 = add i64 %.unbox, %21
%.not35.preloop = icmp ult i64 %25, %0
%.idx92 = shl nsw i64 %21, 3
%26 = shl nuw nsw i64 %.unbox, 3
%27 = icmp ult i64 %.idx92, %26
%28 = and i1 %.not35.preloop, %27
br i1 %28, label %idxend.preloop, label %oob
idxend.preloop: ; preds = %L10.preloop
; │ @ genericmemory.jl:213 within `setindex!`
%29 = icmp eq i64 %.unbox, 0
br i1 %29, label %oob13, label %load.preloop
load.preloop: ; preds = %idxend.preloop
%30 = getelementptr inbounds i64, ptr %24, i64 %21
store i64 %22, ptr %30, align 8
; └
; @ REPL[7]:4 within `simple_mutable_vector_loop`
%.not93 = icmp ult i64 %22, %exit.preloop.at
br i1 %.not93, label %L5.preloop, label %preloop.exit.selector
preloop.exit.selector: ; preds = %load.preloop
%31 = icmp ult i64 %value_phi45.preloop, 999
br i1 %31, label %mainloop, label %L23
postloop: ; preds = %main.exit.selector, %mainloop
;; Entry to the postloop copy: carry over index and counter from the main copy.
%.copy65 = phi i64 [ %.copy, %mainloop ], [ %8, %main.exit.selector ]
%value_phi45.copy = phi i64 [ %indvar.end, %mainloop ], [ %7, %main.exit.selector ]
br label %L5.postloop
L5.postloop: ; preds = %load.postloop, %postloop
;; Postloop copy: same body again with the full two-part bounds check; exits at counter 1000.
%32 = phi i64 [ %.copy65, %postloop ], [ %34, %load.postloop ]
%value_phi45.postloop = phi i64 [ %value_phi45.copy, %postloop ], [ %33, %load.postloop ]
; @ REPL[7]:5 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%33 = add nuw nsw i64 %value_phi45.postloop, 1
; └
; @ REPL[7]:6 within `simple_mutable_vector_loop`
; ┌ @ int.jl:514 within `<=`
%exitcond.postloop = icmp eq i64 %value_phi45.postloop, %smax
; └
br i1 %exitcond.postloop, label %L21, label %L10.postloop
L10.postloop: ; preds = %L5.postloop
; @ REPL[7]:7 within `simple_mutable_vector_loop`
; ┌ @ int.jl:87 within `+`
%34 = add i64 %32, 1
; └
; ┌ @ Base.jl:53 within `setproperty!`
store i64 %34, ptr %"a::SimpleMutableVector", align 8
; └
; @ REPL[7]:8 within `simple_mutable_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%35 = load ptr, ptr %.data_ptr, align 8
%36 = add i64 %.unbox, %32
%.not35.postloop = icmp ult i64 %36, %0
%.idx = shl nsw i64 %32, 3
%37 = shl nuw nsw i64 %.unbox, 3
%38 = icmp ult i64 %.idx, %37
%39 = and i1 %.not35.postloop, %38
br i1 %39, label %idxend.postloop, label %oob
idxend.postloop: ; preds = %L10.postloop
; │ @ genericmemory.jl:213 within `setindex!`
%40 = icmp eq i64 %.unbox, 0
br i1 %40, label %oob13, label %load.postloop
load.postloop: ; preds = %idxend.postloop
%41 = getelementptr inbounds i64, ptr %35, i64 %32
store i64 %33, ptr %41, align 8
; └
; @ REPL[7]:4 within `simple_mutable_vector_loop`
; ┌ @ int.jl:83 within `<`
%exitcond46.postloop = icmp eq i64 %33, 1000
; └
br i1 %exitcond46.postloop, label %L23, label %L5.postloop
}
julia> @code_llvm simple_vector_loop(SimpleVector{Int}(0, Memory{Int}(undef, 1000)))
; Function Signature: simple_vector_loop(Main.SimpleVector{Int64})
; @ REPL[11]:1 within `simple_vector_loop`
;; Reviewer notes (lines starting with `;;`) were added by hand; lines starting with a
;; single `;` are Julia's own debug-location annotations.
;;
;; Overall shape of this function, as visible in the IR below:
;;   * The argument is marked `readonly`, and nothing below stores through it; the loop
;;     indices live purely in SSA values.
;;   * `top` loads the memory object (offset 8 of the argument), its first i64 (%.unbox)
;;     and its data pointer (%0) exactly once; no GC frame is set up.
;;   * The loop exists in three copies: `*.preloop` and `*.postloop` keep the full two-part
;;     bounds check, while the main copy is a 32-elements-per-trip `vector.body` followed
;;     by the scalar remainder `L9`/`L13`/`idxend`, which keeps only the byte-offset check.
;;   * `L22` loads the element at index %.unbox - 1 and returns it; `L20`, `oob` and
;;     `oob17` are non-returning error exits.
define i64 @julia_simple_vector_loop_7637(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %"a::SimpleVector") #0 {
top:
;; The GC-stack pointer is fetched only because the error paths allocate through it.
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #11
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
; @ REPL[11]:2 within `simple_vector_loop`
; ┌ @ Base.jl:49 within `getproperty`
%"a::SimpleVector.mem_ptr" = getelementptr inbounds { i64, ptr }, ptr %"a::SimpleVector", i64 0, i32 1
%"a::SimpleVector.mem" = load atomic ptr, ptr %"a::SimpleVector.mem_ptr" unordered, align 8
;; %.unbox = first i64 of the memory object, frozen so later uses see one fixed value.
%.unbox = load i64, ptr %"a::SimpleVector.mem", align 8
%.unbox.fr = freeze i64 %.unbox
;; Data pointer loaded once here and reused by every store and by the final load.
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %"a::SimpleVector.mem", i64 0, i32 1
%0 = load ptr, ptr %.data_ptr, align 8
;; %1 = 2 * %.unbox and %2 = 8 * %.unbox are the two limits used by the bounds checks.
%1 = shl i64 %.unbox.fr, 1
%2 = shl i64 %.unbox.fr, 3
%3 = icmp eq i64 %.unbox.fr, 0
; └
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:213 within `setindex!`
;; %.unbox == 0 goes straight to the error call in L20.
br i1 %3, label %L20, label %top.split
top.split: ; preds = %top
; └
; @ REPL[11]:4 within `simple_vector_loop`
;; Split points for the three loop copies, both clamped into [0, 1000]. The preloop is
;; skipped when %smin36 < 1.
%smax = call i64 @llvm.smax.i64(i64 %.unbox.fr, i64 -9223372036854775807)
%isnotneg = icmp sgt i64 %1, -1
%4 = select i1 %isnotneg, i64 %smax, i64 0
%5 = sub nsw i64 0, %4
%smin36 = call i64 @llvm.smin.i64(i64 %5, i64 1000)
%exit.preloop.at = call i64 @llvm.smax.i64(i64 %smin36, i64 0)
%6 = add i64 %1, -9223372036854775807
%smax38 = call i64 @llvm.smax.i64(i64 %.unbox.fr, i64 %6)
%7 = sub i64 %1, %smax38
%8 = call i64 @llvm.smin.i64(i64 %7, i64 1000)
%9 = call i64 @llvm.smax.i64(i64 %8, i64 0)
%exit.mainloop.at = select i1 %isnotneg, i64 %9, i64 0
%.not = icmp slt i64 %smin36, 1
br i1 %.not, label %preloop.pseudo.exit, label %L9.preloop.preheader
L9.preloop.preheader: ; preds = %top.split
; @ REPL[11]:6 within `simple_vector_loop`
%smax55 = call i64 @llvm.smax.i64(i64 %.unbox.fr, i64 0)
br label %L9.preloop
L9.preheader: ; preds = %preloop.pseudo.exit
;; %umin90 = minimum of three distances measured from the start index: to the main-loop
;; limit (%12), to the end of the memory in elements (%15), and to %smax57 (%16).
;; Fewer than 32 means the vector loop is not entered.
%smax57 = call i64 @llvm.smax.i64(i64 %value_phi233.preloop.copy, i64 %.unbox.fr)
%10 = xor i64 %value_phi233.preloop.copy, -1
%11 = add i64 %exit.mainloop.at, %10
%12 = freeze i64 %11
%13 = shl nuw nsw i64 %value_phi233.preloop.copy, 3
%14 = call i64 @llvm.usub.sat.i64(i64 %2, i64 %13)
%15 = lshr exact i64 %14, 3
%16 = sub i64 %smax57, %value_phi233.preloop.copy
%umin = call i64 @llvm.umin.i64(i64 %15, i64 %16)
%umin90 = call i64 @llvm.umin.i64(i64 %12, i64 %umin)
%min.iters.check = icmp ult i64 %umin90, 32
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %L9.preheader
;; %n.vec = (%umin90 + 1) rounded down to a multiple of 32, except that an exact multiple
;; gives up a whole group of 32, so the scalar loop always has at least one iteration left.
%17 = add nuw nsw i64 %umin90, 1
%n.mod.vf = and i64 %17, 31
%18 = icmp eq i64 %n.mod.vf, 0
%19 = select i1 %18, i64 32, i64 %n.mod.vf
%n.vec = sub nsw i64 %17, %19
%ind.end = add i64 %value_phi233.preloop.copy, %n.vec
;; Initial index vector: start index broadcast to 8 lanes plus <0..7>.
%.splatinsert = insertelement <8 x i64> poison, i64 %value_phi233.preloop.copy, i64 0
%.splat = shufflevector <8 x i64> %.splatinsert, <8 x i64> poison, <8 x i32> zeroinitializer
%induction = add <8 x i64> %.splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
;; 32 elements per trip: four <8 x i64> stores of index+1, with no check inside the block.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <8 x i64> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ]
%offset.idx = add i64 %value_phi233.preloop.copy, %index
; @ REPL[11]:5 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%20 = add nuw nsw <8 x i64> %vec.ind, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%21 = add <8 x i64> %vec.ind, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
%22 = add <8 x i64> %vec.ind, <i64 17, i64 17, i64 17, i64 17, i64 17, i64 17, i64 17, i64 17>
%23 = add <8 x i64> %vec.ind, <i64 25, i64 25, i64 25, i64 25, i64 25, i64 25, i64 25, i64 25>
; └
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:213 within `setindex!`
%24 = getelementptr inbounds i64, ptr %0, i64 %offset.idx
store <8 x i64> %20, ptr %24, align 8
%25 = getelementptr inbounds i64, ptr %24, i64 8
store <8 x i64> %21, ptr %25, align 8
%26 = getelementptr inbounds i64, ptr %24, i64 16
store <8 x i64> %22, ptr %26, align 8
%27 = getelementptr inbounds i64, ptr %24, i64 24
store <8 x i64> %23, ptr %27, align 8
%index.next = add nuw i64 %index, 32
%vec.ind.next = add <8 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%28 = icmp eq i64 %index.next, %n.vec
br i1 %28, label %scalar.ph, label %vector.body
scalar.ph: ; preds = %vector.body, %L9.preheader
;; Scalar remainder resumes at the start index (vector loop skipped) or at %ind.end.
%bc.resume.val = phi i64 [ %value_phi233.preloop.copy, %L9.preheader ], [ %ind.end, %vector.body ]
; └
; @ REPL[11]:6 within `simple_vector_loop`
br label %L9
L9: ; preds = %idxend, %scalar.ph
;; Two counters advance in lockstep: %value_phi233 (store index) and %value_phi32 (loop counter).
%value_phi233 = phi i64 [ %30, %idxend ], [ %bc.resume.val, %scalar.ph ]
%value_phi32 = phi i64 [ %29, %idxend ], [ %bc.resume.val, %scalar.ph ]
; @ REPL[11]:5 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%29 = add nuw nsw i64 %value_phi32, 1
; └
; @ REPL[11]:6 within `simple_vector_loop`
; ┌ @ int.jl:514 within `<=`
;; Index reaching %smax57 diverts to the error call in L20.
%exitcond58 = icmp eq i64 %value_phi233, %smax57
; └
br i1 %exitcond58, label %L20, label %L13
L13: ; preds = %L9
; @ REPL[11]:7 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%30 = add nuw nsw i64 %value_phi233, 1
; └
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
;; Remaining check in the scalar main copy: byte offset (index * 8) below %.unbox * 8.
%.idx69 = shl nsw i64 %value_phi233, 3
%31 = icmp ult i64 %.idx69, %2
br i1 %31, label %idxend, label %oob
L20: ; preds = %L9.postloop, %L9.preloop, %L9, %top
; └
; @ REPL[11]:10 within `simple_vector_loop`
;; Error exit shared by `top` and all three loop copies; never returns.
call void @j_error_7647(ptr nonnull @"jl_global#7648.jit") #5
unreachable
L22: ; preds = %idxend.postloop, %preloop.exit.selector, %main.exit.selector
; @ REPL[11]:13 within `simple_vector_loop`
; ┌ @ essentials.jl:375 within `getindex`
;; Loop finished: bounds check for reading the element at index %.unbox - 1.
%32 = add i64 %.unbox.fr, -1
%33 = add i64 %32, %.unbox.fr
%.not26 = icmp ult i64 %33, %1
%.idx68 = shl nsw i64 %32, 3
%34 = icmp ult i64 %.idx68, %2
%35 = and i1 %.not26, %34
br i1 %35, label %idxend22, label %oob17
oob: ; preds = %L13.postloop, %L13.preloop, %L13
; └
; @ REPL[11]:7 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
;; Bounds-error exit for the store: allocate a 32-byte box, fill it with the data pointer
;; and the memory object, and pass it with the offending index to the runtime.
%.lcssa30 = phi i64 [ %50, %L13.postloop ], [ %30, %L13 ], [ %42, %L13.preloop ]
; └
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"box::GenericMemoryRef" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 133184687382352) #10
%"box::GenericMemoryRef.tag_addr" = getelementptr inbounds i64, ptr %"box::GenericMemoryRef", i64 -1
store atomic i64 133184687382352, ptr %"box::GenericMemoryRef.tag_addr" unordered, align 8
store ptr %0, ptr %"box::GenericMemoryRef", align 8
%"box::GenericMemoryRef.repack24" = getelementptr inbounds { ptr, ptr }, ptr %"box::GenericMemoryRef", i64 0, i32 1
store ptr %"a::SimpleVector.mem", ptr %"box::GenericMemoryRef.repack24", align 8
call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef", i64 %.lcssa30)
unreachable
idxend: ; preds = %L13
; │ @ genericmemory.jl:213 within `setindex!`
;; The actual element store: counter + 1 goes to data[index], using the hoisted pointer %0.
%36 = getelementptr inbounds i64, ptr %0, i64 %value_phi233
store i64 %29, ptr %36, align 8
; └
; @ REPL[11]:4 within `simple_vector_loop`
%.not70 = icmp ult i64 %29, %exit.mainloop.at
br i1 %.not70, label %L9, label %main.exit.selector
main.exit.selector: ; preds = %idxend
;; Counter >= 999 means the loop is complete (L22); otherwise continue in the postloop.
%37 = icmp slt i64 %value_phi32, 999
br i1 %37, label %main.pseudo.exit, label %L22
main.pseudo.exit: ; preds = %preloop.pseudo.exit, %main.exit.selector
;; Entry to the postloop copy; %39 is the index value at which the postloop branches to L20.
%value_phi233.copy = phi i64 [ %value_phi233.preloop.copy, %preloop.pseudo.exit ], [ %30, %main.exit.selector ]
%value_phi32.copy = phi i64 [ %value_phi233.preloop.copy, %preloop.pseudo.exit ], [ %29, %main.exit.selector ]
%smax54 = call i64 @llvm.smax.i64(i64 %value_phi32.copy, i64 %.unbox.fr)
%38 = add i64 %value_phi233.copy, %smax54
%39 = sub i64 %38, %value_phi32.copy
br label %L9.postloop
oob17: ; preds = %L22
; @ REPL[11]:13 within `simple_vector_loop`
; ┌ @ essentials.jl:375 within `getindex`
;; Bounds-error exit for the final read; same boxing sequence as `oob`, index = %.unbox.fr.
%ptls_field123 = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load124 = load ptr, ptr %ptls_field123, align 8
%"box::GenericMemoryRef21" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load124, i32 800, i32 32, i64 133184687382352) #10
%"box::GenericMemoryRef21.tag_addr" = getelementptr inbounds i64, ptr %"box::GenericMemoryRef21", i64 -1
store atomic i64 133184687382352, ptr %"box::GenericMemoryRef21.tag_addr" unordered, align 8
store ptr %0, ptr %"box::GenericMemoryRef21", align 8
%"box::GenericMemoryRef21.repack27" = getelementptr inbounds { ptr, ptr }, ptr %"box::GenericMemoryRef21", i64 0, i32 1
store ptr %"a::SimpleVector.mem", ptr %"box::GenericMemoryRef21.repack27", align 8
call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef21", i64 %.unbox.fr)
unreachable
idxend22: ; preds = %L22
;; Normal exit: load the element at index %.unbox - 1 and return it.
%40 = getelementptr inbounds i64, ptr %0, i64 %32
%41 = load i64, ptr %40, align 8
; └
ret i64 %41
L9.preloop: ; preds = %idxend.preloop, %L9.preloop.preheader
;; Preloop copy: scalar, with the full two-part bounds check; a single counter serves as
;; both store index and stored value minus one.
%value_phi233.preloop = phi i64 [ %42, %idxend.preloop ], [ 0, %L9.preloop.preheader ]
; @ REPL[11]:7 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%42 = add nuw nsw i64 %value_phi233.preloop, 1
; └
; @ REPL[11]:6 within `simple_vector_loop`
; ┌ @ int.jl:514 within `<=`
%exitcond56 = icmp eq i64 %value_phi233.preloop, %smax55
; └
br i1 %exitcond56, label %L20, label %L13.preloop
L13.preloop: ; preds = %L9.preloop
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%43 = add nuw i64 %.unbox.fr, %value_phi233.preloop
%.not23.preloop = icmp ult i64 %43, %1
%.idx71 = shl nsw i64 %value_phi233.preloop, 3
%44 = icmp ult i64 %.idx71, %2
%45 = and i1 %.not23.preloop, %44
br i1 %45, label %idxend.preloop, label %oob
idxend.preloop: ; preds = %L13.preloop
; │ @ genericmemory.jl:213 within `setindex!`
%46 = getelementptr inbounds i64, ptr %0, i64 %value_phi233.preloop
store i64 %42, ptr %46, align 8
; └
; @ REPL[11]:4 within `simple_vector_loop`
%.not72 = icmp ult i64 %42, %exit.preloop.at
br i1 %.not72, label %L9.preloop, label %preloop.exit.selector
preloop.exit.selector: ; preds = %idxend.preloop
%47 = icmp ult i64 %value_phi233.preloop, 999
br i1 %47, label %preloop.pseudo.exit, label %L22
preloop.pseudo.exit: ; preds = %preloop.exit.selector, %top.split
;; Decide whether any main-loop iterations remain; if not, go directly to the postloop entry.
%value_phi233.preloop.copy = phi i64 [ 0, %top.split ], [ %42, %preloop.exit.selector ]
%48 = icmp slt i64 %value_phi233.preloop.copy, %exit.mainloop.at
br i1 %48, label %L9.preheader, label %main.pseudo.exit
L9.postloop: ; preds = %idxend.postloop, %main.pseudo.exit
;; Postloop copy: scalar, with the full two-part bounds check; exits once the counter exceeds 998.
%value_phi233.postloop = phi i64 [ %value_phi233.copy, %main.pseudo.exit ], [ %50, %idxend.postloop ]
%value_phi32.postloop = phi i64 [ %value_phi32.copy, %main.pseudo.exit ], [ %49, %idxend.postloop ]
; @ REPL[11]:5 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%49 = add nuw nsw i64 %value_phi32.postloop, 1
; └
; @ REPL[11]:6 within `simple_vector_loop`
; ┌ @ int.jl:514 within `<=`
%exitcond = icmp eq i64 %value_phi233.postloop, %39
; └
br i1 %exitcond, label %L20, label %L13.postloop
L13.postloop: ; preds = %L9.postloop
; @ REPL[11]:7 within `simple_vector_loop`
; ┌ @ int.jl:87 within `+`
%50 = add nuw nsw i64 %value_phi233.postloop, 1
; └
; @ REPL[11]:8 within `simple_vector_loop`
; ┌ @ genericmemory.jl:212 within `setindex!`
%51 = add i64 %.unbox.fr, %value_phi233.postloop
%.not23.postloop = icmp ult i64 %51, %1
%.idx = shl nsw i64 %value_phi233.postloop, 3
%52 = icmp ult i64 %.idx, %2
%53 = and i1 %.not23.postloop, %52
br i1 %53, label %idxend.postloop, label %oob
idxend.postloop: ; preds = %L13.postloop
; │ @ genericmemory.jl:213 within `setindex!`
%54 = getelementptr inbounds i64, ptr %0, i64 %value_phi233.postloop
store i64 %49, ptr %54, align 8
; └
; @ REPL[11]:4 within `simple_vector_loop`
; ┌ @ int.jl:83 within `<`
%55 = icmp ugt i64 %value_phi32.postloop, 998
; └
br i1 %55, label %L22, label %L9.postloop
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment