Skip to content

Instantly share code, notes, and snippets.

@nlw0
Last active April 18, 2019 21:23
Show Gist options
  • Save nlw0/dc88109dfd7ad9073e5ae1a64d7427f6 to your computer and use it in GitHub Desktop.
Save nlw0/dc88109dfd7ad9073e5ae1a64d7427f6 to your computer and use it in GitHub Desktop.
LLVM passes from the min and sum reductions with float and i32 in Julia
*** IR Dump After Demanded bits analysis ***
; julia_myfun_15949: takes one non-null boxed Julia value (%0) and returns a double.
; Outline of the body in this dump:
;   top        - load a first double (%6) through %0 and compute a checked count (%15)
;   L38        - overflow error path (does not return)
;   L42        - obtain a second object (%18) from a runtime call taking %15
;   L45/L70    - when the count is positive, call a runtime routine with
;                (word loaded from %18, pointer 8 bytes into %0's data, count * 8)
;   L62        - error path taken when bit 60 of the count is set (does not return)
;   L80..L97   - scalar loop over %18's doubles keeping the smaller value, seeded with %6
; NOTE(review): this reads like a minimum reduction seeded with the first element and
; run over a copy of the remaining elements -- confirm against the Julia source.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 {
top:
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9
; The first word of %0 is read as a pointer to doubles (%5); %6 is the double it points at.
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17
; %10 = the i64 stored in pointer-sized slot 3 of %0.
; NOTE(review): presumably an array dimension/length field -- confirm against jl_array_t.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34
; %12 = max(%10, 1) (signed); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow detection (%16).
%11 = icmp sgt i64 %10, 1, !dbg !36
%12 = select i1 %11, i64 %10, i64 1, !dbg !42
%13 = add nsw i64 %12, -2, !dbg !49
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57
%17 = zext i1 %16 to i8, !dbg !57
; Spill the sum and the widened overflow flag into the two fields of %1.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57
br i1 %16, label %L38, label %L42, !dbg !61
L38: ; preds = %top
; Overflow path: call the overflow-error thrower with the operands (%13, 1); the
; trap/unreachable that follow mark this block as never falling through.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61
call void @llvm.trap(), !dbg !61
unreachable, !dbg !61
L42: ; preds = %top
; Call through an absolute address with a constant object and %15; the result %18 is a boxed value.
; NOTE(review): presumably a 1-d array allocation of %15 elements -- confirm the callee.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62
; Reload the count from the stack slot; when it is < 1 jump straight to L80.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73
%20 = icmp slt i64 %19, 1, !dbg !68
br i1 %20, label %L80, label %L45, !dbg !72
L45: ; preds = %L42
; Keep %18 and %0 GC-preserved until the matching gc_preserve_end calls in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73
; %24 = count * 8. The mask below is 2^60, so L62 is taken when bit 60 of the count is set.
%24 = shl i64 %23, 3, !dbg !79
%25 = and i64 %23, 1152921504606846976, !dbg !83
%26 = icmp eq i64 %25, 0, !dbg !83
br i1 %26, label %L70, label %L62, !dbg !85
L62: ; preds = %L45
; Error path: box %24 and hand it to jl_invoke with four constant objects; the
; trap/unreachable that follow mark this block as never falling through.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85
call void @llvm.trap(), !dbg !85
unreachable, !dbg !85
L70: ; preds = %L45
; %32 = first word of %18 read as an i64; %36 = first word of %0 read as a byte pointer, plus 8.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102
%37 = ptrtoint i8* %36 to i64, !dbg !102
; Call through an absolute address with (%32, %37, %24).
; NOTE(review): the argument shape (destination, source + 8 bytes, byte count) suggests a
; memmove/memcpy-style copy that skips the first 8-byte element -- confirm the callee.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105
br label %L80, !dbg !106
L80: ; preds = %L70, %L42
; %42 = i64 stored in field 1 of %18 viewed as a %jl_array_t; it is the loop trip count below.
; NOTE(review): presumably the array length -- confirm against the %jl_array_t layout.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112
; When %42 < 1 the loop is skipped and L97 returns %6 unchanged.
%43 = icmp slt i64 %42, 1, !dbg !114
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115
L88.lr.ph: ; preds = %L80
; Loop preheader: load the double pointer held in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)*
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4
br label %L88, !dbg !116
L88: ; preds = %L88, %L88.lr.ph
; %value_phi317 = element index, counting up from 0.
; %value_phi16  = running result, initialised to %6.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ]
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ]
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17
; Keep the running value when it is less than the element (ordered compare with fast-math
; flags), otherwise take the element: one step of a floating-point minimum.
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124
; Leave the loop once the incremented index equals %42.
%exitcond = icmp eq i64 %49, %42, !dbg !127
br i1 %exitcond, label %L97, label %L88, !dbg !116, !llvm.loop !128
L97: ; preds = %L88, %L80
; Result: %6 when the loop was skipped, otherwise the last value selected in the loop.
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L88 ]
ret double %value_phi4, !dbg !129
}
*** IR Dump After Loop Vectorization ***
; julia_myfun_15949 (double result) as dumped after the Loop Vectorization pass.
; The reduction loop L88 below still operates on scalar doubles: this function contains no
; vector types and no vector.ph / vector.body blocks. The visible structural difference in
; the loop exit is the dedicated block L97.loopexit, which carries the loop result out
; through a single-entry phi (%.lcssa).
; NOTE(review): the body reads like a minimum reduction seeded with the first element and
; run over a copy of the remaining elements -- confirm against the Julia source.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 {
top:
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9
; The first word of %0 is read as a pointer to doubles (%5); %6 is the double it points at.
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17
; %10 = the i64 stored in pointer-sized slot 3 of %0.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34
; %12 = max(%10, 1) (signed); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow detection (%16).
%11 = icmp sgt i64 %10, 1, !dbg !36
%12 = select i1 %11, i64 %10, i64 1, !dbg !42
%13 = add nsw i64 %12, -2, !dbg !49
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57
%17 = zext i1 %16 to i8, !dbg !57
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57
br i1 %16, label %L38, label %L42, !dbg !61
L38: ; preds = %top
; Overflow path: the thrower call is followed by trap/unreachable, so control never continues.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61
call void @llvm.trap(), !dbg !61
unreachable, !dbg !61
L42: ; preds = %top
; Runtime call through an absolute address taking a constant object and %15; yields %18.
; NOTE(review): presumably an array allocation of %15 elements -- confirm the callee.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73
%20 = icmp slt i64 %19, 1, !dbg !68
br i1 %20, label %L80, label %L45, !dbg !72
L45: ; preds = %L42
; %18 and %0 stay GC-preserved until the gc_preserve_end calls in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73
; %24 = count * 8; the mask is 2^60, so L62 is taken when bit 60 of the count is set.
%24 = shl i64 %23, 3, !dbg !79
%25 = and i64 %23, 1152921504606846976, !dbg !83
%26 = icmp eq i64 %25, 0, !dbg !83
br i1 %26, label %L70, label %L62, !dbg !85
L62: ; preds = %L45
; Error path: box %24, pass it to jl_invoke, then trap/unreachable.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85
call void @llvm.trap(), !dbg !85
unreachable, !dbg !85
L70: ; preds = %L45
; %32 = first word of %18 read as an i64; %36 = first word of %0 read as a byte pointer, plus 8.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102
%37 = ptrtoint i8* %36 to i64, !dbg !102
; Runtime call through an absolute address with (%32, %37, %24).
; NOTE(review): looks like a memmove/memcpy-style copy (dest, src + 8 bytes, nbytes) -- confirm.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105
br label %L80, !dbg !106
L80: ; preds = %L70, %L42
; %42 = i64 in field 1 of %18 viewed as a %jl_array_t; it bounds the loop below.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112
; When %42 < 1 the loop is skipped and L97 returns %6 unchanged.
%43 = icmp slt i64 %42, 1, !dbg !114
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115
L88.lr.ph: ; preds = %L80
; Loop preheader: the double pointer in the first word of %18 is loaded once here.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)*
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4
br label %L88, !dbg !116
L88: ; preds = %L88, %L88.lr.ph
; Scalar reduction loop: %value_phi317 is the index (from 0), %value_phi16 the running
; result (from %6).
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ]
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ]
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17
; Keep the running value when it is less than the element (ordered, fast-math compare),
; otherwise take the element.
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124
%exitcond = icmp eq i64 %49, %42, !dbg !127
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !116, !llvm.loop !128
L97.loopexit: ; preds = %L88
; Dedicated loop-exit block: the single-entry phi forwards the loop's final %48 to L97.
%.lcssa = phi double [ %48, %L88 ]
br label %L97, !dbg !129
L97: ; preds = %L97.loopexit, %L80
; Result: %6 when the loop was skipped, otherwise the value forwarded from the loop exit.
%value_phi4 = phi double [ %6, %L80 ], [ %.lcssa, %L97.loopexit ]
ret double %value_phi4, !dbg !129
}
*** IR Dump After Combine redundant instructions ***
; julia_myfun_15949 (double result) as dumped after the "Combine redundant instructions" pass.
; The reduction loop L88 is still scalar. In this dump L97.loopexit holds only an
; unconditional branch, and the phi in L97 takes %48 directly on the edge from L97.loopexit.
; NOTE(review): the body reads like a minimum reduction seeded with the first element and
; run over a copy of the remaining elements -- confirm against the Julia source.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 {
top:
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9
; The first word of %0 is read as a pointer to doubles (%5); %6 is the double it points at.
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17
; %10 = the i64 stored in pointer-sized slot 3 of %0.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34
; %12 = max(%10, 1) (signed); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow detection (%16).
%11 = icmp sgt i64 %10, 1, !dbg !36
%12 = select i1 %11, i64 %10, i64 1, !dbg !42
%13 = add nsw i64 %12, -2, !dbg !49
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57
%17 = zext i1 %16 to i8, !dbg !57
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57
br i1 %16, label %L38, label %L42, !dbg !61
L38: ; preds = %top
; Overflow path: the thrower call is followed by trap/unreachable, so control never continues.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61
call void @llvm.trap(), !dbg !61
unreachable, !dbg !61
L42: ; preds = %top
; Runtime call through an absolute address taking a constant object and %15; yields %18.
; NOTE(review): presumably an array allocation of %15 elements -- confirm the callee.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73
%20 = icmp slt i64 %19, 1, !dbg !68
br i1 %20, label %L80, label %L45, !dbg !72
L45: ; preds = %L42
; %18 and %0 stay GC-preserved until the gc_preserve_end calls in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73
; %24 = count * 8; the mask is 2^60, so L62 is taken when bit 60 of the count is set.
%24 = shl i64 %23, 3, !dbg !79
%25 = and i64 %23, 1152921504606846976, !dbg !83
%26 = icmp eq i64 %25, 0, !dbg !83
br i1 %26, label %L70, label %L62, !dbg !85
L62: ; preds = %L45
; Error path: box %24, pass it to jl_invoke, then trap/unreachable.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85
call void @llvm.trap(), !dbg !85
unreachable, !dbg !85
L70: ; preds = %L45
; %32 = first word of %18 read as an i64; %36 = first word of %0 read as a byte pointer, plus 8.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102
%37 = ptrtoint i8* %36 to i64, !dbg !102
; Runtime call through an absolute address with (%32, %37, %24).
; NOTE(review): looks like a memmove/memcpy-style copy (dest, src + 8 bytes, nbytes) -- confirm.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105
br label %L80, !dbg !106
L80: ; preds = %L70, %L42
; %42 = i64 in field 1 of %18 viewed as a %jl_array_t; it bounds the loop below.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112
; When %42 < 1 the loop is skipped and L97 returns %6 unchanged.
%43 = icmp slt i64 %42, 1, !dbg !114
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115
L88.lr.ph: ; preds = %L80
; Loop preheader: the double pointer in the first word of %18 is loaded once here.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)*
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4
br label %L88, !dbg !116
L88: ; preds = %L88, %L88.lr.ph
; Scalar reduction loop: %value_phi317 is the index (from 0), %value_phi16 the running
; result (from %6).
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ]
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ]
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17
; Keep the running value when it is less than the element (ordered, fast-math compare),
; otherwise take the element.
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124
%exitcond = icmp eq i64 %49, %42, !dbg !127
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !116, !llvm.loop !128
L97.loopexit: ; preds = %L88
; Pass-through exit block; it defines no values of its own.
br label %L97, !dbg !129
L97: ; preds = %L97.loopexit, %L80
; Result: %6 when the loop was skipped, otherwise the loop's final %48.
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L97.loopexit ]
ret double %value_phi4, !dbg !129
}
*** IR Dump After Demanded bits analysis ***
; julia_myfun_15947: takes one non-null boxed Julia value (%0) and returns an i64.
; Outline of the body in this dump:
;   top        - load a first i64 (%6) through %0 and compute a checked count (%15)
;   L38        - overflow error path (does not return)
;   L42        - obtain a second object (%18) from a runtime call taking %15
;   L45/L70    - when the count is positive, call a runtime routine with
;                (word loaded from %18, pointer 8 bytes into %0's data, count * 8)
;   L62        - error path taken when bit 60 of the count is set (does not return)
;   L80..L97   - scalar loop over %18's i64 elements keeping the smaller value (signed
;                compare), seeded with %6
; NOTE(review): this reads like an integer minimum reduction seeded with the first element
; and run over a copy of the remaining elements -- confirm against the Julia source.
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 {
top:
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7
; The first word of %0 is read as a pointer to i64s (%5); %6 is the i64 it points at.
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15
; %10 = the i64 stored in pointer-sized slot 3 of %0.
; NOTE(review): presumably an array dimension/length field -- confirm against jl_array_t.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33
; %12 = max(%10, 1) (signed); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow detection (%16).
%11 = icmp sgt i64 %10, 1, !dbg !35
%12 = select i1 %11, i64 %10, i64 1, !dbg !41
%13 = add nsw i64 %12, -2, !dbg !48
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56
%17 = zext i1 %16 to i8, !dbg !56
; Spill the sum and the widened overflow flag into the two fields of %1.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56
br i1 %16, label %L38, label %L42, !dbg !60
L38: ; preds = %top
; Overflow path: call the overflow-error thrower with the operands (%13, 1); the
; trap/unreachable that follow mark this block as never falling through.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347041104 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60
call void @llvm.trap(), !dbg !60
unreachable, !dbg !60
L42: ; preds = %top
; Call through an absolute address with a constant object and %15; the result %18 is a boxed value.
; NOTE(review): presumably a 1-d array allocation of %15 elements -- confirm the callee.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139756632138832 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426689936 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61
; Reload the count from the stack slot; when it is < 1 jump straight to L80.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72
%20 = icmp slt i64 %19, 1, !dbg !67
br i1 %20, label %L80, label %L45, !dbg !71
L45: ; preds = %L42
; Keep %18 and %0 GC-preserved until the matching gc_preserve_end calls in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72
; %24 = count * 8. The mask below is 2^60, so L62 is taken when bit 60 of the count is set.
%24 = shl i64 %23, 3, !dbg !78
%25 = and i64 %23, 1152921504606846976, !dbg !82
%26 = icmp eq i64 %25, 0, !dbg !82
br i1 %26, label %L70, label %L62, !dbg !84
L62: ; preds = %L45
; Error path: box %24 and hand it to jl_invoke with four constant objects; the
; trap/unreachable that follow mark this block as never falling through.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426542672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426543424 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347044096 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756349605872 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84
call void @llvm.trap(), !dbg !84
unreachable, !dbg !84
L70: ; preds = %L45
; %32 = first word of %18 read as an i64; %36 = first word of %0 read as a byte pointer, plus 8.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101
%37 = ptrtoint i8* %36 to i64, !dbg !101
; Call through an absolute address with (%32, %37, %24).
; NOTE(review): the argument shape (destination, source + 8 bytes, byte count) suggests a
; memmove/memcpy-style copy that skips the first 8-byte element -- confirm the callee.
%38 = call i64 inttoptr (i64 139756622757376 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104
br label %L80, !dbg !105
L80: ; preds = %L70, %L42
; %42 = i64 stored in field 1 of %18 viewed as a %jl_array_t; it is the loop trip count below.
; NOTE(review): presumably the array length -- confirm against the %jl_array_t layout.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111
; When %42 < 1 the loop is skipped and L97 returns %6 unchanged.
%43 = icmp slt i64 %42, 1, !dbg !113
br i1 %43, label %L97, label %L88.lr.ph, !dbg !114
L88.lr.ph: ; preds = %L80
; Loop preheader: load the i64 pointer held in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)*
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4
br label %L88, !dbg !115
L88: ; preds = %L88, %L88.lr.ph
; %value_phi317 = element index, counting up from 0.
; %value_phi16  = running result, initialised to %6.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %50, %L88 ]
%value_phi16 = phi i64 [ %6, %L88.lr.ph ], [ %49, %L88 ]
%46 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !116
%47 = load i64, i64 addrspace(13)* %46, align 8, !dbg !116, !tbaa !15
; Keep the running value when it is signed-less-than the element, otherwise take the
; element: one step of a signed integer minimum.
%48 = icmp slt i64 %value_phi16, %47, !dbg !120
%49 = select i1 %48, i64 %value_phi16, i64 %47, !dbg !124
%50 = add nuw nsw i64 %value_phi317, 1, !dbg !127
; Leave the loop once the incremented index equals %42.
%exitcond = icmp eq i64 %50, %42, !dbg !130
br i1 %exitcond, label %L97, label %L88, !dbg !115, !llvm.loop !131
L97: ; preds = %L88, %L80
; Result: %6 when the loop was skipped, otherwise the last value selected in the loop.
%value_phi4 = phi i64 [ %6, %L80 ], [ %49, %L88 ]
ret i64 %value_phi4, !dbg !132
}
*** IR Dump After Loop Vectorization ***
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 {
top:
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33
%11 = icmp sgt i64 %10, 1, !dbg !35
%12 = select i1 %11, i64 %10, i64 1, !dbg !41
%13 = add nsw i64 %12, -2, !dbg !48
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56
%17 = zext i1 %16 to i8, !dbg !56
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56
br i1 %16, label %L38, label %L42, !dbg !60
L38: ; preds = %top
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347041104 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60
call void @llvm.trap(), !dbg !60
unreachable, !dbg !60
L42: ; preds = %top
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139756632138832 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426689936 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72
%20 = icmp slt i64 %19, 1, !dbg !67
br i1 %20, label %L80, label %L45, !dbg !71
L45: ; preds = %L42
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72
%24 = shl i64 %23, 3, !dbg !78
%25 = and i64 %23, 1152921504606846976, !dbg !82
%26 = icmp eq i64 %25, 0, !dbg !82
br i1 %26, label %L70, label %L62, !dbg !84
L62: ; preds = %L45
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426542672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426543424 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347044096 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756349605872 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84
call void @llvm.trap(), !dbg !84
unreachable, !dbg !84
L70: ; preds = %L45
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101
%37 = ptrtoint i8* %36 to i64, !dbg !101
%38 = call i64 inttoptr (i64 139756622757376 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104
br label %L80, !dbg !105
L80: ; preds = %L70, %L42
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111
%43 = icmp slt i64 %42, 1, !dbg !113
br i1 %43, label %L97, label %L88.lr.ph, !dbg !114
L88.lr.ph: ; preds = %L80
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)*
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4
%min.iters.check = icmp ult i64 %42, 16, !dbg !115
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !115
vector.ph: ; preds = %L88.lr.ph
%n.mod.vf = urem i64 %42, 16, !dbg !115
%n.vec = sub i64 %42, %n.mod.vf, !dbg !115
%minmax.ident.splatinsert = insertelement <4 x i64> undef, i64 %6, i32 0, !dbg !115
%minmax.ident.splat = shufflevector <4 x i64> %minmax.ident.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !115
br label %vector.body, !dbg !115
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !116
%vec.phi = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %67, %vector.body ]
%vec.phi21 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %68, %vector.body ]
%vec.phi22 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %69, %vector.body ]
%vec.phi23 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %70, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !116
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !116
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !116
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !116
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !116
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !116
%46 = add i64 %index, 0, !dbg !116
%47 = add i64 %index, 4, !dbg !116
%48 = add i64 %index, 8, !dbg !116
%49 = add i64 %index, 12, !dbg !116
%50 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %46, !dbg !119
%51 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %47, !dbg !119
%52 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %48, !dbg !119
%53 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %49, !dbg !119
%54 = getelementptr i64, i64 addrspace(13)* %50, i32 0, !dbg !119
%55 = bitcast i64 addrspace(13)* %54 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load = load <4 x i64>, <4 x i64> addrspace(13)* %55, align 8, !dbg !119, !tbaa !15
%56 = getelementptr i64, i64 addrspace(13)* %50, i32 4, !dbg !119
%57 = bitcast i64 addrspace(13)* %56 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load24 = load <4 x i64>, <4 x i64> addrspace(13)* %57, align 8, !dbg !119, !tbaa !15
%58 = getelementptr i64, i64 addrspace(13)* %50, i32 8, !dbg !119
%59 = bitcast i64 addrspace(13)* %58 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load25 = load <4 x i64>, <4 x i64> addrspace(13)* %59, align 8, !dbg !119, !tbaa !15
%60 = getelementptr i64, i64 addrspace(13)* %50, i32 12, !dbg !119
%61 = bitcast i64 addrspace(13)* %60 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load26 = load <4 x i64>, <4 x i64> addrspace(13)* %61, align 8, !dbg !119, !tbaa !15
%62 = icmp slt <4 x i64> %vec.phi, %wide.load, !dbg !123
%63 = icmp slt <4 x i64> %vec.phi21, %wide.load24, !dbg !123
%64 = icmp slt <4 x i64> %vec.phi22, %wide.load25, !dbg !123
%65 = icmp slt <4 x i64> %vec.phi23, %wide.load26, !dbg !123
%66 = extractelement <4 x i1> %62, i32 0, !dbg !127
%67 = select <4 x i1> %62, <4 x i64> %vec.phi, <4 x i64> %wide.load, !dbg !127
%68 = select <4 x i1> %63, <4 x i64> %vec.phi21, <4 x i64> %wide.load24, !dbg !127
%69 = select <4 x i1> %64, <4 x i64> %vec.phi22, <4 x i64> %wide.load25, !dbg !127
%70 = select <4 x i1> %65, <4 x i64> %vec.phi23, <4 x i64> %wide.load26, !dbg !127
%index.next = add i64 %index, 16, !dbg !116
%71 = icmp eq i64 %index.next, %n.vec, !dbg !116
br i1 %71, label %middle.block, label %vector.body, !dbg !116, !llvm.loop !130
middle.block: ; preds = %vector.body
%rdx.minmax.cmp = icmp slt <4 x i64> %67, %68, !dbg !127
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i64> %67, <4 x i64> %68, !dbg !127
%rdx.minmax.cmp27 = icmp slt <4 x i64> %rdx.minmax.select, %69, !dbg !127
%rdx.minmax.select28 = select <4 x i1> %rdx.minmax.cmp27, <4 x i64> %rdx.minmax.select, <4 x i64> %69, !dbg !127
%rdx.minmax.cmp29 = icmp slt <4 x i64> %rdx.minmax.select28, %70, !dbg !127
%rdx.minmax.select30 = select <4 x i1> %rdx.minmax.cmp29, <4 x i64> %rdx.minmax.select28, <4 x i64> %70, !dbg !127
%rdx.shuf = shufflevector <4 x i64> %rdx.minmax.select30, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !127
%rdx.minmax.cmp31 = icmp slt <4 x i64> %rdx.minmax.select30, %rdx.shuf, !dbg !127
%rdx.minmax.select32 = select <4 x i1> %rdx.minmax.cmp31, <4 x i64> %rdx.minmax.select30, <4 x i64> %rdx.shuf, !dbg !127
%rdx.shuf33 = shufflevector <4 x i64> %rdx.minmax.select32, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !127
%rdx.minmax.cmp34 = icmp slt <4 x i64> %rdx.minmax.select32, %rdx.shuf33, !dbg !127
%rdx.minmax.select35 = select <4 x i1> %rdx.minmax.cmp34, <4 x i64> %rdx.minmax.select32, <4 x i64> %rdx.shuf33, !dbg !127
%72 = extractelement <4 x i64> %rdx.minmax.select35, i32 0, !dbg !127
%cmp.n = icmp eq i64 %42, %n.vec
br i1 %cmp.n, label %L97.loopexit, label %scalar.ph, !dbg !115
scalar.ph: ; preds = %middle.block, %L88.lr.ph
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ]
%bc.merge.rdx = phi i64 [ %6, %L88.lr.ph ], [ %72, %middle.block ]
br label %L88, !dbg !115
L88: ; preds = %L88, %scalar.ph
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %77, %L88 ]
%value_phi16 = phi i64 [ %bc.merge.rdx, %scalar.ph ], [ %76, %L88 ]
%73 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !119
%74 = load i64, i64 addrspace(13)* %73, align 8, !dbg !119, !tbaa !15
%75 = icmp slt i64 %value_phi16, %74, !dbg !123
%76 = select i1 %75, i64 %value_phi16, i64 %74, !dbg !127
%77 = add nuw nsw i64 %value_phi317, 1, !dbg !116
%exitcond = icmp eq i64 %77, %42, !dbg !132
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !115, !llvm.loop !133
L97.loopexit: ; preds = %middle.block, %L88
%.lcssa = phi i64 [ %76, %L88 ], [ %72, %middle.block ]
br label %L97, !dbg !135
L97: ; preds = %L97.loopexit, %L80
%value_phi4 = phi i64 [ %6, %L80 ], [ %.lcssa, %L97.loopexit ]
ret i64 %value_phi4, !dbg !135
}
*** IR Dump After Demanded bits analysis ***
; julia_myfun_15949, double variant, scalar form (the loop at L88 is not yet
; vectorized in this snapshot).
; Takes one object pointer (%0) and returns a double. The visible code loads a
; first double (%6) from the argument's data, builds a second object (%18),
; copies bytes into it, and then accumulates the elements of %18 onto %6 with
; `fadd fast`.
; NOTE(review): the callees reached through hard-coded absolute addresses are
; opaque here; their roles below are inferred from their operands only.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 {
top:
; Stack slot holding the { value, overflow-flag } pair produced below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
; Load the pointer stored in the first word of the argument (%5), then the
; first double it points at (%6). %6 is the initial accumulator value.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17
; Load the i64 at pointer-sized slot 3 of the argument object (%10).
; Presumably a size/length field of the input array; confirm against the
; jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34
; %12 = max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow
; detection (%16 is the overflow bit).
%11 = icmp sgt i64 %10, 1, !dbg !36
%12 = select i1 %11, i64 %10, i64 1, !dbg !42
%13 = add nsw i64 %12, -2, !dbg !49
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57
%17 = zext i1 %16 to i8, !dbg !57
; Spill both the sum and the widened overflow flag into the stack struct %1.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57
br i1 %16, label %L38, label %L42, !dbg !61
L38: ; preds = %top
; Overflow path: call the overflow-error helper with the two addends; control
; is not expected to return (trap + unreachable follow).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062258000 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61
call void @llvm.trap(), !dbg !61
unreachable, !dbg !61
L42: ; preds = %top
; Call through an absolute address with a constant object and %15. The result
; %18 is later read as a %jl_array_t (block L80), so this is presumably an
; array allocation of %15 elements; confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139889347355728 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889154398336 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62
; Reload the stored value; skip the copy entirely when it is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73
%20 = icmp slt i64 %19, 1, !dbg !68
br i1 %20, label %L80, label %L45, !dbg !72
L45: ; preds = %L42
; Open GC-preserve regions for %18 and the argument %0; both are closed at the
; end of L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73
; %24 = %23 * 8 (shift left by 3).
%24 = shl i64 %23, 3, !dbg !79
; 1152921504606846976 == 2^60: test the bit of %23 that the shift by 3 moves
; into the sign position of %24; if set, take the error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !83
%26 = icmp eq i64 %25, 0, !dbg !83
br i1 %26, label %L70, label %L62, !dbg !85
L62: ; preds = %L45
; Box %24 and pass it to jl_invoke together with four constant objects; trap +
; unreachable follow, so this call is not expected to return (presumably it
; raises a conversion error; confirm).
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141759568 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141760320 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062260992 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889064822768 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85
call void @llvm.trap(), !dbg !85
unreachable, !dbg !85
L70: ; preds = %L45
; %32 = first word of object %18, as an integer.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101
; %37 = (first word of the argument object) + 8 bytes, as an integer.
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102
%37 = ptrtoint i8* %36 to i64, !dbg !102
; Call through an absolute address with (%32, %37, %24). The operand shape
; (dest, src, byte count) looks like a memmove/memcpy of the input data minus
; its first 8 bytes into %18; confirm the callee.
%38 = call i64 inttoptr (i64 139889337974272 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105
br label %L80, !dbg !106
L80: ; preds = %L70, %L42
; Load i64 field 1 of %18 viewed as %jl_array_t (%42); it is the loop trip
; count below. If it is < 1 the loop is skipped and %6 is returned unchanged.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112
%43 = icmp slt i64 %42, 1, !dbg !114
br i1 %43, label %L96, label %L88.lr.ph, !dbg !115
L88.lr.ph: ; preds = %L80
; Loop preheader: load the double* stored in the first word of %18 once.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)*
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4
br label %L88, !dbg !116
L88: ; preds = %L88, %L88.lr.ph
; Scalar reduction loop.
;   %value_phi317 : element index, starts at 0, +1 per iteration
;   %value_phi16  : running sum, starts at %6
; Each iteration adds element %45[index] to the running sum with fast-math
; flags and exits when index + 1 == %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ]
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ]
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17
%48 = fadd fast double %47, %value_phi16, !dbg !121
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !127
%exitcond = icmp eq i64 %49, %42, !dbg !130
br i1 %exitcond, label %L96, label %L88, !dbg !116, !llvm.loop !131
L96: ; preds = %L88, %L80
; Result: %6 when the loop was skipped, otherwise the final running sum %48.
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L88 ]
ret double %value_phi4, !dbg !132
}
*** IR Dump After Loop Vectorization ***
; julia_myfun_15949, double variant, after the loop vectorizer has run.
; The setup blocks (top .. L80) are followed by a vectorized reduction:
; a vector loop that consumes 16 doubles per iteration (four <4 x double>
; accumulators), a middle block that folds the accumulators to one scalar,
; and the original scalar loop L88 kept as the remainder / short-trip-count
; path.
; NOTE(review): the callees reached through hard-coded absolute addresses are
; opaque here; their roles below are inferred from their operands only.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 {
top:
; Stack slot holding the { value, overflow-flag } pair produced below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
; Load the pointer stored in the first word of the argument (%5), then the
; first double it points at (%6). %6 seeds the reduction.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17
; Load the i64 at pointer-sized slot 3 of the argument object (%10).
; Presumably a size/length field of the input array; confirm.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34
; %12 = max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow
; detection (%16 is the overflow bit).
%11 = icmp sgt i64 %10, 1, !dbg !36
%12 = select i1 %11, i64 %10, i64 1, !dbg !42
%13 = add nsw i64 %12, -2, !dbg !49
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57
%17 = zext i1 %16 to i8, !dbg !57
; Spill both the sum and the widened overflow flag into the stack struct %1.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57
br i1 %16, label %L38, label %L42, !dbg !61
L38: ; preds = %top
; Overflow path: call the overflow-error helper; trap + unreachable follow.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062258000 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61
call void @llvm.trap(), !dbg !61
unreachable, !dbg !61
L42: ; preds = %top
; Call through an absolute address with a constant object and %15. The result
; %18 is later read as a %jl_array_t (block L80), so this is presumably an
; array allocation of %15 elements; confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139889347355728 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889154398336 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62
; Reload the stored value; skip the copy entirely when it is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73
%20 = icmp slt i64 %19, 1, !dbg !68
br i1 %20, label %L80, label %L45, !dbg !72
L45: ; preds = %L42
; Open GC-preserve regions for %18 and the argument %0; both are closed at the
; end of L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73
; %24 = %23 * 8 (shift left by 3).
%24 = shl i64 %23, 3, !dbg !79
; 1152921504606846976 == 2^60: test the bit of %23 that the shift by 3 moves
; into the sign position of %24; if set, take the error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !83
%26 = icmp eq i64 %25, 0, !dbg !83
br i1 %26, label %L70, label %L62, !dbg !85
L62: ; preds = %L45
; Box %24 and pass it to jl_invoke together with four constant objects; trap +
; unreachable follow, so this call is not expected to return (presumably it
; raises a conversion error; confirm).
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141759568 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141760320 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062260992 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889064822768 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85
call void @llvm.trap(), !dbg !85
unreachable, !dbg !85
L70: ; preds = %L45
; %32 = first word of object %18, as an integer.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101
; %37 = (first word of the argument object) + 8 bytes, as an integer.
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102
%37 = ptrtoint i8* %36 to i64, !dbg !102
; Call through an absolute address with (%32, %37, %24). The operand shape
; (dest, src, byte count) looks like a memmove/memcpy; confirm the callee.
%38 = call i64 inttoptr (i64 139889337974272 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105
br label %L80, !dbg !106
L80: ; preds = %L70, %L42
; Load i64 field 1 of %18 viewed as %jl_array_t (%42); it is the trip count
; of the reduction. If it is < 1 the reduction is skipped and %6 is returned.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112
%43 = icmp slt i64 %42, 1, !dbg !114
br i1 %43, label %L96, label %L88.lr.ph, !dbg !115
L88.lr.ph: ; preds = %L80
; Load the double* stored in the first word of %18 once, then pick a path:
; fewer than 16 elements -> scalar loop only; otherwise enter the vector loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)*
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4
%min.iters.check = icmp ult i64 %42, 16, !dbg !116
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !116
vector.ph: ; preds = %L88.lr.ph
; %n.vec = %42 rounded down to a multiple of 16: the number of elements the
; vector loop will consume.
%n.mod.vf = urem i64 %42, 16, !dbg !116
%n.vec = sub i64 %42, %n.mod.vf, !dbg !116
; Seed for the first accumulator: %6 in lane 0, 0.0 in the other lanes, so the
; initial value is counted exactly once when the lanes are summed.
%46 = insertelement <4 x double> zeroinitializer, double %6, i32 0, !dbg !116
br label %vector.body, !dbg !116
vector.body: ; preds = %vector.body, %vector.ph
; %index advances by 16 per iteration; four independent <4 x double>
; accumulators (%vec.phi, %vec.phi21..23) carry partial sums. Only the first
; is seeded with %6; the others start at zero.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !117
%vec.phi = phi <4 x double> [ %46, %vector.ph ], [ %63, %vector.body ]
%vec.phi21 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %64, %vector.body ]
%vec.phi22 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %65, %vector.body ]
%vec.phi23 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %66, %vector.body ]
; Vector forms of the induction variable. None of %induction, %induction18,
; %induction19 or %induction20 is used anywhere else in this function body.
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !117
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !117
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !117
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !117
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !117
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !117
; Scalar start indices of the four 4-element groups. Only %47 feeds the loads
; below (via %51); %52, %53 and %54 are not referenced again in this body.
%47 = add i64 %index, 0, !dbg !117
%48 = add i64 %index, 4, !dbg !117
%49 = add i64 %index, 8, !dbg !117
%50 = add i64 %index, 12, !dbg !117
%51 = getelementptr inbounds double, double addrspace(13)* %45, i64 %47, !dbg !120
%52 = getelementptr inbounds double, double addrspace(13)* %45, i64 %48, !dbg !120
%53 = getelementptr inbounds double, double addrspace(13)* %45, i64 %49, !dbg !120
%54 = getelementptr inbounds double, double addrspace(13)* %45, i64 %50, !dbg !120
; Four wide loads of 4 doubles each at element offsets +0, +4, +8 and +12 from
; %51. The loads keep the scalar element alignment (align 8).
%55 = getelementptr double, double addrspace(13)* %51, i32 0, !dbg !120
%56 = bitcast double addrspace(13)* %55 to <4 x double> addrspace(13)*, !dbg !120
%wide.load = load <4 x double>, <4 x double> addrspace(13)* %56, align 8, !dbg !120, !tbaa !17
%57 = getelementptr double, double addrspace(13)* %51, i32 4, !dbg !120
%58 = bitcast double addrspace(13)* %57 to <4 x double> addrspace(13)*, !dbg !120
%wide.load24 = load <4 x double>, <4 x double> addrspace(13)* %58, align 8, !dbg !120, !tbaa !17
%59 = getelementptr double, double addrspace(13)* %51, i32 8, !dbg !120
%60 = bitcast double addrspace(13)* %59 to <4 x double> addrspace(13)*, !dbg !120
%wide.load25 = load <4 x double>, <4 x double> addrspace(13)* %60, align 8, !dbg !120, !tbaa !17
%61 = getelementptr double, double addrspace(13)* %51, i32 12, !dbg !120
%62 = bitcast double addrspace(13)* %61 to <4 x double> addrspace(13)*, !dbg !120
%wide.load26 = load <4 x double>, <4 x double> addrspace(13)* %62, align 8, !dbg !120, !tbaa !17
; Lane-wise accumulation. The `fast` flag is carried over from the scalar
; fadd and is what permits the reassociated summation order used here.
%63 = fadd fast <4 x double> %wide.load, %vec.phi, !dbg !124
%64 = fadd fast <4 x double> %wide.load24, %vec.phi21, !dbg !124
%65 = fadd fast <4 x double> %wide.load25, %vec.phi22, !dbg !124
%66 = fadd fast <4 x double> %wide.load26, %vec.phi23, !dbg !124
%index.next = add i64 %index, 16, !dbg !117
%67 = icmp eq i64 %index.next, %n.vec, !dbg !117
br i1 %67, label %middle.block, label %vector.body, !dbg !117, !llvm.loop !130
middle.block: ; preds = %vector.body
; Fold the four accumulators into one vector ...
%bin.rdx = fadd fast <4 x double> %64, %63, !dbg !124
%bin.rdx27 = fadd fast <4 x double> %65, %bin.rdx, !dbg !124
%bin.rdx28 = fadd fast <4 x double> %66, %bin.rdx27, !dbg !124
; ... then reduce horizontally: add lanes {2,3} onto {0,1}, then lane 1 onto
; lane 0. Only lane 0 of the final vector is meaningful.
%rdx.shuf = shufflevector <4 x double> %bin.rdx28, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !124
%bin.rdx29 = fadd fast <4 x double> %bin.rdx28, %rdx.shuf, !dbg !124
%rdx.shuf30 = shufflevector <4 x double> %bin.rdx29, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !124
%bin.rdx31 = fadd fast <4 x double> %bin.rdx29, %rdx.shuf30, !dbg !124
%68 = extractelement <4 x double> %bin.rdx31, i32 0, !dbg !124
; If the vector loop consumed every element, skip the scalar remainder.
%cmp.n = icmp eq i64 %42, %n.vec
br i1 %cmp.n, label %L96.loopexit, label %scalar.ph, !dbg !116
scalar.ph: ; preds = %middle.block, %L88.lr.ph
; Entry state for the scalar loop: resume at %n.vec with partial sum %68 after
; the vector loop, or start at index 0 with %6 when vectorization was bypassed.
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ]
%bc.merge.rdx = phi double [ %6, %L88.lr.ph ], [ %68, %middle.block ]
br label %L88, !dbg !116
L88: ; preds = %L88, %scalar.ph
; Scalar remainder loop: one element per iteration, exits when index + 1 == %42.
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %72, %L88 ]
%value_phi16 = phi double [ %bc.merge.rdx, %scalar.ph ], [ %71, %L88 ]
%69 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !120
%70 = load double, double addrspace(13)* %69, align 8, !dbg !120, !tbaa !17
%71 = fadd fast double %70, %value_phi16, !dbg !124
%72 = add nuw nsw i64 %value_phi317, 1, !dbg !117
%exitcond = icmp eq i64 %72, %42, !dbg !132
br i1 %exitcond, label %L96.loopexit, label %L88, !dbg !116, !llvm.loop !133
L96.loopexit: ; preds = %middle.block, %L88
; Merge the final sum from whichever loop ran last.
%.lcssa = phi double [ %71, %L88 ], [ %68, %middle.block ]
br label %L96, !dbg !135
L96: ; preds = %L96.loopexit, %L80
; Result: %6 when the reduction was skipped, otherwise the accumulated sum.
%value_phi4 = phi double [ %6, %L80 ], [ %.lcssa, %L96.loopexit ]
ret double %value_phi4, !dbg !135
}
*** IR Dump After Demanded bits analysis ***
; julia_myfun_15947, i64 variant, scalar form (the loop at L88 is not yet
; vectorized in this snapshot).
; Takes one object pointer (%0) and returns an i64. The visible code loads a
; first i64 (%6) from the argument's data, builds a second object (%18),
; copies bytes into it, and then accumulates the elements of %18 onto %6 with
; a plain (wrapping) integer `add`.
; NOTE(review): the callees reached through hard-coded absolute addresses are
; opaque here; their roles below are inferred from their operands only.
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 {
top:
; Stack slot holding the { value, overflow-flag } pair produced below.
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
; Load the pointer stored in the first word of the argument (%5), then the
; first i64 it points at (%6). %6 is the initial accumulator value.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15
; Load the i64 at pointer-sized slot 3 of the argument object (%10).
; Presumably a size/length field of the input array; confirm against the
; jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33
; %12 = max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with signed-overflow
; detection (%16 is the overflow bit).
%11 = icmp sgt i64 %10, 1, !dbg !35
%12 = select i1 %11, i64 %10, i64 1, !dbg !41
%13 = add nsw i64 %12, -2, !dbg !48
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56
%17 = zext i1 %16 to i8, !dbg !56
; Spill both the sum and the widened overflow flag into the stack struct %1.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56
br i1 %16, label %L38, label %L42, !dbg !60
L38: ; preds = %top
; Overflow path: call the overflow-error helper with the two addends; control
; is not expected to return (trap + unreachable follow).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120053584 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60
call void @llvm.trap(), !dbg !60
unreachable, !dbg !60
L42: ; preds = %top
; Call through an absolute address with a constant object and %15. The result
; %18 is later read as a %jl_array_t (block L80), so this is presumably an
; array allocation of %15 elements; confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139991405151312 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199702416 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61
; Reload the stored value; skip the copy entirely when it is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72
%20 = icmp slt i64 %19, 1, !dbg !67
br i1 %20, label %L80, label %L45, !dbg !71
L45: ; preds = %L42
; Open GC-preserve regions for %18 and the argument %0; both are closed at the
; end of L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72
; %24 = %23 * 8 (shift left by 3).
%24 = shl i64 %23, 3, !dbg !78
; 1152921504606846976 == 2^60: test the bit of %23 that the shift by 3 moves
; into the sign position of %24; if set, take the error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !82
%26 = icmp eq i64 %25, 0, !dbg !82
br i1 %26, label %L70, label %L62, !dbg !84
L62: ; preds = %L45
; Box %24 and pass it to jl_invoke together with four constant objects; trap +
; unreachable follow, so this call is not expected to return (presumably it
; raises a conversion error; confirm).
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555152 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555904 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120056576 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991122626544 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84
call void @llvm.trap(), !dbg !84
unreachable, !dbg !84
L70: ; preds = %L45
; %32 = first word of object %18, as an integer.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100
; %37 = (first word of the argument object) + 8 bytes, as an integer.
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101
%37 = ptrtoint i8* %36 to i64, !dbg !101
; Call through an absolute address with (%32, %37, %24). The operand shape
; (dest, src, byte count) looks like a memmove/memcpy of the input data minus
; its first 8 bytes into %18; confirm the callee.
%38 = call i64 inttoptr (i64 139991395769856 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104
br label %L80, !dbg !105
L80: ; preds = %L70, %L42
; Load i64 field 1 of %18 viewed as %jl_array_t (%42); it is the loop trip
; count below. If it is < 1 the loop is skipped and %6 is returned unchanged.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111
%43 = icmp slt i64 %42, 1, !dbg !113
br i1 %43, label %L96, label %L88.lr.ph, !dbg !114
L88.lr.ph: ; preds = %L80
; Loop preheader: load the i64* stored in the first word of %18 once.
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)*
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4
br label %L88, !dbg !115
L88: ; preds = %L88, %L88.lr.ph
; Scalar reduction loop.
;   %value_phi317 : element index, starts at 0, +1 per iteration
;   %value_phi16  : running sum, starts at %6
; The accumulating add carries no nsw/nuw flags, so it wraps on overflow.
; The loop exits when index + 1 == %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ]
%value_phi16 = phi i64 [ %6, %L88.lr.ph ], [ %48, %L88 ]
%46 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !116
%47 = load i64, i64 addrspace(13)* %46, align 8, !dbg !116, !tbaa !15
%48 = add i64 %47, %value_phi16, !dbg !120
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !128
%exitcond = icmp eq i64 %49, %42, !dbg !130
br i1 %exitcond, label %L96, label %L88, !dbg !115, !llvm.loop !131
L96: ; preds = %L88, %L80
; Result: %6 when the loop was skipped, otherwise the final running sum %48.
%value_phi4 = phi i64 [ %6, %L80 ], [ %48, %L88 ]
ret i64 %value_phi4, !dbg !132
}
*** IR Dump After Loop Vectorization ***
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 {
top:
%1 = alloca { i64, i8 }, align 8
%2 = call %jl_value_t*** @julia.ptls_states()
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33
%11 = icmp sgt i64 %10, 1, !dbg !35
%12 = select i1 %11, i64 %10, i64 1, !dbg !41
%13 = add nsw i64 %12, -2, !dbg !48
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56
%17 = zext i1 %16 to i8, !dbg !56
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56
br i1 %16, label %L38, label %L42, !dbg !60
L38: ; preds = %top
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120053584 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60
call void @llvm.trap(), !dbg !60
unreachable, !dbg !60
L42: ; preds = %top
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139991405151312 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199702416 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72
%20 = icmp slt i64 %19, 1, !dbg !67
br i1 %20, label %L80, label %L45, !dbg !71
L45: ; preds = %L42
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72
%24 = shl i64 %23, 3, !dbg !78
%25 = and i64 %23, 1152921504606846976, !dbg !82
%26 = icmp eq i64 %25, 0, !dbg !82
br i1 %26, label %L70, label %L62, !dbg !84
L62: ; preds = %L45
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555152 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555904 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120056576 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991122626544 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84
call void @llvm.trap(), !dbg !84
unreachable, !dbg !84
L70: ; preds = %L45
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101
%37 = ptrtoint i8* %36 to i64, !dbg !101
%38 = call i64 inttoptr (i64 139991395769856 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104
br label %L80, !dbg !105
L80: ; preds = %L70, %L42
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111
%43 = icmp slt i64 %42, 1, !dbg !113
br i1 %43, label %L96, label %L88.lr.ph, !dbg !114
L88.lr.ph: ; preds = %L80
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)*
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4
%min.iters.check = icmp ult i64 %42, 16, !dbg !115
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !115
vector.ph: ; preds = %L88.lr.ph
%n.mod.vf = urem i64 %42, 16, !dbg !115
%n.vec = sub i64 %42, %n.mod.vf, !dbg !115
%46 = insertelement <4 x i64> zeroinitializer, i64 %6, i32 0, !dbg !115
br label %vector.body, !dbg !115
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !116
%vec.phi = phi <4 x i64> [ %46, %vector.ph ], [ %63, %vector.body ]
%vec.phi21 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %64, %vector.body ]
%vec.phi22 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %65, %vector.body ]
%vec.phi23 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %66, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !116
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !116
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !116
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !116
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !116
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !116
%47 = add i64 %index, 0, !dbg !116
%48 = add i64 %index, 4, !dbg !116
%49 = add i64 %index, 8, !dbg !116
%50 = add i64 %index, 12, !dbg !116
%51 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %47, !dbg !119
%52 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %48, !dbg !119
%53 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %49, !dbg !119
%54 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %50, !dbg !119
%55 = getelementptr i64, i64 addrspace(13)* %51, i32 0, !dbg !119
%56 = bitcast i64 addrspace(13)* %55 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load = load <4 x i64>, <4 x i64> addrspace(13)* %56, align 8, !dbg !119, !tbaa !15
%57 = getelementptr i64, i64 addrspace(13)* %51, i32 4, !dbg !119
%58 = bitcast i64 addrspace(13)* %57 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load24 = load <4 x i64>, <4 x i64> addrspace(13)* %58, align 8, !dbg !119, !tbaa !15
%59 = getelementptr i64, i64 addrspace(13)* %51, i32 8, !dbg !119
%60 = bitcast i64 addrspace(13)* %59 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load25 = load <4 x i64>, <4 x i64> addrspace(13)* %60, align 8, !dbg !119, !tbaa !15
%61 = getelementptr i64, i64 addrspace(13)* %51, i32 12, !dbg !119
%62 = bitcast i64 addrspace(13)* %61 to <4 x i64> addrspace(13)*, !dbg !119
%wide.load26 = load <4 x i64>, <4 x i64> addrspace(13)* %62, align 8, !dbg !119, !tbaa !15
%63 = add <4 x i64> %wide.load, %vec.phi, !dbg !123
%64 = add <4 x i64> %wide.load24, %vec.phi21, !dbg !123
%65 = add <4 x i64> %wide.load25, %vec.phi22, !dbg !123
%66 = add <4 x i64> %wide.load26, %vec.phi23, !dbg !123
%index.next = add i64 %index, 16, !dbg !116
%67 = icmp eq i64 %index.next, %n.vec, !dbg !116
br i1 %67, label %middle.block, label %vector.body, !dbg !116, !llvm.loop !130
middle.block: ; preds = %vector.body
%bin.rdx = add <4 x i64> %64, %63, !dbg !123
%bin.rdx27 = add <4 x i64> %65, %bin.rdx, !dbg !123
%bin.rdx28 = add <4 x i64> %66, %bin.rdx27, !dbg !123
%rdx.shuf = shufflevector <4 x i64> %bin.rdx28, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !123
%bin.rdx29 = add <4 x i64> %bin.rdx28, %rdx.shuf, !dbg !123
%rdx.shuf30 = shufflevector <4 x i64> %bin.rdx29, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !123
%bin.rdx31 = add <4 x i64> %bin.rdx29, %rdx.shuf30, !dbg !123
%68 = extractelement <4 x i64> %bin.rdx31, i32 0, !dbg !123
%cmp.n = icmp eq i64 %42, %n.vec
br i1 %cmp.n, label %L96.loopexit, label %scalar.ph, !dbg !115
scalar.ph: ; preds = %middle.block, %L88.lr.ph
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ]
%bc.merge.rdx = phi i64 [ %6, %L88.lr.ph ], [ %68, %middle.block ]
br label %L88, !dbg !115
L88: ; preds = %L88, %scalar.ph
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %72, %L88 ]
%value_phi16 = phi i64 [ %bc.merge.rdx, %scalar.ph ], [ %71, %L88 ]
%69 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !119
%70 = load i64, i64 addrspace(13)* %69, align 8, !dbg !119, !tbaa !15
%71 = add i64 %70, %value_phi16, !dbg !123
%72 = add nuw nsw i64 %value_phi317, 1, !dbg !116
%exitcond = icmp eq i64 %72, %42, !dbg !132
br i1 %exitcond, label %L96.loopexit, label %L88, !dbg !115, !llvm.loop !133
L96.loopexit: ; preds = %middle.block, %L88
%.lcssa = phi i64 [ %71, %L88 ], [ %68, %middle.block ]
br label %L96, !dbg !135
L96: ; preds = %L96.loopexit, %L80
%value_phi4 = phi i64 [ %6, %L80 ], [ %.lcssa, %L96.loopexit ]
ret i64 %value_phi4, !dbg !135
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment