Last active
April 18, 2019 21:23
-
-
Save nlw0/dc88109dfd7ad9073e5ae1a64d7427f6 to your computer and use it in GitHub Desktop.
LLVM passes from the min and sum reductions with float and i32 in Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*** IR Dump After Demanded bits analysis *** | |
; @julia_myfun_15949: takes one non-null object pointer (%0) and returns a double.
; This copy is the snapshot printed under the "IR Dump After Demanded bits analysis" banner.
; Flow, as visible in the body:
;   top         - reads element 0 of the argument's data (%6) and a checked count (%15).
;   L38         - overflow path of the checked add; throws and traps.
;   L42         - indirect call producing a new object %18 from %15.
;   L45/L70     - raw-pointer copy of %24 bytes into %18; L62 is its error path.
;   L80..L97    - scalar loop keeping the smaller of the running value and each element of %18,
;                 seeded with %6; the result is returned from L97.
; NOTE(review): presumably generated from a min-reduction over x[2:end] seeded with x[1];
; the Julia source is not shown here - confirm.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 { | |
top: | |
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
; %5 = pointer stored in the first word of the argument object; %6 = the double it points at.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9 | |
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4 | |
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17 | |
; %10 = i64 read at getelementptr index 3 (counted in pointer-sized slots) of the argument object.
; NOTE(review): presumably an array length/size field - confirm against the jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34 | |
; %12 = signed max(%10, 1); %15 = (%12 - 2) + 1, with %16 flagging signed overflow of the final add.
%11 = icmp sgt i64 %10, 1, !dbg !36 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !42 | |
%13 = add nsw i64 %12, -2, !dbg !49 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57 | |
%17 = zext i1 %16 to i8, !dbg !57 | |
; Spill the pair into %1; L42 and L45 reload the value through %.fca.0.gep.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57 | |
br i1 %16, label %L38, label %L42, !dbg !61 | |
L38: ; preds = %top | |
; Overflow path: reports the failed "%13 + 1" and never returns (trap + unreachable).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61 | |
call void @llvm.trap(), !dbg !61 | |
unreachable, !dbg !61 | |
L42: ; preds = %top | |
; Indirect call through a raw address, passing a constant object pointer and %15; %18 is the returned object.
; NOTE(review): looks like an array allocation of %15 elements - the callee is not named here, confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62 | |
; Skip the copy (go straight to L80) when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73 | |
%20 = icmp slt i64 %19, 1, !dbg !68 | |
br i1 %20, label %L80, label %L45, !dbg !72 | |
L45: ; preds = %L42 | |
; Preserve %18 and %0 for the GC across the raw-pointer copy; the matching ends are in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78 | |
; %24 = %23 * 8 (shift left by 3). 1152921504606846976 is 2^60, the bit of %23 that the
; shift moves into the sign bit of %24; when it is set the error path L62 is taken.
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73 | |
%24 = shl i64 %23, 3, !dbg !79 | |
%25 = and i64 %23, 1152921504606846976, !dbg !83 | |
%26 = icmp eq i64 %25, 0, !dbg !83 | |
br i1 %26, label %L70, label %L62, !dbg !85 | |
L62: ; preds = %L45 | |
; Error path: boxes %24, passes it to jl_invoke with constant arguments, then traps; never returns.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85 | |
call void @llvm.trap(), !dbg !85 | |
unreachable, !dbg !85 | |
L70: ; preds = %L45 | |
; %32 = first word of %18's object (as i64); %35 = first word of the argument's object; %37 = %35 + 8 bytes.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96 | |
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96 | |
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102 | |
%37 = ptrtoint i8* %36 to i64, !dbg !102 | |
; Raw-address call receiving (%32, %37, %24).
; NOTE(review): presumably memmove(dest, src + one 8-byte element, nbytes) - callee not named, confirm.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81 | |
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105 | |
br label %L80, !dbg !106 | |
L80: ; preds = %L70, %L42 | |
; %42 = i64 field 1 of %18 viewed as %jl_array_t; when it is < 1 the loop is skipped and %6 is returned.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112 | |
%43 = icmp slt i64 %42, 1, !dbg !114 | |
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115 | |
L88.lr.ph: ; preds = %L80 | |
; Loop preheader: loads the pointer stored in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)* | |
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4 | |
br label %L88, !dbg !116 | |
L88: ; preds = %L88, %L88.lr.ph | |
; Loop: %value_phi317 is the index (starts at 0); %value_phi16 is the running value (seeded with %6).
; %48 keeps %value_phi16 when it compares less than the loaded element, otherwise takes the element.
; The compare carries the `fast` flag. The loop exits once the incremented index equals %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ] | |
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ] | |
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117 | |
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17 | |
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121 | |
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121 | |
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124 | |
%exitcond = icmp eq i64 %49, %42, !dbg !127 | |
br i1 %exitcond, label %L97, label %L88, !dbg !116, !llvm.loop !128 | |
L97: ; preds = %L88, %L80 | |
; Result: %6 when the loop was skipped, otherwise the final %48 from the loop.
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L88 ] | |
ret double %value_phi4, !dbg !129 | |
} | |
*** IR Dump After Loop Vectorization *** | |
; @julia_myfun_15949: takes one non-null object pointer (%0) and returns a double.
; This copy is the snapshot printed under the "IR Dump After Loop Vectorization" banner.
; No vector-typed values or vector.* blocks appear in this body: the reduction loop L88 is
; still the scalar load/compare/select loop. The loop exit goes through a dedicated block,
; L97.loopexit, which carries the loop result in the single-entry phi %.lcssa.
; NOTE(review): presumably generated from a min-reduction over x[2:end] seeded with x[1];
; the Julia source is not shown here - confirm.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 { | |
top: | |
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
; %5 = pointer stored in the first word of the argument object; %6 = the double it points at.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9 | |
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4 | |
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17 | |
; %10 = i64 read at getelementptr index 3 (counted in pointer-sized slots) of the argument object.
; NOTE(review): presumably an array length/size field - confirm against the jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34 | |
; %12 = signed max(%10, 1); %15 = (%12 - 2) + 1, with %16 flagging signed overflow of the final add.
%11 = icmp sgt i64 %10, 1, !dbg !36 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !42 | |
%13 = add nsw i64 %12, -2, !dbg !49 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57 | |
%17 = zext i1 %16 to i8, !dbg !57 | |
; Spill the pair into %1; L42 and L45 reload the value through %.fca.0.gep.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57 | |
br i1 %16, label %L38, label %L42, !dbg !61 | |
L38: ; preds = %top | |
; Overflow path: reports the failed "%13 + 1" and never returns (trap + unreachable).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61 | |
call void @llvm.trap(), !dbg !61 | |
unreachable, !dbg !61 | |
L42: ; preds = %top | |
; Indirect call through a raw address, passing a constant object pointer and %15; %18 is the returned object.
; NOTE(review): looks like an array allocation of %15 elements - the callee is not named here, confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62 | |
; Skip the copy (go straight to L80) when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73 | |
%20 = icmp slt i64 %19, 1, !dbg !68 | |
br i1 %20, label %L80, label %L45, !dbg !72 | |
L45: ; preds = %L42 | |
; Preserve %18 and %0 for the GC across the raw-pointer copy; the matching ends are in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78 | |
; %24 = %23 * 8 (shift left by 3). 1152921504606846976 is 2^60, the bit of %23 that the
; shift moves into the sign bit of %24; when it is set the error path L62 is taken.
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73 | |
%24 = shl i64 %23, 3, !dbg !79 | |
%25 = and i64 %23, 1152921504606846976, !dbg !83 | |
%26 = icmp eq i64 %25, 0, !dbg !83 | |
br i1 %26, label %L70, label %L62, !dbg !85 | |
L62: ; preds = %L45 | |
; Error path: boxes %24, passes it to jl_invoke with constant arguments, then traps; never returns.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85 | |
call void @llvm.trap(), !dbg !85 | |
unreachable, !dbg !85 | |
L70: ; preds = %L45 | |
; %32 = first word of %18's object (as i64); %35 = first word of the argument's object; %37 = %35 + 8 bytes.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96 | |
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96 | |
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102 | |
%37 = ptrtoint i8* %36 to i64, !dbg !102 | |
; Raw-address call receiving (%32, %37, %24).
; NOTE(review): presumably memmove(dest, src + one 8-byte element, nbytes) - callee not named, confirm.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81 | |
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105 | |
br label %L80, !dbg !106 | |
L80: ; preds = %L70, %L42 | |
; %42 = i64 field 1 of %18 viewed as %jl_array_t; when it is < 1 the loop is skipped and %6 is returned.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112 | |
%43 = icmp slt i64 %42, 1, !dbg !114 | |
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115 | |
L88.lr.ph: ; preds = %L80 | |
; Loop preheader: loads the pointer stored in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)* | |
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4 | |
br label %L88, !dbg !116 | |
L88: ; preds = %L88, %L88.lr.ph | |
; Scalar loop: %value_phi317 is the index (starts at 0); %value_phi16 is the running value (seeded with %6).
; %48 keeps %value_phi16 when it compares less than the loaded element, otherwise takes the element.
; The compare carries the `fast` flag. The loop exits once the incremented index equals %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ] | |
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ] | |
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117 | |
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17 | |
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121 | |
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121 | |
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124 | |
%exitcond = icmp eq i64 %49, %42, !dbg !127 | |
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !116, !llvm.loop !128 | |
L97.loopexit: ; preds = %L88 | |
; Dedicated loop-exit block: %.lcssa is a single-entry phi forwarding the loop's final %48.
%.lcssa = phi double [ %48, %L88 ] | |
br label %L97, !dbg !129 | |
L97: ; preds = %L97.loopexit, %L80 | |
; Result: %6 when the loop was skipped, otherwise the loop result forwarded through %.lcssa.
%value_phi4 = phi double [ %6, %L80 ], [ %.lcssa, %L97.loopexit ] | |
ret double %value_phi4, !dbg !129 | |
} | |
*** IR Dump After Combine redundant instructions *** | |
; @julia_myfun_15949: takes one non-null object pointer (%0) and returns a double.
; This copy is the snapshot printed under the "IR Dump After Combine redundant instructions" banner.
; The reduction loop L88 is a scalar load/compare/select loop; its exit block L97.loopexit
; contains only a branch, and %value_phi4 in L97 takes the loop's %48 directly.
; NOTE(review): presumably generated from a min-reduction over x[2:end] seeded with x[1];
; the Julia source is not shown here - confirm.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 { | |
top: | |
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
; %5 = pointer stored in the first word of the argument object; %6 = the double it points at.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9 | |
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4 | |
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17 | |
; %10 = i64 read at getelementptr index 3 (counted in pointer-sized slots) of the argument object.
; NOTE(review): presumably an array length/size field - confirm against the jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34 | |
; %12 = signed max(%10, 1); %15 = (%12 - 2) + 1, with %16 flagging signed overflow of the final add.
%11 = icmp sgt i64 %10, 1, !dbg !36 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !42 | |
%13 = add nsw i64 %12, -2, !dbg !49 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57 | |
%17 = zext i1 %16 to i8, !dbg !57 | |
; Spill the pair into %1; L42 and L45 reload the value through %.fca.0.gep.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57 | |
br i1 %16, label %L38, label %L42, !dbg !61 | |
L38: ; preds = %top | |
; Overflow path: reports the failed "%13 + 1" and never returns (trap + unreachable).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023289680 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61 | |
call void @llvm.trap(), !dbg !61 | |
unreachable, !dbg !61 | |
L42: ; preds = %top | |
; Indirect call through a raw address, passing a constant object pointer and %15; %18 is the returned object.
; NOTE(review): looks like an array allocation of %15 elements - the callee is not named here, confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139695308387408 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695115430016 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62 | |
; Skip the copy (go straight to L80) when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73 | |
%20 = icmp slt i64 %19, 1, !dbg !68 | |
br i1 %20, label %L80, label %L45, !dbg !72 | |
L45: ; preds = %L42 | |
; Preserve %18 and %0 for the GC across the raw-pointer copy; the matching ends are in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78 | |
; %24 = %23 * 8 (shift left by 3). 1152921504606846976 is 2^60, the bit of %23 that the
; shift moves into the sign bit of %24; when it is set the error path L62 is taken.
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73 | |
%24 = shl i64 %23, 3, !dbg !79 | |
%25 = and i64 %23, 1152921504606846976, !dbg !83 | |
%26 = icmp eq i64 %25, 0, !dbg !83 | |
br i1 %26, label %L70, label %L62, !dbg !85 | |
L62: ; preds = %L45 | |
; Error path: boxes %24, passes it to jl_invoke with constant arguments, then traps; never returns.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102791248 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695102792000 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695023292672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139695025850352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85 | |
call void @llvm.trap(), !dbg !85 | |
unreachable, !dbg !85 | |
L70: ; preds = %L45 | |
; %32 = first word of %18's object (as i64); %35 = first word of the argument's object; %37 = %35 + 8 bytes.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96 | |
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96 | |
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102 | |
%37 = ptrtoint i8* %36 to i64, !dbg !102 | |
; Raw-address call receiving (%32, %37, %24).
; NOTE(review): presumably memmove(dest, src + one 8-byte element, nbytes) - callee not named, confirm.
%38 = call i64 inttoptr (i64 139695299005952 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81 | |
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105 | |
br label %L80, !dbg !106 | |
L80: ; preds = %L70, %L42 | |
; %42 = i64 field 1 of %18 viewed as %jl_array_t; when it is < 1 the loop is skipped and %6 is returned.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112 | |
%43 = icmp slt i64 %42, 1, !dbg !114 | |
br i1 %43, label %L97, label %L88.lr.ph, !dbg !115 | |
L88.lr.ph: ; preds = %L80 | |
; Loop preheader: loads the pointer stored in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)* | |
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4 | |
br label %L88, !dbg !116 | |
L88: ; preds = %L88, %L88.lr.ph | |
; Scalar loop: %value_phi317 is the index (starts at 0); %value_phi16 is the running value (seeded with %6).
; %48 keeps %value_phi16 when it compares less than the loaded element, otherwise takes the element.
; The compare carries the `fast` flag. The loop exits once the incremented index equals %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ] | |
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ] | |
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117 | |
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17 | |
%.inv = fcmp fast olt double %value_phi16, %47, !dbg !121 | |
%48 = select i1 %.inv, double %value_phi16, double %47, !dbg !121 | |
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !124 | |
%exitcond = icmp eq i64 %49, %42, !dbg !127 | |
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !116, !llvm.loop !128 | |
L97.loopexit: ; preds = %L88 | |
; Loop-exit block holding only a branch; it defines no values.
br label %L97, !dbg !129 | |
L97: ; preds = %L97.loopexit, %L80 | |
; Result: %6 when the loop was skipped, otherwise the loop's final %48 (arriving via L97.loopexit).
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L97.loopexit ] | |
ret double %value_phi4, !dbg !129 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*** IR Dump After Demanded bits analysis *** | |
; @julia_myfun_15947: takes one non-null object pointer (%0) and returns an i64.
; This copy is the snapshot printed under the "IR Dump After Demanded bits analysis" banner.
; Flow, as visible in the body:
;   top         - reads element 0 of the argument's data (%6) and a checked count (%15).
;   L38         - overflow path of the checked add; throws and traps.
;   L42         - indirect call producing a new object %18 from %15.
;   L45/L70     - raw-pointer copy of %24 bytes into %18; L62 is its error path.
;   L80..L97    - scalar loop keeping the signed-smaller of the running value and each
;                 element of %18, seeded with %6; the result is returned from L97.
; NOTE(review): presumably generated from a min-reduction over x[2:end] seeded with x[1];
; the Julia source is not shown here - confirm.
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 { | |
top: | |
; Stack slot for the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
; %5 = pointer stored in the first word of the argument object; %6 = the i64 it points at.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7 | |
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4 | |
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15 | |
; %10 = i64 read at getelementptr index 3 (counted in pointer-sized slots) of the argument object.
; NOTE(review): presumably an array length/size field - confirm against the jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33 | |
; %12 = signed max(%10, 1); %15 = (%12 - 2) + 1, with %16 flagging signed overflow of the final add.
%11 = icmp sgt i64 %10, 1, !dbg !35 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !41 | |
%13 = add nsw i64 %12, -2, !dbg !48 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56 | |
%17 = zext i1 %16 to i8, !dbg !56 | |
; Spill the pair into %1; L42 and L45 reload the value through %.fca.0.gep.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56 | |
br i1 %16, label %L38, label %L42, !dbg !60 | |
L38: ; preds = %top | |
; Overflow path: reports the failed "%13 + 1" and never returns (trap + unreachable).
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347041104 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60 | |
call void @llvm.trap(), !dbg !60 | |
unreachable, !dbg !60 | |
L42: ; preds = %top | |
; Indirect call through a raw address, passing a constant object pointer and %15; %18 is the returned object.
; NOTE(review): looks like an array allocation of %15 elements - the callee is not named here, confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139756632138832 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426689936 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61 | |
; Skip the copy (go straight to L80) when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72 | |
%20 = icmp slt i64 %19, 1, !dbg !67 | |
br i1 %20, label %L80, label %L45, !dbg !71 | |
L45: ; preds = %L42 | |
; Preserve %18 and %0 for the GC across the raw-pointer copy; the matching ends are in L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77 | |
; %24 = %23 * 8 (shift left by 3). 1152921504606846976 is 2^60, the bit of %23 that the
; shift moves into the sign bit of %24; when it is set the error path L62 is taken.
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72 | |
%24 = shl i64 %23, 3, !dbg !78 | |
%25 = and i64 %23, 1152921504606846976, !dbg !82 | |
%26 = icmp eq i64 %25, 0, !dbg !82 | |
br i1 %26, label %L70, label %L62, !dbg !84 | |
L62: ; preds = %L45 | |
; Error path: boxes %24, passes it to jl_invoke with constant arguments, then traps; never returns.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426542672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426543424 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347044096 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756349605872 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84 | |
call void @llvm.trap(), !dbg !84 | |
unreachable, !dbg !84 | |
L70: ; preds = %L45 | |
; %32 = first word of %18's object (as i64); %35 = first word of the argument's object; %37 = %35 + 8 bytes.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95 | |
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95 | |
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101 | |
%37 = ptrtoint i8* %36 to i64, !dbg !101 | |
; Raw-address call receiving (%32, %37, %24).
; NOTE(review): presumably memmove(dest, src + one 8-byte element, nbytes) - callee not named, confirm.
%38 = call i64 inttoptr (i64 139756622757376 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80 | |
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104 | |
br label %L80, !dbg !105 | |
L80: ; preds = %L70, %L42 | |
; %42 = i64 field 1 of %18 viewed as %jl_array_t; when it is < 1 the loop is skipped and %6 is returned.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111 | |
%43 = icmp slt i64 %42, 1, !dbg !113 | |
br i1 %43, label %L97, label %L88.lr.ph, !dbg !114 | |
L88.lr.ph: ; preds = %L80 | |
; Loop preheader: loads the pointer stored in the first word of %18 once, outside the loop.
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)* | |
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4 | |
br label %L88, !dbg !115 | |
L88: ; preds = %L88, %L88.lr.ph | |
; Loop: %value_phi317 is the index (starts at 0); %value_phi16 is the running value (seeded with %6).
; %49 keeps %value_phi16 when it is signed-less-than the loaded element, otherwise takes the element.
; The loop exits once the incremented index %50 equals %42.
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %50, %L88 ] | |
%value_phi16 = phi i64 [ %6, %L88.lr.ph ], [ %49, %L88 ] | |
%46 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !116 | |
%47 = load i64, i64 addrspace(13)* %46, align 8, !dbg !116, !tbaa !15 | |
%48 = icmp slt i64 %value_phi16, %47, !dbg !120 | |
%49 = select i1 %48, i64 %value_phi16, i64 %47, !dbg !124 | |
%50 = add nuw nsw i64 %value_phi317, 1, !dbg !127 | |
%exitcond = icmp eq i64 %50, %42, !dbg !130 | |
br i1 %exitcond, label %L97, label %L88, !dbg !115, !llvm.loop !131 | |
L97: ; preds = %L88, %L80 | |
; Result: %6 when the loop was skipped, otherwise the final %49 from the loop.
%value_phi4 = phi i64 [ %6, %L80 ], [ %49, %L88 ] | |
ret i64 %value_phi4, !dbg !132 | |
} | |
*** IR Dump After Loop Vectorization *** | |
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 { | |
top: | |
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7 | |
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4 | |
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15 | |
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33 | |
%11 = icmp sgt i64 %10, 1, !dbg !35 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !41 | |
%13 = add nsw i64 %12, -2, !dbg !48 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56 | |
%17 = zext i1 %16 to i8, !dbg !56 | |
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56 | |
br i1 %16, label %L38, label %L42, !dbg !60 | |
L38: ; preds = %top | |
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347041104 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60 | |
call void @llvm.trap(), !dbg !60 | |
unreachable, !dbg !60 | |
L42: ; preds = %top | |
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139756632138832 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426689936 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61 | |
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72 | |
%20 = icmp slt i64 %19, 1, !dbg !67 | |
br i1 %20, label %L80, label %L45, !dbg !71 | |
L45: ; preds = %L42 | |
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77 | |
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72 | |
%24 = shl i64 %23, 3, !dbg !78 | |
%25 = and i64 %23, 1152921504606846976, !dbg !82 | |
%26 = icmp eq i64 %25, 0, !dbg !82 | |
br i1 %26, label %L70, label %L62, !dbg !84 | |
L62: ; preds = %L45 | |
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426542672 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756426543424 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756347044096 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139756349605872 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84 | |
call void @llvm.trap(), !dbg !84 | |
unreachable, !dbg !84 | |
L70: ; preds = %L45 | |
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95 | |
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95 | |
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101 | |
%37 = ptrtoint i8* %36 to i64, !dbg !101 | |
%38 = call i64 inttoptr (i64 139756622757376 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80 | |
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104 | |
br label %L80, !dbg !105 | |
L80: ; preds = %L70, %L42 | |
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111 | |
%43 = icmp slt i64 %42, 1, !dbg !113 | |
br i1 %43, label %L97, label %L88.lr.ph, !dbg !114 | |
L88.lr.ph: ; preds = %L80 | |
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)* | |
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4 | |
%min.iters.check = icmp ult i64 %42, 16, !dbg !115 | |
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !115 | |
vector.ph: ; preds = %L88.lr.ph | |
%n.mod.vf = urem i64 %42, 16, !dbg !115 | |
%n.vec = sub i64 %42, %n.mod.vf, !dbg !115 | |
%minmax.ident.splatinsert = insertelement <4 x i64> undef, i64 %6, i32 0, !dbg !115 | |
%minmax.ident.splat = shufflevector <4 x i64> %minmax.ident.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !115 | |
br label %vector.body, !dbg !115 | |
vector.body: ; preds = %vector.body, %vector.ph | |
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !116 | |
%vec.phi = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %67, %vector.body ] | |
%vec.phi21 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %68, %vector.body ] | |
%vec.phi22 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %69, %vector.body ] | |
%vec.phi23 = phi <4 x i64> [ %minmax.ident.splat, %vector.ph ], [ %70, %vector.body ] | |
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !116 | |
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !116 | |
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !116 | |
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !116 | |
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !116 | |
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !116 | |
%46 = add i64 %index, 0, !dbg !116 | |
%47 = add i64 %index, 4, !dbg !116 | |
%48 = add i64 %index, 8, !dbg !116 | |
%49 = add i64 %index, 12, !dbg !116 | |
%50 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %46, !dbg !119 | |
%51 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %47, !dbg !119 | |
%52 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %48, !dbg !119 | |
%53 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %49, !dbg !119 | |
%54 = getelementptr i64, i64 addrspace(13)* %50, i32 0, !dbg !119 | |
%55 = bitcast i64 addrspace(13)* %54 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load = load <4 x i64>, <4 x i64> addrspace(13)* %55, align 8, !dbg !119, !tbaa !15 | |
%56 = getelementptr i64, i64 addrspace(13)* %50, i32 4, !dbg !119 | |
%57 = bitcast i64 addrspace(13)* %56 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load24 = load <4 x i64>, <4 x i64> addrspace(13)* %57, align 8, !dbg !119, !tbaa !15 | |
%58 = getelementptr i64, i64 addrspace(13)* %50, i32 8, !dbg !119 | |
%59 = bitcast i64 addrspace(13)* %58 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load25 = load <4 x i64>, <4 x i64> addrspace(13)* %59, align 8, !dbg !119, !tbaa !15 | |
%60 = getelementptr i64, i64 addrspace(13)* %50, i32 12, !dbg !119 | |
%61 = bitcast i64 addrspace(13)* %60 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load26 = load <4 x i64>, <4 x i64> addrspace(13)* %61, align 8, !dbg !119, !tbaa !15 | |
%62 = icmp slt <4 x i64> %vec.phi, %wide.load, !dbg !123 | |
%63 = icmp slt <4 x i64> %vec.phi21, %wide.load24, !dbg !123 | |
%64 = icmp slt <4 x i64> %vec.phi22, %wide.load25, !dbg !123 | |
%65 = icmp slt <4 x i64> %vec.phi23, %wide.load26, !dbg !123 | |
%66 = extractelement <4 x i1> %62, i32 0, !dbg !127 | |
%67 = select <4 x i1> %62, <4 x i64> %vec.phi, <4 x i64> %wide.load, !dbg !127 | |
%68 = select <4 x i1> %63, <4 x i64> %vec.phi21, <4 x i64> %wide.load24, !dbg !127 | |
%69 = select <4 x i1> %64, <4 x i64> %vec.phi22, <4 x i64> %wide.load25, !dbg !127 | |
%70 = select <4 x i1> %65, <4 x i64> %vec.phi23, <4 x i64> %wide.load26, !dbg !127 | |
%index.next = add i64 %index, 16, !dbg !116 | |
%71 = icmp eq i64 %index.next, %n.vec, !dbg !116 | |
br i1 %71, label %middle.block, label %vector.body, !dbg !116, !llvm.loop !130 | |
middle.block: ; preds = %vector.body | |
%rdx.minmax.cmp = icmp slt <4 x i64> %67, %68, !dbg !127 | |
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i64> %67, <4 x i64> %68, !dbg !127 | |
%rdx.minmax.cmp27 = icmp slt <4 x i64> %rdx.minmax.select, %69, !dbg !127 | |
%rdx.minmax.select28 = select <4 x i1> %rdx.minmax.cmp27, <4 x i64> %rdx.minmax.select, <4 x i64> %69, !dbg !127 | |
%rdx.minmax.cmp29 = icmp slt <4 x i64> %rdx.minmax.select28, %70, !dbg !127 | |
%rdx.minmax.select30 = select <4 x i1> %rdx.minmax.cmp29, <4 x i64> %rdx.minmax.select28, <4 x i64> %70, !dbg !127 | |
%rdx.shuf = shufflevector <4 x i64> %rdx.minmax.select30, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !127 | |
%rdx.minmax.cmp31 = icmp slt <4 x i64> %rdx.minmax.select30, %rdx.shuf, !dbg !127 | |
%rdx.minmax.select32 = select <4 x i1> %rdx.minmax.cmp31, <4 x i64> %rdx.minmax.select30, <4 x i64> %rdx.shuf, !dbg !127 | |
%rdx.shuf33 = shufflevector <4 x i64> %rdx.minmax.select32, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !127 | |
%rdx.minmax.cmp34 = icmp slt <4 x i64> %rdx.minmax.select32, %rdx.shuf33, !dbg !127 | |
%rdx.minmax.select35 = select <4 x i1> %rdx.minmax.cmp34, <4 x i64> %rdx.minmax.select32, <4 x i64> %rdx.shuf33, !dbg !127 | |
%72 = extractelement <4 x i64> %rdx.minmax.select35, i32 0, !dbg !127 | |
%cmp.n = icmp eq i64 %42, %n.vec | |
br i1 %cmp.n, label %L97.loopexit, label %scalar.ph, !dbg !115 | |
scalar.ph: ; preds = %middle.block, %L88.lr.ph | |
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ] | |
%bc.merge.rdx = phi i64 [ %6, %L88.lr.ph ], [ %72, %middle.block ] | |
br label %L88, !dbg !115 | |
L88: ; preds = %L88, %scalar.ph | |
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %77, %L88 ] | |
%value_phi16 = phi i64 [ %bc.merge.rdx, %scalar.ph ], [ %76, %L88 ] | |
%73 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !119 | |
%74 = load i64, i64 addrspace(13)* %73, align 8, !dbg !119, !tbaa !15 | |
%75 = icmp slt i64 %value_phi16, %74, !dbg !123 | |
%76 = select i1 %75, i64 %value_phi16, i64 %74, !dbg !127 | |
%77 = add nuw nsw i64 %value_phi317, 1, !dbg !116 | |
%exitcond = icmp eq i64 %77, %42, !dbg !132 | |
br i1 %exitcond, label %L97.loopexit, label %L88, !dbg !115, !llvm.loop !133 | |
L97.loopexit: ; preds = %middle.block, %L88 | |
%.lcssa = phi i64 [ %76, %L88 ], [ %72, %middle.block ] | |
br label %L97, !dbg !135 | |
L97: ; preds = %L97.loopexit, %L80 | |
%value_phi4 = phi i64 [ %6, %L80 ], [ %.lcssa, %L97.loopexit ] | |
ret i64 %value_phi4, !dbg !135 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*** IR Dump After Demanded bits analysis *** | |
; julia_myfun_15949 -- scalar form, as printed after the Demanded Bits analysis
; (i.e. before the loop vectorizer has rewritten the loop).
;
; Input : %0, a non-null Julia object pointer (addrspace 10). The code reads a
;         `double addrspace(13)*` from its first word and an i64 from its
;         pointer-sized slot 3, so it is presumably an Array{Float64} -- confirm
;         against the Julia source.
; Output: double. Starts from the first element of %0 (%6) and adds every
;         element of a second object (%18) to it with `fadd fast`.
;
; Overall shape: size computation with overflow check -> call producing %18 ->
; guarded copy into %18 -> sum loop over %18.
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 { | |
top: | |
; %1 is a { value, overflow-flag } stack slot filled in below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9 | |
; %5 = pointer stored in the first word of %0; %6 = the double it points at.
; %6 seeds the reduction and is also the result when the loop does not run.
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9 | |
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4 | |
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17 | |
; %10 = i64 read from pointer-sized slot 3 of %0 (presumably a dimension/length
; field -- confirm against Julia's array layout).
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34 | |
; %12 = signed max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with overflow reported in %16.
%11 = icmp sgt i64 %10, 1, !dbg !36 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !42 | |
%13 = add nsw i64 %12, -2, !dbg !49 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57 | |
%17 = zext i1 %16 to i8, !dbg !57 | |
; Spill the sum and its overflow flag into %1; the sum is reloaded from there later.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57 | |
br i1 %16, label %L38, label %L42, !dbg !61 | |
; L38: overflow path -- calls the overflow-error thrower and never returns.
L38: ; preds = %top | |
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062258000 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61 | |
call void @llvm.trap(), !dbg !61 | |
unreachable, !dbg !61 | |
; L42: call through an absolute address with (constant object, %15), yielding %18.
; The callee is not named in this dump; since %18 is later treated as a
; %jl_array_t this is presumably an array allocation of %15 elements -- confirm.
L42: ; preds = %top | |
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139889347355728 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889154398336 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62 | |
; Skip the copy entirely when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73 | |
%20 = icmp slt i64 %19, 1, !dbg !68 | |
br i1 %20, label %L80, label %L45, !dbg !72 | |
; L45: open GC-preserve regions for %18 and %0, then compute %24 = count << 3.
L45: ; preds = %L42 | |
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78 | |
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73 | |
%24 = shl i64 %23, 3, !dbg !79 | |
; 1152921504606846976 == 2^60: this tests bit 60 of the count, which is the bit
; that `shl 3` moves into the sign bit. Non-zero -> error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !83 | |
%26 = icmp eq i64 %25, 0, !dbg !83 | |
br i1 %26, label %L70, label %L62, !dbg !85 | |
; L62: error path -- boxes %24 and passes it to a jl_invoke call, then traps.
L62: ; preds = %L45 | |
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141759568 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141760320 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062260992 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889064822768 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85 | |
call void @llvm.trap(), !dbg !85 | |
unreachable, !dbg !85 | |
; L70: %32 = first word of %18 (as i64); %37 = (first word of %0) + 8 bytes.
; Both are passed with %24 to an (i64, i64, i64) callee at an absolute address --
; presumably a memmove-style copy(dest, src, nbytes) that skips the first
; 8-byte element of the input; confirm the callee.
L70: ; preds = %L45 | |
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96 | |
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96 | |
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102 | |
%37 = ptrtoint i8* %36 to i64, !dbg !102 | |
%38 = call i64 inttoptr (i64 139889337974272 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81 | |
; Close the preserve regions in reverse order of opening.
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105 | |
br label %L80, !dbg !106 | |
; L80: %42 = i64 at field index 1 of %18 viewed as %jl_array_t; it is the loop
; trip count. If it is < 1 the function returns %6 unchanged.
L80: ; preds = %L70, %L42 | |
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112 | |
%43 = icmp slt i64 %42, 1, !dbg !114 | |
br i1 %43, label %L96, label %L88.lr.ph, !dbg !115 | |
; Loop preheader: hoisted load of the element pointer %45 from the first word of %18.
L88.lr.ph: ; preds = %L80 | |
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)* | |
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4 | |
br label %L88, !dbg !116 | |
; L88: the reduction loop. %value_phi317 is the 0-based index, %value_phi16 the
; running sum (seeded with %6). One load and one `fadd fast` per iteration;
; exits when the incremented index equals %42.
L88: ; preds = %L88, %L88.lr.ph | |
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ] | |
%value_phi16 = phi double [ %6, %L88.lr.ph ], [ %48, %L88 ] | |
%46 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !117 | |
%47 = load double, double addrspace(13)* %46, align 8, !dbg !117, !tbaa !17 | |
%48 = fadd fast double %47, %value_phi16, !dbg !121 | |
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !127 | |
%exitcond = icmp eq i64 %49, %42, !dbg !130 | |
br i1 %exitcond, label %L96, label %L88, !dbg !116, !llvm.loop !131 | |
; L96: result is %6 when the loop was skipped, otherwise the final sum %48.
L96: ; preds = %L88, %L80 | |
%value_phi4 = phi double [ %6, %L80 ], [ %48, %L88 ] | |
ret double %value_phi4, !dbg !132 | |
} | |
*** IR Dump After Loop Vectorization *** | |
; julia_myfun_15949 -- as printed after the Loop Vectorization pass.
;
; Input : %0, a non-null Julia object pointer (addrspace 10); presumably an
;         Array{Float64} given the double loads below -- confirm against the
;         Julia source.
; Output: double. Starts from the first element of %0 (%6) and adds every
;         element of a second object (%18) to it with `fadd fast`.
;
; Everything up to and including L80 sets up %18 and the trip count %42. The sum
; loop itself is now split into:
;   vector.ph / vector.body / middle.block : 16 doubles per iteration, held in
;                                            four <4 x double> accumulators
;   scalar.ph / L88                        : scalar loop for short inputs and
;                                            for the %42 mod 16 remainder
define double @julia_myfun_15949(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !7 { | |
top: | |
; %1 is a { value, overflow-flag } stack slot filled in below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !9 | |
; %5 = pointer stored in the first word of %0; %6 = the double it points at.
; %6 seeds the reduction and is also the result when the loop does not run.
%4 = bitcast %jl_value_t addrspace(11)* %3 to double addrspace(13)* addrspace(11)*, !dbg !9 | |
%5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %4, align 8, !dbg !9, !tbaa !13, !nonnull !4 | |
%6 = load double, double addrspace(13)* %5, align 8, !dbg !9, !tbaa !17 | |
; %10 = i64 read from pointer-sized slot 3 of %0 (presumably a dimension/length
; field -- confirm against Julia's array layout).
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !20 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !20 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !20 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !20, !tbaa !34 | |
; %12 = signed max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with overflow reported in %16.
%11 = icmp sgt i64 %10, 1, !dbg !36 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !42 | |
%13 = add nsw i64 %12, -2, !dbg !49 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !57 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !57 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !57 | |
%17 = zext i1 %16 to i8, !dbg !57 | |
; Spill the sum and its overflow flag into %1; the sum is reloaded from there later.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !57 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !57 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !57 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !57 | |
br i1 %16, label %L38, label %L42, !dbg !61 | |
; L38: overflow path -- calls the overflow-error thrower and never returns.
L38: ; preds = %top | |
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062258000 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !61 | |
call void @llvm.trap(), !dbg !61 | |
unreachable, !dbg !61 | |
; L42: call through an absolute address with (constant object, %15), yielding %18.
; The callee is not named in this dump; since %18 is later treated as a
; %jl_array_t this is presumably an array allocation of %15 elements -- confirm.
L42: ; preds = %top | |
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139889347355728 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889154398336 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !62 | |
; Skip the copy entirely when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !68, !tbaa !73 | |
%20 = icmp slt i64 %19, 1, !dbg !68 | |
br i1 %20, label %L80, label %L45, !dbg !72 | |
; L45: open GC-preserve regions for %18 and %0, then compute %24 = count << 3.
L45: ; preds = %L42 | |
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !75 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !78 | |
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !79, !tbaa !73 | |
%24 = shl i64 %23, 3, !dbg !79 | |
; 1152921504606846976 == 2^60: this tests bit 60 of the count, which is the bit
; that `shl 3` moves into the sign bit. Non-zero -> error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !83 | |
%26 = icmp eq i64 %25, 0, !dbg !83 | |
br i1 %26, label %L70, label %L62, !dbg !85 | |
; L62: error path -- boxes %24 and passes it to a jl_invoke call, then traps.
L62: ; preds = %L45 | |
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !85 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141759568 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889141760320 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889062260992 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139889064822768 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !85 | |
call void @llvm.trap(), !dbg !85 | |
unreachable, !dbg !85 | |
; L70: %32 = first word of %18 (as i64); %37 = (first word of %0) + 8 bytes.
; Both are passed with %24 to an (i64, i64, i64) callee at an absolute address --
; presumably a memmove-style copy(dest, src, nbytes) that skips the first
; 8-byte element of the input; confirm the callee.
L70: ; preds = %L45 | |
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !96 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #7, !dbg !96 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !96 | |
%32 = load i64, i64* %31, align 8, !dbg !96, !tbaa !13, !range !101 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #7, !dbg !96 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !96 | |
%35 = load i8*, i8** %34, align 8, !dbg !96, !tbaa !13, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !102 | |
%37 = ptrtoint i8* %36 to i64, !dbg !102 | |
%38 = call i64 inttoptr (i64 139889337974272 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !81 | |
; Close the preserve regions in reverse order of opening.
call void @llvm.julia.gc_preserve_end(token %22), !dbg !104 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !105 | |
br label %L80, !dbg !106 | |
; L80: %42 = i64 at field index 1 of %18 viewed as %jl_array_t; it is the loop
; trip count. If it is < 1 the function returns %6 unchanged.
L80: ; preds = %L70, %L42 | |
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !107 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !107 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !107 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !107, !tbaa !112 | |
%43 = icmp slt i64 %42, 1, !dbg !114 | |
br i1 %43, label %L96, label %L88.lr.ph, !dbg !115 | |
; Loop preheader: load the element pointer %45, then pick a path. Fewer than 16
; elements -> go straight to the scalar loop.
L88.lr.ph: ; preds = %L80 | |
%44 = bitcast %jl_value_t addrspace(11)* %39 to double addrspace(13)* addrspace(11)* | |
%45 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %44, align 8, !tbaa !13, !nonnull !4 | |
%min.iters.check = icmp ult i64 %42, 16, !dbg !116 | |
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !116 | |
; vector.ph: %n.vec = %42 rounded down to a multiple of 16 (elements handled by
; the vector loop). %46 = <%6, 0, 0, 0>: the seed goes into lane 0 of the first
; accumulator only, so it is counted exactly once in the final sum.
vector.ph: ; preds = %L88.lr.ph | |
%n.mod.vf = urem i64 %42, 16, !dbg !116 | |
%n.vec = sub i64 %42, %n.mod.vf, !dbg !116 | |
%46 = insertelement <4 x double> zeroinitializer, double %6, i32 0, !dbg !116 | |
br label %vector.body, !dbg !116 | |
; vector.body: four independent <4 x double> accumulators; %index advances by 16.
vector.body: ; preds = %vector.body, %vector.ph | |
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !117 | |
%vec.phi = phi <4 x double> [ %46, %vector.ph ], [ %63, %vector.body ] | |
%vec.phi21 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %64, %vector.body ] | |
%vec.phi22 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %65, %vector.body ] | |
%vec.phi23 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %66, %vector.body ] | |
; Vector induction values. Nothing later in this function references
; %broadcast.* or %induction*.
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !117 | |
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !117 | |
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !117 | |
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !117 | |
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !117 | |
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !117 | |
; Scalar indices and element addresses for the four parts. Only %51 (base at
; %index + 0) is used by the loads below; %52-%54 are not referenced again.
%47 = add i64 %index, 0, !dbg !117 | |
%48 = add i64 %index, 4, !dbg !117 | |
%49 = add i64 %index, 8, !dbg !117 | |
%50 = add i64 %index, 12, !dbg !117 | |
%51 = getelementptr inbounds double, double addrspace(13)* %45, i64 %47, !dbg !120 | |
%52 = getelementptr inbounds double, double addrspace(13)* %45, i64 %48, !dbg !120 | |
%53 = getelementptr inbounds double, double addrspace(13)* %45, i64 %49, !dbg !120 | |
%54 = getelementptr inbounds double, double addrspace(13)* %45, i64 %50, !dbg !120 | |
; Four wide loads at element offsets 0, 4, 8 and 12 from %51. `align 8` means
; only element alignment is promised, not vector alignment.
%55 = getelementptr double, double addrspace(13)* %51, i32 0, !dbg !120 | |
%56 = bitcast double addrspace(13)* %55 to <4 x double> addrspace(13)*, !dbg !120 | |
%wide.load = load <4 x double>, <4 x double> addrspace(13)* %56, align 8, !dbg !120, !tbaa !17 | |
%57 = getelementptr double, double addrspace(13)* %51, i32 4, !dbg !120 | |
%58 = bitcast double addrspace(13)* %57 to <4 x double> addrspace(13)*, !dbg !120 | |
%wide.load24 = load <4 x double>, <4 x double> addrspace(13)* %58, align 8, !dbg !120, !tbaa !17 | |
%59 = getelementptr double, double addrspace(13)* %51, i32 8, !dbg !120 | |
%60 = bitcast double addrspace(13)* %59 to <4 x double> addrspace(13)*, !dbg !120 | |
%wide.load25 = load <4 x double>, <4 x double> addrspace(13)* %60, align 8, !dbg !120, !tbaa !17 | |
%61 = getelementptr double, double addrspace(13)* %51, i32 12, !dbg !120 | |
%62 = bitcast double addrspace(13)* %61 to <4 x double> addrspace(13)*, !dbg !120 | |
%wide.load26 = load <4 x double>, <4 x double> addrspace(13)* %62, align 8, !dbg !120, !tbaa !17 | |
; Accumulate each loaded vector into its own accumulator.
%63 = fadd fast <4 x double> %wide.load, %vec.phi, !dbg !124 | |
%64 = fadd fast <4 x double> %wide.load24, %vec.phi21, !dbg !124 | |
%65 = fadd fast <4 x double> %wide.load25, %vec.phi22, !dbg !124 | |
%66 = fadd fast <4 x double> %wide.load26, %vec.phi23, !dbg !124 | |
%index.next = add i64 %index, 16, !dbg !117 | |
%67 = icmp eq i64 %index.next, %n.vec, !dbg !117 | |
br i1 %67, label %middle.block, label %vector.body, !dbg !117, !llvm.loop !130 | |
; middle.block: fold the four accumulators into one vector, then reduce it
; horizontally (lanes 2,3 onto 0,1; then lane 1 onto 0) and take lane 0 as %68.
middle.block: ; preds = %vector.body | |
%bin.rdx = fadd fast <4 x double> %64, %63, !dbg !124 | |
%bin.rdx27 = fadd fast <4 x double> %65, %bin.rdx, !dbg !124 | |
%bin.rdx28 = fadd fast <4 x double> %66, %bin.rdx27, !dbg !124 | |
%rdx.shuf = shufflevector <4 x double> %bin.rdx28, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !124 | |
%bin.rdx29 = fadd fast <4 x double> %bin.rdx28, %rdx.shuf, !dbg !124 | |
%rdx.shuf30 = shufflevector <4 x double> %bin.rdx29, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !124 | |
%bin.rdx31 = fadd fast <4 x double> %bin.rdx29, %rdx.shuf30, !dbg !124 | |
%68 = extractelement <4 x double> %bin.rdx31, i32 0, !dbg !124 | |
; If the vector loop covered every element, skip the scalar remainder.
%cmp.n = icmp eq i64 %42, %n.vec | |
br i1 %cmp.n, label %L96.loopexit, label %scalar.ph, !dbg !116 | |
; scalar.ph: starting index and partial sum for the scalar loop -- (0, %6) when
; the vector loop was bypassed, (%n.vec, %68) when resuming after it.
scalar.ph: ; preds = %middle.block, %L88.lr.ph | |
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ] | |
%bc.merge.rdx = phi double [ %6, %L88.lr.ph ], [ %68, %middle.block ] | |
br label %L88, !dbg !116 | |
; L88: scalar loop, one element per iteration, until the index reaches %42.
L88: ; preds = %L88, %scalar.ph | |
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %72, %L88 ] | |
%value_phi16 = phi double [ %bc.merge.rdx, %scalar.ph ], [ %71, %L88 ] | |
%69 = getelementptr inbounds double, double addrspace(13)* %45, i64 %value_phi317, !dbg !120 | |
%70 = load double, double addrspace(13)* %69, align 8, !dbg !120, !tbaa !17 | |
%71 = fadd fast double %70, %value_phi16, !dbg !124 | |
%72 = add nuw nsw i64 %value_phi317, 1, !dbg !117 | |
%exitcond = icmp eq i64 %72, %42, !dbg !132 | |
br i1 %exitcond, label %L96.loopexit, label %L88, !dbg !116, !llvm.loop !133 | |
; L96.loopexit: merge the sum from whichever loop ran last.
L96.loopexit: ; preds = %middle.block, %L88 | |
%.lcssa = phi double [ %71, %L88 ], [ %68, %middle.block ] | |
br label %L96, !dbg !135 | |
; L96: result is %6 when no loop ran, otherwise the merged sum.
L96: ; preds = %L96.loopexit, %L80 | |
%value_phi4 = phi double [ %6, %L80 ], [ %.lcssa, %L96.loopexit ] | |
ret double %value_phi4, !dbg !135 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*** IR Dump After Demanded bits analysis *** | |
; julia_myfun_15947 -- scalar form, as printed after the Demanded Bits analysis
; (i.e. before the loop vectorizer has rewritten the loop).
;
; Input : %0, a non-null Julia object pointer (addrspace 10). The code reads an
;         `i64 addrspace(13)*` from its first word and an i64 from its
;         pointer-sized slot 3, so it is presumably an Array{Int64} -- confirm
;         against the Julia source.
; Output: i64. Starts from the first element of %0 (%6) and adds every element
;         of a second object (%18) to it with a plain `add i64`.
;
; Overall shape: size computation with overflow check -> call producing %18 ->
; guarded copy into %18 -> sum loop over %18.
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 { | |
top: | |
; %1 is a { value, overflow-flag } stack slot filled in below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7 | |
; %5 = pointer stored in the first word of %0; %6 = the i64 it points at.
; %6 seeds the reduction and is also the result when the loop does not run.
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7 | |
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4 | |
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15 | |
; %10 = i64 read from pointer-sized slot 3 of %0 (presumably a dimension/length
; field -- confirm against Julia's array layout).
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33 | |
; %12 = signed max(%10, 1); %13 = %12 - 2; %15 = %13 + 1 with overflow reported in %16.
%11 = icmp sgt i64 %10, 1, !dbg !35 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !41 | |
%13 = add nsw i64 %12, -2, !dbg !48 | |
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56 | |
%17 = zext i1 %16 to i8, !dbg !56 | |
; Spill the sum and its overflow flag into %1; the sum is reloaded from there later.
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56 | |
br i1 %16, label %L38, label %L42, !dbg !60 | |
; L38: overflow path -- calls the overflow-error thrower and never returns.
L38: ; preds = %top | |
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120053584 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60 | |
call void @llvm.trap(), !dbg !60 | |
unreachable, !dbg !60 | |
; L42: call through an absolute address with (constant object, %15), yielding %18.
; The callee is not named in this dump; since %18 is later treated as a
; %jl_array_t this is presumably an array allocation of %15 elements -- confirm.
L42: ; preds = %top | |
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139991405151312 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199702416 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61 | |
; Skip the copy entirely when the reloaded count is < 1.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72 | |
%20 = icmp slt i64 %19, 1, !dbg !67 | |
br i1 %20, label %L80, label %L45, !dbg !71 | |
; L45: open GC-preserve regions for %18 and %0, then compute %24 = count << 3.
L45: ; preds = %L42 | |
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77 | |
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72 | |
%24 = shl i64 %23, 3, !dbg !78 | |
; 1152921504606846976 == 2^60: this tests bit 60 of the count, which is the bit
; that `shl 3` moves into the sign bit. Non-zero -> error path L62.
%25 = and i64 %23, 1152921504606846976, !dbg !82 | |
%26 = icmp eq i64 %25, 0, !dbg !82 | |
br i1 %26, label %L70, label %L62, !dbg !84 | |
; L62: error path -- boxes %24 and passes it to a jl_invoke call, then traps.
L62: ; preds = %L45 | |
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555152 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555904 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120056576 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991122626544 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84 | |
call void @llvm.trap(), !dbg !84 | |
unreachable, !dbg !84 | |
; L70: %32 = first word of %18 (as i64); %37 = (first word of %0) + 8 bytes.
; Both are passed with %24 to an (i64, i64, i64) callee at an absolute address --
; presumably a memmove-style copy(dest, src, nbytes) that skips the first
; 8-byte element of the input; confirm the callee.
L70: ; preds = %L45 | |
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95 | |
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100 | |
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95 | |
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101 | |
%37 = ptrtoint i8* %36 to i64, !dbg !101 | |
%38 = call i64 inttoptr (i64 139991395769856 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80 | |
; Close the preserve regions in reverse order of opening.
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104 | |
br label %L80, !dbg !105 | |
; L80: %42 = i64 at field index 1 of %18 viewed as %jl_array_t; it is the loop
; trip count. If it is < 1 the function returns %6 unchanged.
L80: ; preds = %L70, %L42 | |
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111 | |
%43 = icmp slt i64 %42, 1, !dbg !113 | |
br i1 %43, label %L96, label %L88.lr.ph, !dbg !114 | |
; Loop preheader: hoisted load of the element pointer %45 from the first word of %18.
L88.lr.ph: ; preds = %L80 | |
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)* | |
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4 | |
br label %L88, !dbg !115 | |
; L88: the reduction loop. %value_phi317 is the 0-based index, %value_phi16 the
; running sum (seeded with %6). The accumulate `add` carries no nsw/nuw flags,
; so it wraps on overflow. Exits when the incremented index equals %42.
L88: ; preds = %L88, %L88.lr.ph | |
%value_phi317 = phi i64 [ 0, %L88.lr.ph ], [ %49, %L88 ] | |
%value_phi16 = phi i64 [ %6, %L88.lr.ph ], [ %48, %L88 ] | |
%46 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !116 | |
%47 = load i64, i64 addrspace(13)* %46, align 8, !dbg !116, !tbaa !15 | |
%48 = add i64 %47, %value_phi16, !dbg !120 | |
%49 = add nuw nsw i64 %value_phi317, 1, !dbg !128 | |
%exitcond = icmp eq i64 %49, %42, !dbg !130 | |
br i1 %exitcond, label %L96, label %L88, !dbg !115, !llvm.loop !131 | |
; L96: result is %6 when the loop was skipped, otherwise the final sum %48.
L96: ; preds = %L88, %L80 | |
%value_phi4 = phi i64 [ %6, %L80 ], [ %48, %L88 ] | |
ret i64 %value_phi4, !dbg !132 | |
} | |
*** IR Dump After Loop Vectorization *** | |
; julia_myfun_15947 -- i64 sum reduction, shown as it looks after the Loop Vectorization pass.
; Input:  %0, a non-null object reference (align 16, dereferenceable(40)).
; Output: %6 (the first i64 read through the argument's data pointer) plus the sum of the
;         %42 i64 values read through the data pointer of a second object, %18.
;         When %42 < 1 the function returns %6 unchanged.
; NOTE(review): two callees are reached through absolute `inttoptr` addresses and are not
; named in this dump; the roles suggested for them below are assumptions to confirm.
define i64 @julia_myfun_15947(%jl_value_t addrspace(10)* nonnull align 16 dereferenceable(40)) !dbg !5 { | |
top: | |
; %1 is a stack slot holding the { value, overflow flag } pair produced by the checked add below.
%1 = alloca { i64, i8 }, align 8 | |
%2 = call %jl_value_t*** @julia.ptls_states() | |
; %5 = first word of the argument object, used as a pointer to i64 data; %6 = element 0 of that data.
%3 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !7 | |
%4 = bitcast %jl_value_t addrspace(11)* %3 to i64 addrspace(13)* addrspace(11)*, !dbg !7 | |
%5 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %4, align 8, !dbg !7, !tbaa !11, !nonnull !4 | |
%6 = load i64, i64 addrspace(13)* %5, align 8, !dbg !7, !tbaa !15 | |
; %10 = i64 stored 3 pointer-sized slots into the argument object.
; NOTE(review): presumably the array's length / first dimension -- confirm against the jl_array_t layout.
%7 = bitcast %jl_value_t addrspace(11)* %3 to %jl_value_t addrspace(10)* addrspace(11)*, !dbg !18 | |
%8 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %7, i64 3, !dbg !18 | |
%9 = bitcast %jl_value_t addrspace(10)* addrspace(11)* %8 to i64 addrspace(11)*, !dbg !18 | |
%10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !18, !tbaa !33 | |
; %12 = max(%10, 1) using a signed compare; %13 = %12 - 2.
%11 = icmp sgt i64 %10, 1, !dbg !35 | |
%12 = select i1 %11, i64 %10, i64 1, !dbg !41 | |
%13 = add nsw i64 %12, -2, !dbg !48 | |
; %15 = %13 + 1 with the signed-overflow flag in %16; both are also stored into the stack slot %1.
%14 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %13, i64 1), !dbg !56 | |
%15 = extractvalue { i64, i1 } %14, 0, !dbg !56 | |
%16 = extractvalue { i64, i1 } %14, 1, !dbg !56 | |
%17 = zext i1 %16 to i8, !dbg !56 | |
%.fca.0.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 0, !dbg !56 | |
store i64 %15, i64* %.fca.0.gep, align 8, !dbg !56 | |
%.fca.1.gep = getelementptr inbounds { i64, i8 }, { i64, i8 }* %1, i64 0, i32 1, !dbg !56 | |
store i8 %17, i8* %.fca.1.gep, align 8, !dbg !56 | |
; Overflow -> L38 (error path); otherwise continue at L42.
br i1 %16, label %L38, label %L42, !dbg !60 | |
L38: ; preds = %top | |
; Overflow path: calls julia_throw_overflowerr_binaryop with (%13, 1), followed by trap/unreachable.
call void @julia_throw_overflowerr_binaryop_12945(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120053584 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %13, i64 1), !dbg !60 | |
call void @llvm.trap(), !dbg !60 | |
unreachable, !dbg !60 | |
L42: ; preds = %top | |
; Call to an unnamed function (absolute address) with a constant object and %15; its result %18
; is treated below as a %jl_array_t.
; NOTE(review): presumably a 1-d array allocation of %15 elements -- confirm.
%18 = call %jl_value_t addrspace(10)* inttoptr (i64 139991405151312 to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, i64)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199702416 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %15) [ "jl_roots"(i64* %.fca.0.gep) ], !dbg !61 | |
; Reload the value stored in the stack slot; when it is < 1, skip L45/L70 and go straight to L80.
%19 = load i64, i64* %.fca.0.gep, align 8, !dbg !67, !tbaa !72 | |
%20 = icmp slt i64 %19, 1, !dbg !67 | |
br i1 %20, label %L80, label %L45, !dbg !71 | |
L45: ; preds = %L42 | |
; Open GC-preserve regions for %18 and %0; the matching gc_preserve_end calls are at the end of L70.
%21 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* %18), !dbg !74 | |
%22 = call token (...) @llvm.julia.gc_preserve_begin(%jl_value_t addrspace(10)* nonnull %0), !dbg !77 | |
; %24 = %23 * 8 (left shift by 3); it is the third argument of the call in L70.
%23 = load i64, i64* %.fca.0.gep, align 8, !dbg !78, !tbaa !72 | |
%24 = shl i64 %23, 3, !dbg !78 | |
; 1152921504606846976 == 2^60: go to L62 when bit 60 of %23 is set, otherwise to L70.
; NOTE(review): looks like a range guard for the `shl` above -- confirm.
%25 = and i64 %23, 1152921504606846976, !dbg !82 | |
%26 = icmp eq i64 %25, 0, !dbg !82 | |
br i1 %26, label %L70, label %L62, !dbg !84 | |
L62: ; preds = %L45 | |
; Boxes %24 and calls jl_invoke with four constant objects plus the box, followed by trap/unreachable.
; NOTE(review): presumably raises a Julia exception; the invoked method is not identifiable from this dump.
%27 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %24), !dbg !84 | |
%28 = call cc37 nonnull %jl_value_t addrspace(10)* bitcast (%jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)* @jl_invoke to %jl_value_t addrspace(10)* (%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)*)(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555152 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991199555904 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991120056576 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139991122626544 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* %27), !dbg !84 | |
call void @llvm.trap(), !dbg !84 | |
unreachable, !dbg !84 | |
L70: ; preds = %L45 | |
; %32 = first word of %18, loaded as an i64.
%29 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !95 | |
%30 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %29) #5, !dbg !95 | |
%31 = bitcast %jl_value_t* %30 to i64*, !dbg !95 | |
%32 = load i64, i64* %31, align 8, !dbg !95, !tbaa !11, !range !100 | |
; %37 = (first word of the argument object) + 8 bytes, converted to an i64.
%33 = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %3) #5, !dbg !95 | |
%34 = bitcast %jl_value_t* %33 to i8**, !dbg !95 | |
%35 = load i8*, i8** %34, align 8, !dbg !95, !tbaa !11, !nonnull !4 | |
%36 = getelementptr i8, i8* %35, i64 8, !dbg !101 | |
%37 = ptrtoint i8* %36 to i64, !dbg !101 | |
; Unnamed 3-argument call (absolute address) with (%32, %37, %24).
; NOTE(review): the argument order matches memmove(dst, src, nbytes), i.e. copying the argument's
; elements from index 1 onward into %18 -- confirm.
%38 = call i64 inttoptr (i64 139991395769856 to i64 (i64, i64, i64)*)(i64 %32, i64 %37, i64 %24) [ "jl_roots"(i64 %24, i64 %37, i64 %32) ], !dbg !80 | |
; Close the GC-preserve regions opened in L45.
call void @llvm.julia.gc_preserve_end(token %22), !dbg !103 | |
call void @llvm.julia.gc_preserve_end(token %21), !dbg !104 | |
br label %L80, !dbg !105 | |
L80: ; preds = %L70, %L42 | |
; %42 = i64 at field index 1 of %18 viewed as a %jl_array_t; it is the trip count of the loops below.
; NOTE(review): presumably the array length -- confirm against the %jl_array_t definition.
%39 = addrspacecast %jl_value_t addrspace(10)* %18 to %jl_value_t addrspace(11)*, !dbg !106 | |
%40 = bitcast %jl_value_t addrspace(11)* %39 to %jl_array_t addrspace(11)*, !dbg !106 | |
%41 = getelementptr inbounds %jl_array_t, %jl_array_t addrspace(11)* %40, i64 0, i32 1, !dbg !106 | |
%42 = load i64, i64 addrspace(11)* %41, align 8, !dbg !106, !tbaa !111 | |
; Nothing to add when %42 < 1: jump to L96, which then returns %6.
%43 = icmp slt i64 %42, 1, !dbg !113 | |
br i1 %43, label %L96, label %L88.lr.ph, !dbg !114 | |
L88.lr.ph: ; preds = %L80 | |
; %45 = i64 data pointer loaded from the first word of %18.
%44 = bitcast %jl_value_t addrspace(11)* %39 to i64 addrspace(13)* addrspace(11)* | |
%45 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* %44, align 8, !tbaa !11, !nonnull !4 | |
; Fewer than 16 iterations (unsigned compare): bypass the vector loop and run only the scalar loop.
%min.iters.check = icmp ult i64 %42, 16, !dbg !115 | |
br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !115 | |
vector.ph: ; preds = %L88.lr.ph | |
; %n.vec = %42 rounded down to a multiple of 16 = number of elements the vector loop consumes.
%n.mod.vf = urem i64 %42, 16, !dbg !115 | |
%n.vec = sub i64 %42, %n.mod.vf, !dbg !115 | |
; Seed for the first accumulator: lane 0 = %6, remaining lanes 0.
%46 = insertelement <4 x i64> zeroinitializer, i64 %6, i32 0, !dbg !115 | |
br label %vector.body, !dbg !115 | |
vector.body: ; preds = %vector.body, %vector.ph | |
; Each iteration consumes 16 consecutive i64 values starting at %45[%index], as four <4 x i64>
; loads feeding four independent accumulators (%vec.phi, %vec.phi21, %vec.phi22, %vec.phi23).
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !116 | |
%vec.phi = phi <4 x i64> [ %46, %vector.ph ], [ %63, %vector.body ] | |
%vec.phi21 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %64, %vector.body ] | |
%vec.phi22 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %65, %vector.body ] | |
%vec.phi23 = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ %66, %vector.body ] | |
; The %induction* vectors (and the splat feeding them) have no users in this function.
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0, !dbg !116 | |
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !116 | |
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>, !dbg !116 | |
%induction18 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>, !dbg !116 | |
%induction19 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>, !dbg !116 | |
%induction20 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>, !dbg !116 | |
; Scalar indices for the four parts; only %47 (via %51) is used by the loads below, while
; %52-%54 (and therefore %48-%50) have no further users in this function.
%47 = add i64 %index, 0, !dbg !116 | |
%48 = add i64 %index, 4, !dbg !116 | |
%49 = add i64 %index, 8, !dbg !116 | |
%50 = add i64 %index, 12, !dbg !116 | |
%51 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %47, !dbg !119 | |
%52 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %48, !dbg !119 | |
%53 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %49, !dbg !119 | |
%54 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %50, !dbg !119 | |
; Four 4-wide loads addressed relative to %51 at element offsets 0, 4, 8 and 12 (8-byte alignment).
%55 = getelementptr i64, i64 addrspace(13)* %51, i32 0, !dbg !119 | |
%56 = bitcast i64 addrspace(13)* %55 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load = load <4 x i64>, <4 x i64> addrspace(13)* %56, align 8, !dbg !119, !tbaa !15 | |
%57 = getelementptr i64, i64 addrspace(13)* %51, i32 4, !dbg !119 | |
%58 = bitcast i64 addrspace(13)* %57 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load24 = load <4 x i64>, <4 x i64> addrspace(13)* %58, align 8, !dbg !119, !tbaa !15 | |
%59 = getelementptr i64, i64 addrspace(13)* %51, i32 8, !dbg !119 | |
%60 = bitcast i64 addrspace(13)* %59 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load25 = load <4 x i64>, <4 x i64> addrspace(13)* %60, align 8, !dbg !119, !tbaa !15 | |
%61 = getelementptr i64, i64 addrspace(13)* %51, i32 12, !dbg !119 | |
%62 = bitcast i64 addrspace(13)* %61 to <4 x i64> addrspace(13)*, !dbg !119 | |
%wide.load26 = load <4 x i64>, <4 x i64> addrspace(13)* %62, align 8, !dbg !119, !tbaa !15 | |
; Lane-wise accumulation: each load is added to its own accumulator.
%63 = add <4 x i64> %wide.load, %vec.phi, !dbg !123 | |
%64 = add <4 x i64> %wide.load24, %vec.phi21, !dbg !123 | |
%65 = add <4 x i64> %wide.load25, %vec.phi22, !dbg !123 | |
%66 = add <4 x i64> %wide.load26, %vec.phi23, !dbg !123 | |
; Advance by 16 elements and repeat until %index.next equals %n.vec.
%index.next = add i64 %index, 16, !dbg !116 | |
%67 = icmp eq i64 %index.next, %n.vec, !dbg !116 | |
br i1 %67, label %middle.block, label %vector.body, !dbg !116, !llvm.loop !130 | |
middle.block: ; preds = %vector.body | |
; Combine the four accumulators lane-wise into %bin.rdx28.
%bin.rdx = add <4 x i64> %64, %63, !dbg !123 | |
%bin.rdx27 = add <4 x i64> %65, %bin.rdx, !dbg !123 | |
%bin.rdx28 = add <4 x i64> %66, %bin.rdx27, !dbg !123 | |
; Horizontal reduction: add lanes 2,3 onto lanes 0,1, then lane 1 onto lane 0.
; %68 = lane 0 = total of the vector part, including the %6 seed.
%rdx.shuf = shufflevector <4 x i64> %bin.rdx28, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>, !dbg !123 | |
%bin.rdx29 = add <4 x i64> %bin.rdx28, %rdx.shuf, !dbg !123 | |
%rdx.shuf30 = shufflevector <4 x i64> %bin.rdx29, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, !dbg !123 | |
%bin.rdx31 = add <4 x i64> %bin.rdx29, %rdx.shuf30, !dbg !123 | |
%68 = extractelement <4 x i64> %bin.rdx31, i32 0, !dbg !123 | |
; No remainder when %n.vec == %42: skip the scalar loop entirely.
%cmp.n = icmp eq i64 %42, %n.vec | |
br i1 %cmp.n, label %L96.loopexit, label %scalar.ph, !dbg !115 | |
scalar.ph: ; preds = %middle.block, %L88.lr.ph | |
; Scalar loop start state: (index %n.vec, sum %68) after the vector loop, or (index 0, sum %6)
; when the vector loop was bypassed.
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L88.lr.ph ] | |
%bc.merge.rdx = phi i64 [ %6, %L88.lr.ph ], [ %68, %middle.block ] | |
br label %L88, !dbg !115 | |
L88: ; preds = %L88, %scalar.ph | |
; Scalar loop: adds one element %45[index] per iteration until the incremented index equals %42.
%value_phi317 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %72, %L88 ] | |
%value_phi16 = phi i64 [ %bc.merge.rdx, %scalar.ph ], [ %71, %L88 ] | |
%69 = getelementptr inbounds i64, i64 addrspace(13)* %45, i64 %value_phi317, !dbg !119 | |
%70 = load i64, i64 addrspace(13)* %69, align 8, !dbg !119, !tbaa !15 | |
%71 = add i64 %70, %value_phi16, !dbg !123 | |
%72 = add nuw nsw i64 %value_phi317, 1, !dbg !116 | |
%exitcond = icmp eq i64 %72, %42, !dbg !132 | |
br i1 %exitcond, label %L96.loopexit, label %L88, !dbg !115, !llvm.loop !133 | |
L96.loopexit: ; preds = %middle.block, %L88 | |
; Final sum: %71 when coming from the scalar loop, %68 when coming directly from middle.block.
%.lcssa = phi i64 [ %71, %L88 ], [ %68, %middle.block ] | |
br label %L96, !dbg !135 | |
L96: ; preds = %L96.loopexit, %L80 | |
; Return %6 when the loops were skipped (%42 < 1), otherwise the reduced sum.
%value_phi4 = phi i64 [ %6, %L80 ], [ %.lcssa, %L96.loopexit ] | |
ret i64 %value_phi4, !dbg !135 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment