Skip to content

Instantly share code, notes, and snippets.

@eddyb
Created August 26, 2016 09:35
; ModuleID = 'test.cgu-0.rs'
; Module-level header: names the originating source file and pins the target
; this IR was generated for.
source_filename = "test.cgu-0.rs"
; Data layout string, field by field (see the LLVM LangRef "Data Layout" section):
;   e            - little-endian
;   m:e          - ELF-style symbol mangling
;   i64:64       - i64 has 64-bit ABI alignment
;   f80:128      - x86 80-bit floats have 128-bit ABI alignment
;   n8:16:32:64  - native integer widths are 8, 16, 32 and 64 bits
;   S128         - natural stack alignment is 128 bits
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; 64-bit x86, Linux, GNU environment.
target triple = "x86_64-unknown-linux-gnu"
; kernel: the symbol name decodes as test::kernel plus a hash suffix
; (h b853c571274f3b08) -- this looks like legacy Rust name mangling, which is
; consistent with the .rs source_filename of this module.
;
; Arguments are unnamed and therefore numbered %0..%2:
;   %0, %1 - noalias, readonly pointers to [4 x float] inputs (16 dereferenceable bytes each)
;   %2     - pointer to the [4 x float] output; it is written exactly once, as a
;            <4 x float>, just before the return
;
; Writing a = %0, b = %1, out = %2 and ab for the local [2 x [2 x float]] scratch
; array, the body computes, lane by lane:
;   out[0] = ab[0][0] + a[0]*b[0] + a[2]*b[2]
;   out[1] = ab[1][0] + a[1]*b[0] + a[3]*b[2]
;   out[2] = ab[0][1] + a[0]*b[1] + a[2]*b[3]
;   out[3] = ab[1][1] + a[1]*b[1] + a[3]*b[3]
; ab is zero-filled by the epilogue loop before it is read, so every ab[i][j]
; term above is 0.0 at run time.
; NOTE(review): the index pattern resembles a 2x2 matrix product accumulated
; into ab -- confirm against the Rust source, which is not shown here.
; Function Attrs: noinline nounwind
define void @_ZN4test6kernel17hb853c571274f3b08E([4 x float]* noalias nocapture readonly dereferenceable(16), [4 x float]* noalias nocapture readonly dereferenceable(16), [4 x float]* nocapture dereferenceable(16)) unnamed_addr #0 {
entry-block:
; 16-byte stack scratch array: 2 rows of 2 floats.
%ab = alloca [2 x [2 x float]], align 4
; %3 is the i8* view of ab used by the lifetime.start / lifetime.end markers.
%3 = bitcast [2 x [2 x float]]* %ab to i8*
call void @llvm.lifetime.start(i64 16, i8* %3)
; %4 points at row 0 of ab; it seeds the row pointer of the zero-fill loop below.
%4 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %ab, i64 0, i64 0
; The condition is the constant true, so control always skips the main loop
; (entry-block.new / slice_loop_body2) and goes straight to the .unr-lcssa block.
br i1 true, label %slice_loop_next3.unr-lcssa, label %entry-block.new
; The next three blocks are only reachable through the never-taken false edge
; above. The block names (.unr-lcssa, .epil) suggest they are left over from
; loop unrolling with a remainder loop; slice_loop_body2 no longer does any work.
entry-block.new: ; preds = %entry-block
br label %slice_loop_body2
slice_loop_body2: ; preds = %slice_loop_body2, %entry-block.new
br i1 true, label %slice_loop_next3.unr-lcssa.loopexit, label %slice_loop_body2
slice_loop_next3.unr-lcssa.loopexit: ; preds = %slice_loop_body2
br label %slice_loop_next3.unr-lcssa
slice_loop_next3.unr-lcssa: ; preds = %slice_loop_next3.unr-lcssa.loopexit, %entry-block
; Constant-true again: the epilogue (remainder) loop is always entered.
br i1 true, label %slice_loop_body2.epil.preheader, label %slice_loop_next3
slice_loop_body2.epil.preheader: ; preds = %slice_loop_next3.unr-lcssa
br label %slice_loop_body2.epil
; Zero-fill loop: runs exactly twice (counter starts at 2 and counts down to 0),
; clearing one [2 x float] row of ab per iteration.
slice_loop_body2.epil: ; preds = %slice_loop_body2.epil, %slice_loop_body2.epil.preheader
; %5 = pointer to the current row; %epil.iter = iterations still to run.
%5 = phi [2 x float]* [ %4, %slice_loop_body2.epil.preheader ], [ %8, %slice_loop_body2.epil ]
%epil.iter = phi i64 [ 2, %slice_loop_body2.epil.preheader ], [ %epil.iter.sub, %slice_loop_body2.epil ]
; Each float is cleared through an i32 store of 0; the all-zero bit pattern is +0.0.
%6 = bitcast [2 x float]* %5 to i32*
store i32 0, i32* %6, align 4
%.fca.1.insert.fca.1.gep.epil = getelementptr inbounds [2 x float], [2 x float]* %5, i64 0, i64 1
%7 = bitcast float* %.fca.1.insert.fca.1.gep.epil to i32*
store i32 0, i32* %7, align 4
; Step the row pointer forward by one whole row and decrement the counter.
%8 = getelementptr inbounds [2 x float], [2 x float]* %5, i64 1
%epil.iter.sub = add i64 %epil.iter, -1
%epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
; Back edge carries !llvm.loop !0, which disables further unrolling of this loop.
br i1 %epil.iter.cmp, label %slice_loop_next3.epilog-lcssa, label %slice_loop_body2.epil, !llvm.loop !0
slice_loop_next3.epilog-lcssa: ; preds = %slice_loop_body2.epil
br label %slice_loop_next3
; Straight-line compute block. The ab loads below read memory that the loop
; above has just set to zero; the IR still performs them as real loads.
slice_loop_next3: ; preds = %slice_loop_next3.unr-lcssa, %slice_loop_next3.epilog-lcssa
; %10 = a[0]
%9 = getelementptr inbounds [4 x float], [4 x float]* %0, i64 0, i64 0
%10 = load float, float* %9, align 4
; %12 = b[0]
%11 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0
%12 = load float, float* %11, align 4
; %14 = ab[0][0]
%13 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %ab, i64 0, i64 0, i64 0
%14 = load float, float* %13, align 4
; %16 = b[1]
%15 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1
%16 = load float, float* %15, align 4
; %18 = ab[0][1]
%17 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %ab, i64 0, i64 0, i64 1
%18 = load float, float* %17, align 4
; %20 = a[1]
%19 = getelementptr inbounds [4 x float], [4 x float]* %0, i64 0, i64 1
%20 = load float, float* %19, align 4
; %22 = ab[1][0]
%21 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %ab, i64 0, i64 1, i64 0
%22 = load float, float* %21, align 4
; %26 = <a[0], a[1], a[0], a[1]>
%23 = insertelement <4 x float> undef, float %10, i32 0
%24 = insertelement <4 x float> %23, float %20, i32 1
%25 = insertelement <4 x float> %24, float %10, i32 2
%26 = insertelement <4 x float> %25, float %20, i32 3
; %30 = <b[0], b[0], b[1], b[1]>
%27 = insertelement <4 x float> undef, float %12, i32 0
%28 = insertelement <4 x float> %27, float %12, i32 1
%29 = insertelement <4 x float> %28, float %16, i32 2
%30 = insertelement <4 x float> %29, float %16, i32 3
; %31 = <a[0]*b[0], a[1]*b[0], a[0]*b[1], a[1]*b[1]>
%31 = fmul <4 x float> %26, %30
; %33 = ab[1][1]
%32 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %ab, i64 0, i64 1, i64 1
%33 = load float, float* %32, align 4
; %37 = <ab[0][0], ab[1][0], ab[0][1], ab[1][1]>: lanes 1 and 2 are swapped
; relative to ab's in-memory order.
%34 = insertelement <4 x float> undef, float %14, i32 0
%35 = insertelement <4 x float> %34, float %22, i32 1
%36 = insertelement <4 x float> %35, float %18, i32 2
%37 = insertelement <4 x float> %36, float %33, i32 3
; First accumulation: ab lanes + first product vector.
%38 = fadd <4 x float> %37, %31
; %40 = a[2]
%39 = getelementptr inbounds [4 x float], [4 x float]* %0, i64 0, i64 2
%40 = load float, float* %39, align 4
; %42 = b[2]
%41 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2
%42 = load float, float* %41, align 4
; %44 = b[3]
%43 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3
%44 = load float, float* %43, align 4
; %46 = a[3]
%45 = getelementptr inbounds [4 x float], [4 x float]* %0, i64 0, i64 3
%46 = load float, float* %45, align 4
; %50 = <a[2], a[3], a[2], a[3]>
%47 = insertelement <4 x float> undef, float %40, i32 0
%48 = insertelement <4 x float> %47, float %46, i32 1
%49 = insertelement <4 x float> %48, float %40, i32 2
%50 = insertelement <4 x float> %49, float %46, i32 3
; %54 = <b[2], b[2], b[3], b[3]>
%51 = insertelement <4 x float> undef, float %42, i32 0
%52 = insertelement <4 x float> %51, float %42, i32 1
%53 = insertelement <4 x float> %52, float %44, i32 2
%54 = insertelement <4 x float> %53, float %44, i32 3
; %55 = <a[2]*b[2], a[3]*b[2], a[2]*b[3], a[3]*b[3]>
%55 = fmul <4 x float> %50, %54
; Second accumulation gives the final four result lanes.
%56 = fadd <4 x float> %38, %55
; Write all four lanes to the output in one vector store. align 4 keeps the
; store valid for an output that is only float-aligned.
%57 = bitcast [4 x float]* %2 to <4 x float>*
store <4 x float> %56, <4 x float>* %57, align 4
; ab is dead from here on.
call void @llvm.lifetime.end(i64 16, i8* %3)
ret void
}
; Lifetime intrinsics: each takes the object size in bytes and an i8* to the
; object. The kernel calls them with size 16 around its uses of the %ab alloca.
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
; Attribute group #0 is applied to the kernel definition; noinline keeps it
; from being inlined into callers.
attributes #0 = { noinline nounwind }
; Attribute group #1 is applied to both lifetime intrinsic declarations.
attributes #1 = { argmemonly nounwind }
; Loop metadata. !0 is a distinct, self-referential loop identifier attached to
; the back-edge branch of slice_loop_body2.epil via !llvm.loop; its only
; property, !1, tells the unroller to leave that loop alone.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment