Created
September 16, 2022 14:40
-
-
Save arpit15/117ba1b0a1ba350d9d417928c67095ab to your computer and use it in GitHub Desktop.
mitsuba3 python path integrator dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python customplugins/path_integrator_refined.py 2>&1 crashdump.txt | |
Critical Dr.Jit compiler failure: jit_llvm_compile(): parsing failed. Please see the LLVM IR and error message below: | |
define void @drjit_32c09595272bc74a586d2fe8f7e4726e(i64 %start, i64 %end, i8** noalias %params) #0 { | |
entry: | |
%callables = load i8**, i8*** @callables | |
%buffer = alloca i8, i32 736, align 32 | |
br label %body | |
body: | |
%index = phi i64 [ %index_next, %suffix ], [ %start, %entry ] | |
%f1_p1 = getelementptr inbounds i8*, i8** %params, i32 3 | |
%f1_p2 = load i8*, i8** %f1_p1, align 8, !alias.scope !2 | |
%f1_p3 = bitcast i8* %f1_p2 to float* | |
%f1_0 = load float, float* %f1_p3, align 4, !alias.scope !2 | |
%f1_1 = insertelement <8 x float> undef, float %f1_0, i32 0 | |
%f1 = shufflevector <8 x float> %f1_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f2_p1 = getelementptr inbounds i8*, i8** %params, i32 4 | |
%f2_p2 = load i8*, i8** %f2_p1, align 8, !alias.scope !2 | |
%f2_p3 = bitcast i8* %f2_p2 to float* | |
%f2_0 = load float, float* %f2_p3, align 4, !alias.scope !2 | |
%f2_1 = insertelement <8 x float> undef, float %f2_0, i32 0 | |
%f2 = shufflevector <8 x float> %f2_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f3_p1 = getelementptr inbounds i8*, i8** %params, i32 5 | |
%f3_p2 = load i8*, i8** %f3_p1, align 8, !alias.scope !2 | |
%f3_p3 = bitcast i8* %f3_p2 to float* | |
%f3_0 = load float, float* %f3_p3, align 4, !alias.scope !2 | |
%f3_1 = insertelement <8 x float> undef, float %f3_0, i32 0 | |
%f3 = shufflevector <8 x float> %f3_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f4_p1 = getelementptr inbounds i8*, i8** %params, i32 6 | |
%f4_p2 = load i8*, i8** %f4_p1, align 8, !alias.scope !2 | |
%f4_p3 = bitcast i8* %f4_p2 to float* | |
%f4_0 = load float, float* %f4_p3, align 4, !alias.scope !2 | |
%f4_1 = insertelement <8 x float> undef, float %f4_0, i32 0 | |
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f5_p1 = getelementptr inbounds i8*, i8** %params, i32 7 | |
%f5_p2 = load i8*, i8** %f5_p1, align 8, !alias.scope !2 | |
%f5_p3 = bitcast i8* %f5_p2 to float* | |
%f5_0 = load float, float* %f5_p3, align 4, !alias.scope !2 | |
%f5_1 = insertelement <8 x float> undef, float %f5_0, i32 0 | |
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f6_p1 = getelementptr inbounds i8*, i8** %params, i32 8 | |
%f6_p2 = load i8*, i8** %f6_p1, align 8, !alias.scope !2 | |
%f6_p3 = bitcast i8* %f6_p2 to float* | |
%f6_0 = load float, float* %f6_p3, align 4, !alias.scope !2 | |
%f6_1 = insertelement <8 x float> undef, float %f6_0, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f7_p1 = getelementptr inbounds i8*, i8** %params, i32 9 | |
%f7_p2 = load i8*, i8** %f7_p1, align 8, !alias.scope !2 | |
%f7_p3 = bitcast i8* %f7_p2 to float* | |
%f7_0 = load float, float* %f7_p3, align 4, !alias.scope !2 | |
%f7_1 = insertelement <8 x float> undef, float %f7_0, i32 0 | |
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f8_p1 = getelementptr inbounds i8*, i8** %params, i32 10 | |
%f8_p2 = load i8*, i8** %f8_p1, align 8, !alias.scope !2 | |
%f8_p3 = bitcast i8* %f8_p2 to float* | |
%f8_0 = load float, float* %f8_p3, align 4, !alias.scope !2 | |
%f8_1 = insertelement <8 x float> undef, float %f8_0, i32 0 | |
%f8 = shufflevector <8 x float> %f8_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f9_p1 = getelementptr inbounds i8*, i8** %params, i32 11 | |
%f9_p2 = load i8*, i8** %f9_p1, align 8, !alias.scope !2 | |
%f9_p3 = bitcast i8* %f9_p2 to float* | |
%f9_0 = load float, float* %f9_p3, align 4, !alias.scope !2 | |
%f9_1 = insertelement <8 x float> undef, float %f9_0, i32 0 | |
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f10_p1 = getelementptr inbounds i8*, i8** %params, i32 12 | |
%f10_p2 = load i8*, i8** %f10_p1, align 8, !alias.scope !2 | |
%f10_p3 = bitcast i8* %f10_p2 to float* | |
%f10_0 = load float, float* %f10_p3, align 4, !alias.scope !2 | |
%f10_1 = insertelement <8 x float> undef, float %f10_0, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f11_p1 = getelementptr inbounds i8*, i8** %params, i32 13 | |
%f11_p2 = load i8*, i8** %f11_p1, align 8, !alias.scope !2 | |
%f11_p3 = bitcast i8* %f11_p2 to float* | |
%f11_0 = load float, float* %f11_p3, align 4, !alias.scope !2 | |
%f11_1 = insertelement <8 x float> undef, float %f11_0, i32 0 | |
%f11 = shufflevector <8 x float> %f11_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f12_p1 = getelementptr inbounds i8*, i8** %params, i32 14 | |
%f12_p2 = load i8*, i8** %f12_p1, align 8, !alias.scope !2 | |
%f12_p3 = bitcast i8* %f12_p2 to float* | |
%f12_0 = load float, float* %f12_p3, align 4, !alias.scope !2 | |
%f12_1 = insertelement <8 x float> undef, float %f12_0, i32 0 | |
%f12 = shufflevector <8 x float> %f12_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f13_p1 = getelementptr inbounds i8*, i8** %params, i32 15 | |
%f13_p2 = load i8*, i8** %f13_p1, align 8, !alias.scope !2 | |
%f13_p3 = bitcast i8* %f13_p2 to float* | |
%f13_0 = load float, float* %f13_p3, align 4, !alias.scope !2 | |
%f13_1 = insertelement <8 x float> undef, float %f13_0, i32 0 | |
%f13 = shufflevector <8 x float> %f13_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f14_p1 = getelementptr inbounds i8*, i8** %params, i32 16 | |
%f14_p2 = load i8*, i8** %f14_p1, align 8, !alias.scope !2 | |
%f14_p3 = bitcast i8* %f14_p2 to float* | |
%f14_0 = load float, float* %f14_p3, align 4, !alias.scope !2 | |
%f14_1 = insertelement <8 x float> undef, float %f14_0, i32 0 | |
%f14 = shufflevector <8 x float> %f14_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f15_p1 = getelementptr inbounds i8*, i8** %params, i32 17 | |
%f15_p2 = load i8*, i8** %f15_p1, align 8, !alias.scope !2 | |
%f15_p3 = bitcast i8* %f15_p2 to float* | |
%f15_0 = load float, float* %f15_p3, align 4, !alias.scope !2 | |
%f15_1 = insertelement <8 x float> undef, float %f15_0, i32 0 | |
%f15 = shufflevector <8 x float> %f15_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f16_p1 = getelementptr inbounds i8*, i8** %params, i32 18 | |
%f16_p2 = load i8*, i8** %f16_p1, align 8, !alias.scope !2 | |
%f16_p3 = bitcast i8* %f16_p2 to float* | |
%f16_0 = load float, float* %f16_p3, align 4, !alias.scope !2 | |
%f16_1 = insertelement <8 x float> undef, float %f16_0, i32 0 | |
%f16 = shufflevector <8 x float> %f16_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f17_p1 = getelementptr inbounds i8*, i8** %params, i32 19 | |
%f17_p2 = load i8*, i8** %f17_p1, align 8, !alias.scope !2 | |
%f17_p3 = bitcast i8* %f17_p2 to float* | |
%f17_0 = load float, float* %f17_p3, align 4, !alias.scope !2 | |
%f17_1 = insertelement <8 x float> undef, float %f17_0, i32 0 | |
%f17 = shufflevector <8 x float> %f17_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f18_p1 = getelementptr inbounds i8*, i8** %params, i32 20 | |
%f18_p2 = load i8*, i8** %f18_p1, align 8, !alias.scope !2 | |
%f18_p3 = bitcast i8* %f18_p2 to float* | |
%f18_0 = load float, float* %f18_p3, align 4, !alias.scope !2 | |
%f18_1 = insertelement <8 x float> undef, float %f18_0, i32 0 | |
%f18 = shufflevector <8 x float> %f18_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f19_p1 = getelementptr inbounds i8*, i8** %params, i32 21 | |
%f19_p2 = load i8*, i8** %f19_p1, align 8, !alias.scope !2 | |
%f19_p3 = bitcast i8* %f19_p2 to float* | |
%f19_0 = load float, float* %f19_p3, align 4, !alias.scope !2 | |
%f19_1 = insertelement <8 x float> undef, float %f19_0, i32 0 | |
%f19 = shufflevector <8 x float> %f19_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f20_p1 = getelementptr inbounds i8*, i8** %params, i32 22 | |
%f20_p2 = load i8*, i8** %f20_p1, align 8, !alias.scope !2 | |
%f20_p3 = bitcast i8* %f20_p2 to float* | |
%f20_0 = load float, float* %f20_p3, align 4, !alias.scope !2 | |
%f20_1 = insertelement <8 x float> undef, float %f20_0, i32 0 | |
%f20 = shufflevector <8 x float> %f20_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f21_p1 = getelementptr inbounds i8*, i8** %params, i32 23 | |
%f21_p2 = load i8*, i8** %f21_p1, align 8, !alias.scope !2 | |
%f21_p3 = bitcast i8* %f21_p2 to float* | |
%f21_0 = load float, float* %f21_p3, align 4, !alias.scope !2 | |
%f21_1 = insertelement <8 x float> undef, float %f21_0, i32 0 | |
%f21 = shufflevector <8 x float> %f21_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f22_p1 = getelementptr inbounds i8*, i8** %params, i32 24 | |
%f22_p2 = load i8*, i8** %f22_p1, align 8, !alias.scope !2 | |
%f22_p3 = bitcast i8* %f22_p2 to float* | |
%f22_0 = load float, float* %f22_p3, align 4, !alias.scope !2 | |
%f22_1 = insertelement <8 x float> undef, float %f22_0, i32 0 | |
%f22 = shufflevector <8 x float> %f22_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f23_p1 = getelementptr inbounds i8*, i8** %params, i32 25 | |
%f23_p2 = load i8*, i8** %f23_p1, align 8, !alias.scope !2 | |
%f23_p3 = bitcast i8* %f23_p2 to float* | |
%f23_0 = load float, float* %f23_p3, align 4, !alias.scope !2 | |
%f23_1 = insertelement <8 x float> undef, float %f23_0, i32 0 | |
%f23 = shufflevector <8 x float> %f23_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f24_p1 = getelementptr inbounds i8*, i8** %params, i32 26 | |
%f24_p2 = load i8*, i8** %f24_p1, align 8, !alias.scope !2 | |
%f24_p3 = bitcast i8* %f24_p2 to float* | |
%f24_0 = load float, float* %f24_p3, align 4, !alias.scope !2 | |
%f24_1 = insertelement <8 x float> undef, float %f24_0, i32 0 | |
%f24 = shufflevector <8 x float> %f24_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f25_p1 = getelementptr inbounds i8*, i8** %params, i32 27 | |
%f25_p2 = load i8*, i8** %f25_p1, align 8, !alias.scope !2 | |
%f25_p3 = bitcast i8* %f25_p2 to float* | |
%f25_0 = load float, float* %f25_p3, align 4, !alias.scope !2 | |
%f25_1 = insertelement <8 x float> undef, float %f25_0, i32 0 | |
%f25 = shufflevector <8 x float> %f25_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f26_p1 = getelementptr inbounds i8*, i8** %params, i32 28 | |
%f26_p2 = load i8*, i8** %f26_p1, align 8, !alias.scope !2 | |
%f26_p3 = bitcast i8* %f26_p2 to float* | |
%f26_0 = load float, float* %f26_p3, align 4, !alias.scope !2 | |
%f26_1 = insertelement <8 x float> undef, float %f26_0, i32 0 | |
%f26 = shufflevector <8 x float> %f26_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f27_p1 = getelementptr inbounds i8*, i8** %params, i32 29 | |
%f27_p2 = load i8*, i8** %f27_p1, align 8, !alias.scope !2 | |
%f27_p3 = bitcast i8* %f27_p2 to float* | |
%f27_0 = load float, float* %f27_p3, align 4, !alias.scope !2 | |
%f27_1 = insertelement <8 x float> undef, float %f27_0, i32 0 | |
%f27 = shufflevector <8 x float> %f27_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f28_p1 = getelementptr inbounds i8*, i8** %params, i32 30 | |
%f28_p2 = load i8*, i8** %f28_p1, align 8, !alias.scope !2 | |
%f28_p3 = bitcast i8* %f28_p2 to float* | |
%f28_0 = load float, float* %f28_p3, align 4, !alias.scope !2 | |
%f28_1 = insertelement <8 x float> undef, float %f28_0, i32 0 | |
%f28 = shufflevector <8 x float> %f28_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f29_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f29 = shufflevector <8 x float> %f29_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f30_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r31_0 = trunc i64 %index to i32 | |
%r31_1 = insertelement <8 x i32> undef, i32 %r31_0, i32 0 | |
%r31_2 = shufflevector <8 x i32> %r31_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r31 = add <8 x i32> %r31_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
%r32_p1 = getelementptr inbounds i8*, i8** %params, i32 31 | |
%r32_p2 = load i8*, i8** %r32_p1, align 8, !alias.scope !2 | |
%r32_p3 = bitcast i8* %r32_p2 to i32* | |
%r32_0 = load i32, i32* %r32_p3, align 4, !alias.scope !2 | |
%r32_1 = insertelement <8 x i32> undef, i32 %r32_0, i32 0 | |
%r32 = shufflevector <8 x i32> %r32_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r33_1 = insertelement <8 x i32> undef, i32 2654435769, i32 0 | |
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r34_1 = insertelement <8 x i32> undef, i32 4, i32 0 | |
%r34 = shufflevector <8 x i32> %r34_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r35 = shl <8 x i32> %r31, %r34 | |
%r36_1 = insertelement <8 x i32> undef, i32 2738958700, i32 0 | |
%r36 = shufflevector <8 x i32> %r36_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r37 = add <8 x i32> %r35, %r36 | |
%r38 = add <8 x i32> %r31, %r33 | |
%r39 = xor <8 x i32> %r37, %r38 | |
%r40_1 = insertelement <8 x i32> undef, i32 5, i32 0 | |
%r40 = shufflevector <8 x i32> %r40_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r41 = lshr <8 x i32> %r31, %r40 | |
%r42_1 = insertelement <8 x i32> undef, i32 3355524772, i32 0 | |
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r43 = add <8 x i32> %r41, %r42 | |
%r44 = xor <8 x i32> %r39, %r43 | |
%r45 = add <8 x i32> %r32, %r44 | |
%r46 = shl <8 x i32> %r45, %r34 | |
%r47_1 = insertelement <8 x i32> undef, i32 2911926141, i32 0 | |
%r47 = shufflevector <8 x i32> %r47_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r48 = add <8 x i32> %r46, %r47 | |
%r49 = add <8 x i32> %r45, %r33 | |
%r50 = xor <8 x i32> %r48, %r49 | |
%r51 = lshr <8 x i32> %r45, %r40 | |
%r52_1 = insertelement <8 x i32> undef, i32 2123724318, i32 0 | |
%r52 = shufflevector <8 x i32> %r52_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r53 = add <8 x i32> %r51, %r52 | |
%r54 = xor <8 x i32> %r50, %r53 | |
%r55 = add <8 x i32> %r31, %r54 | |
%r56_1 = insertelement <8 x i32> undef, i32 1013904242, i32 0 | |
%r56 = shufflevector <8 x i32> %r56_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r57 = shl <8 x i32> %r55, %r34 | |
%r58 = add <8 x i32> %r57, %r36 | |
%r59 = add <8 x i32> %r55, %r56 | |
%r60 = xor <8 x i32> %r58, %r59 | |
%r61 = lshr <8 x i32> %r55, %r40 | |
%r62 = add <8 x i32> %r61, %r42 | |
%r63 = xor <8 x i32> %r60, %r62 | |
%r64 = add <8 x i32> %r45, %r63 | |
%r65 = shl <8 x i32> %r64, %r34 | |
%r66 = add <8 x i32> %r65, %r47 | |
%r67 = add <8 x i32> %r64, %r56 | |
%r68 = xor <8 x i32> %r66, %r67 | |
%r69 = lshr <8 x i32> %r64, %r40 | |
%r70 = add <8 x i32> %r69, %r52 | |
%r71 = xor <8 x i32> %r68, %r70 | |
%r72 = add <8 x i32> %r55, %r71 | |
%r73_1 = insertelement <8 x i32> undef, i32 3668340011, i32 0 | |
%r73 = shufflevector <8 x i32> %r73_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r74 = shl <8 x i32> %r72, %r34 | |
%r75 = add <8 x i32> %r74, %r36 | |
%r76 = add <8 x i32> %r72, %r73 | |
%r77 = xor <8 x i32> %r75, %r76 | |
%r78 = lshr <8 x i32> %r72, %r40 | |
%r79 = add <8 x i32> %r78, %r42 | |
%r80 = xor <8 x i32> %r77, %r79 | |
%r81 = add <8 x i32> %r64, %r80 | |
%r82 = shl <8 x i32> %r81, %r34 | |
%r83 = add <8 x i32> %r82, %r47 | |
%r84 = add <8 x i32> %r81, %r73 | |
%r85 = xor <8 x i32> %r83, %r84 | |
%r86 = lshr <8 x i32> %r81, %r40 | |
%r87 = add <8 x i32> %r86, %r52 | |
%r88 = xor <8 x i32> %r85, %r87 | |
%r89 = add <8 x i32> %r72, %r88 | |
%r90_1 = insertelement <8 x i32> undef, i32 2027808484, i32 0 | |
%r90 = shufflevector <8 x i32> %r90_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r91 = shl <8 x i32> %r89, %r34 | |
%r92 = add <8 x i32> %r91, %r36 | |
%r93 = add <8 x i32> %r89, %r90 | |
%r94 = xor <8 x i32> %r92, %r93 | |
%r95 = lshr <8 x i32> %r89, %r40 | |
%r96 = add <8 x i32> %r95, %r42 | |
%r97 = xor <8 x i32> %r94, %r96 | |
%r98 = add <8 x i32> %r81, %r97 | |
%r99 = shl <8 x i32> %r98, %r34 | |
%r100 = add <8 x i32> %r99, %r47 | |
%r101 = add <8 x i32> %r98, %r90 | |
%r102 = xor <8 x i32> %r100, %r101 | |
%r103 = lshr <8 x i32> %r98, %r40 | |
%r104 = add <8 x i32> %r103, %r52 | |
%r105 = xor <8 x i32> %r102, %r104 | |
%r106 = add <8 x i32> %r89, %r105 | |
%rd107 = zext <8 x i32> %r98 to <8 x i64> | |
%rd108 = zext <8 x i32> %r106 to <8 x i64> | |
%rd109_1 = insertelement <8 x i64> undef, i64 0, i32 0 | |
%rd109 = shufflevector <8 x i64> %rd109_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd110_1 = insertelement <8 x i64> undef, i64 1, i32 0 | |
%rd110 = shufflevector <8 x i64> %rd110_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd111 = shl <8 x i64> %rd108, %rd110 | |
%rd112 = or <8 x i64> %rd111, %rd110 | |
%rd113_1 = insertelement <8 x i64> undef, i64 6364136223846793005, i32 0 | |
%rd113 = shufflevector <8 x i64> %rd113_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd114_0 = mul <8 x i64> %rd109, %rd113 | |
%rd114 = add <8 x i64> %rd114_0, %rd112 | |
%rd115 = add <8 x i64> %rd114, %rd107 | |
%rd116_0 = mul <8 x i64> %rd115, %rd113 | |
%rd116 = add <8 x i64> %rd116_0, %rd112 | |
%r117_p1 = getelementptr inbounds i8*, i8** %params, i32 32 | |
%r117_p2 = load i8*, i8** %r117_p1, align 8, !alias.scope !2 | |
%r117_p3 = bitcast i8* %r117_p2 to i32* | |
%r117_0 = load i32, i32* %r117_p3, align 4, !alias.scope !2 | |
%r117_1 = insertelement <8 x i32> undef, i32 %r117_0, i32 0 | |
%r117 = shufflevector <8 x i32> %r117_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r118 = lshr <8 x i32> %r31, %r117 | |
%r119_1 = insertelement <8 x i32> undef, i32 8, i32 0 | |
%r119 = shufflevector <8 x i32> %r119_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r120 = lshr <8 x i32> %r118, %r119 | |
%r121_1 = insertelement <8 x i32> undef, i32 256, i32 0 | |
%r121 = shufflevector <8 x i32> %r121_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r122 = sub <8 x i32> zeroinitializer, %r120 | |
%r123_0 = mul <8 x i32> %r121, %r122 | |
%r123 = add <8 x i32> %r123_0, %r118 | |
%f124 = uitofp <8 x i32> %r123 to <8 x float> | |
%f125 = uitofp <8 x i32> %r120 to <8 x float> | |
%rd126_0 = mul <8 x i64> %rd116, %rd113 | |
%rd126 = add <8 x i64> %rd126_0, %rd112 | |
%rd127_1 = insertelement <8 x i64> undef, i64 18, i32 0 | |
%rd127 = shufflevector <8 x i64> %rd127_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd128 = lshr <8 x i64> %rd116, %rd127 | |
%rd129 = xor <8 x i64> %rd128, %rd116 | |
%rd130_1 = insertelement <8 x i64> undef, i64 27, i32 0 | |
%rd130 = shufflevector <8 x i64> %rd130_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd131 = lshr <8 x i64> %rd129, %rd130 | |
%r132 = trunc <8 x i64> %rd131 to <8 x i32> | |
%rd133_1 = insertelement <8 x i64> undef, i64 59, i32 0 | |
%rd133 = shufflevector <8 x i64> %rd133_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd134 = lshr <8 x i64> %rd116, %rd133 | |
%r135 = trunc <8 x i64> %rd134 to <8 x i32> | |
%r136 = lshr <8 x i32> %r132, %r135 | |
%r137 = bitcast <8 x i32> %r135 to <8 x i32> | |
%r138 = sub <8 x i32> zeroinitializer, %r137 | |
%r139_1 = insertelement <8 x i32> undef, i32 31, i32 0 | |
%r139 = shufflevector <8 x i32> %r139_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r140 = and <8 x i32> %r138, %r139 | |
%r141 = bitcast <8 x i32> %r140 to <8 x i32> | |
%r142 = shl <8 x i32> %r132, %r141 | |
%r143 = or <8 x i32> %r136, %r142 | |
%r144_1 = insertelement <8 x i32> undef, i32 9, i32 0 | |
%r144 = shufflevector <8 x i32> %r144_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r145 = lshr <8 x i32> %r143, %r144 | |
%r146_1 = insertelement <8 x i32> undef, i32 1065353216, i32 0 | |
%r146 = shufflevector <8 x i32> %r146_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r147 = or <8 x i32> %r145, %r146 | |
%f148 = bitcast <8 x i32> %r147 to <8 x float> | |
%f149 = fsub <8 x float> %f148, %f30 | |
%rd150_0 = mul <8 x i64> %rd126, %rd113 | |
%rd150 = add <8 x i64> %rd150_0, %rd112 | |
%rd151 = lshr <8 x i64> %rd126, %rd127 | |
%rd152 = xor <8 x i64> %rd151, %rd126 | |
%rd153 = lshr <8 x i64> %rd152, %rd130 | |
%r154 = trunc <8 x i64> %rd153 to <8 x i32> | |
%rd155 = lshr <8 x i64> %rd126, %rd133 | |
%r156 = trunc <8 x i64> %rd155 to <8 x i32> | |
%r157 = lshr <8 x i32> %r154, %r156 | |
%r158 = bitcast <8 x i32> %r156 to <8 x i32> | |
%r159 = sub <8 x i32> zeroinitializer, %r158 | |
%r160 = and <8 x i32> %r159, %r139 | |
%r161 = bitcast <8 x i32> %r160 to <8 x i32> | |
%r162 = shl <8 x i32> %r154, %r161 | |
%r163 = or <8 x i32> %r157, %r162 | |
%r164 = lshr <8 x i32> %r163, %r144 | |
%r165 = or <8 x i32> %r164, %r146 | |
%f166 = bitcast <8 x i32> %r165 to <8 x float> | |
%f167 = fsub <8 x float> %f166, %f30 | |
%f168 = fadd <8 x float> %f124, %f149 | |
%f169 = fadd <8 x float> %f125, %f167 | |
%f170_1 = insertelement <8 x float> undef, float 0x3f70000000000000, i32 0 | |
%f170 = shufflevector <8 x float> %f170_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f171_1 = insertelement <8 x float> undef, float 0x8000000000000000, i32 0 | |
%f171 = shufflevector <8 x float> %f171_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f172 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f168, <8 x float> %f170, <8 x float> %f171) | |
%f173 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f169, <8 x float> %f170, <8 x float> %f171) | |
%f174 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f172, <8 x float> %f25) | |
%f175 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f172, <8 x float> %f26) | |
%f176 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f172, <8 x float> %f27) | |
%f177 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f172, <8 x float> %f28) | |
%f178 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f173, <8 x float> %f174) | |
%f179 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f173, <8 x float> %f175) | |
%f180 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f173, <8 x float> %f176) | |
%f181 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f20, <8 x float> %f173, <8 x float> %f177) | |
%f182 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f21, <8 x float> %f29, <8 x float> %f178) | |
%f183 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f22, <8 x float> %f29, <8 x float> %f179) | |
%f184 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f23, <8 x float> %f29, <8 x float> %f180) | |
%f185 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f24, <8 x float> %f29, <8 x float> %f181) | |
%f186 = fdiv <8 x float> %f30, %f185 | |
%f187 = fmul <8 x float> %f182, %f186 | |
%f188 = fmul <8 x float> %f183, %f186 | |
%f189 = fmul <8 x float> %f184, %f186 | |
%f190 = fmul <8 x float> %f187, %f187 | |
%f191 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f188, <8 x float> %f188, <8 x float> %f190) | |
%f192 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f189, <8 x float> %f189, <8 x float> %f191) | |
%f193 = fdiv <8 x float> %f30, %f192 | |
%f194 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f193) | |
%f195 = fmul <8 x float> %f187, %f194 | |
%f196 = fmul <8 x float> %f188, %f194 | |
%f197 = fmul <8 x float> %f189, %f194 | |
%f198 = fmul <8 x float> %f1, %f195 | |
%f199 = fmul <8 x float> %f2, %f195 | |
%f200 = fmul <8 x float> %f3, %f195 | |
%f201 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f4, <8 x float> %f196, <8 x float> %f198) | |
%f202 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f5, <8 x float> %f196, <8 x float> %f199) | |
%f203 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f6, <8 x float> %f196, <8 x float> %f200) | |
%f204 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f7, <8 x float> %f197, <8 x float> %f201) | |
%f205 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f197, <8 x float> %f202) | |
%f206 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f197, <8 x float> %f203) | |
%f207 = fdiv <8 x float> %f30, %f197 | |
%f208_1 = insertelement <8 x float> undef, float 0x3f50624de0000000, i32 0 | |
%f208 = shufflevector <8 x float> %f208_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f209 = fmul <8 x float> %f208, %f207 | |
%f210_1 = insertelement <8 x float> undef, float 0x4059000000000000, i32 0 | |
%f210 = shufflevector <8 x float> %f210_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f211 = fmul <8 x float> %f210, %f207 | |
%f212 = fmul <8 x float> %f204, %f209 | |
%f213 = fmul <8 x float> %f205, %f209 | |
%f214 = fmul <8 x float> %f206, %f209 | |
%f215 = fadd <8 x float> %f10, %f212 | |
%f216 = fadd <8 x float> %f11, %f213 | |
%f217 = fadd <8 x float> %f12, %f214 | |
%f218 = fsub <8 x float> %f211, %f209 | |
%r219_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r219 = shufflevector <8 x i32> %r219_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%f220_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0 | |
%f220 = shufflevector <8 x float> %f220_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p221_1 = insertelement <8 x i1> undef, i1 1, i32 0 | |
%p221 = shufflevector <8 x i1> %p221_1, <8 x i1> undef, <8 x i32> zeroinitializer | |
%r222_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r222 = shufflevector <8 x i32> %r222_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%f223_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f223 = shufflevector <8 x float> %f223_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p224_1 = insertelement <8 x i1> undef, i1 1, i32 0 | |
%p224 = shufflevector <8 x i1> %p224_1, <8 x i1> undef, <8 x i32> zeroinitializer | |
%f225_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f225 = shufflevector <8 x float> %f225_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f226_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f226 = shufflevector <8 x float> %f226_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f227_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f227 = shufflevector <8 x float> %f227_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f228_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f228 = shufflevector <8 x float> %f228_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f229_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f229 = shufflevector <8 x float> %f229_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f230_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f230 = shufflevector <8 x float> %f230_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f231_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f231 = shufflevector <8 x float> %f231_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f232_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f232 = shufflevector <8 x float> %f232_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f233_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f233 = shufflevector <8 x float> %f233_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f234_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f234 = shufflevector <8 x float> %f234_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f235_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f235 = shufflevector <8 x float> %f235_1, <8 x float> undef, <8 x i32> zeroinitializer | |
br label %l_236_start | |
l_236_start: | |
br label %l_236_cond | |
l_236_cond: ; Loop (Path Tracer) | |
%rd237 = phi <8 x i64> [ %rd237_final, %l_236_tail ], [ %rd150, %l_236_start ] | |
%f238 = phi <8 x float> [ %f238_final, %l_236_tail ], [ %f215, %l_236_start ] | |
%f239 = phi <8 x float> [ %f239_final, %l_236_tail ], [ %f216, %l_236_start ] | |
%f240 = phi <8 x float> [ %f240_final, %l_236_tail ], [ %f217, %l_236_start ] | |
%f241 = phi <8 x float> [ %f241_final, %l_236_tail ], [ %f204, %l_236_start ] | |
%f242 = phi <8 x float> [ %f242_final, %l_236_tail ], [ %f205, %l_236_start ] | |
%f243 = phi <8 x float> [ %f243_final, %l_236_tail ], [ %f206, %l_236_start ] | |
%f244 = phi <8 x float> [ %f244_final, %l_236_tail ], [ %f218, %l_236_start ] | |
%f245 = phi <8 x float> [ %f245_final, %l_236_tail ], [ %f233, %l_236_start ] | |
%f246 = phi <8 x float> [ %f246_final, %l_236_tail ], [ %f234, %l_236_start ] | |
%f247 = phi <8 x float> [ %f247_final, %l_236_tail ], [ %f235, %l_236_start ] | |
%f248 = phi <8 x float> [ %f248_final, %l_236_tail ], [ %f230, %l_236_start ] | |
%f249 = phi <8 x float> [ %f249_final, %l_236_tail ], [ %f231, %l_236_start ] | |
%f250 = phi <8 x float> [ %f250_final, %l_236_tail ], [ %f232, %l_236_start ] | |
%f251 = phi <8 x float> [ %f251_final, %l_236_tail ], [ %f223, %l_236_start ] | |
%r252 = phi <8 x i32> [ %r252_final, %l_236_tail ], [ %r222, %l_236_start ] | |
%f253 = phi <8 x float> [ %f253_final, %l_236_tail ], [ %f220, %l_236_start ] | |
%f254 = phi <8 x float> [ %f254_final, %l_236_tail ], [ %f226, %l_236_start ] | |
%f255 = phi <8 x float> [ %f255_final, %l_236_tail ], [ %f227, %l_236_start ] | |
%f256 = phi <8 x float> [ %f256_final, %l_236_tail ], [ %f228, %l_236_start ] | |
%f257 = phi <8 x float> [ %f257_final, %l_236_tail ], [ %f225, %l_236_start ] | |
%p258 = phi <8 x i1> [ %p258_final, %l_236_tail ], [ %p224, %l_236_start ] | |
%p259 = phi <8 x i1> [ %p259_final, %l_236_tail ], [ %p221, %l_236_start ] | |
%p236 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p259) | |
br i1 %p236, label %l_236_body, label %l_236_done | |
l_236_body: | |
%rd261 = phi <8 x i64> [ %rd237, %l_236_cond ] | |
%f262 = phi <8 x float> [ %f238, %l_236_cond ] | |
%f263 = phi <8 x float> [ %f239, %l_236_cond ] | |
%f264 = phi <8 x float> [ %f240, %l_236_cond ] | |
%f265 = phi <8 x float> [ %f241, %l_236_cond ] | |
%f266 = phi <8 x float> [ %f242, %l_236_cond ] | |
%f267 = phi <8 x float> [ %f243, %l_236_cond ] | |
%f268 = phi <8 x float> [ %f244, %l_236_cond ] | |
%f269 = phi <8 x float> [ %f245, %l_236_cond ] | |
%f270 = phi <8 x float> [ %f246, %l_236_cond ] | |
%f271 = phi <8 x float> [ %f247, %l_236_cond ] | |
%f272 = phi <8 x float> [ %f248, %l_236_cond ] | |
%f273 = phi <8 x float> [ %f249, %l_236_cond ] | |
%f274 = phi <8 x float> [ %f250, %l_236_cond ] | |
%f275 = phi <8 x float> [ %f251, %l_236_cond ] | |
%r276 = phi <8 x i32> [ %r252, %l_236_cond ] | |
%f277 = phi <8 x float> [ %f254, %l_236_cond ] | |
%f278 = phi <8 x float> [ %f255, %l_236_cond ] | |
%f279 = phi <8 x float> [ %f256, %l_236_cond ] | |
%f280 = phi <8 x float> [ %f257, %l_236_cond ] | |
%p281 = phi <8 x i1> [ %p258, %l_236_cond ] | |
%p282 = phi <8 x i1> [ %p259, %l_236_cond ] | |
%r283_0 = trunc i64 %index to i32 | |
%r283_1 = insertelement <8 x i32> undef, i32 %r283_0, i32 0 | |
%r283_2 = shufflevector <8 x i32> %r283_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r283 = add <8 x i32> %r283_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
%p284_0 = trunc i64 %end to i32 | |
%p284_1 = insertelement <8 x i32> undef, i32 %p284_0, i32 0 | |
%p284_2 = shufflevector <8 x i32> %p284_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p284 = icmp ult <8 x i32> %r283, %p284_2 | |
%p285 = and <8 x i1> %p284, %p259 | |
%p286_1 = insertelement <8 x i1> undef, i1 0, i32 0 | |
%p286 = shufflevector <8 x i1> %p286_1, <8 x i1> undef, <8 x i32> zeroinitializer | |
%rd287_p1 = getelementptr inbounds i8*, i8** %params, i32 33 | |
%rd287 = load i8*, i8** %rd287_p1, align 8, !alias.scope !2 | |
%rd288_p1 = getelementptr inbounds i8*, i8** %params, i32 34 | |
%rd288 = load i8*, i8** %rd288_p1, align 8, !alias.scope !2 | |
%r289_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r289 = shufflevector <8 x i32> %r289_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%f290_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f290 = shufflevector <8 x float> %f290_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r291_1 = insertelement <8 x i32> undef, i32 4294967295, i32 0 | |
%r291 = shufflevector <8 x i32> %r291_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r292_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r292 = shufflevector <8 x i32> %r292_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r293 = select <8 x i1> %p285, <8 x i32> %r291, <8 x i32> zeroinitializer | |
%f294_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0 | |
%f294 = shufflevector <8 x float> %f294_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r295_1 = insertelement <8 x i32> undef, i32 4294967295, i32 0 | |
%r295 = shufflevector <8 x i32> %r295_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%rd296_p1 = getelementptr inbounds i8*, i8** %params, i32 35 | |
%rd296 = load i8*, i8** %rd296_p1, align 8, !alias.scope !2 | |
%f297_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f297 = shufflevector <8 x float> %f297_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f298_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0 | |
%f298 = shufflevector <8 x float> %f298_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f299 = fneg <8 x float> %f265 | |
%f300 = fneg <8 x float> %f266 | |
%f301 = fneg <8 x float> %f267 | |
%p302 = xor <8 x i1> %p281, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%f303 = fmul <8 x float> %f280, %f280 | |
%p304 = fcmp ogt <8 x float> %f303, %f290 | |
%r305_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r305 = shufflevector <8 x i32> %r305_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r306 = add <8 x i32> %r276, %r305 | |
%r307_1 = insertelement <8 x i32> undef, i32 6, i32 0 | |
%r307 = shufflevector <8 x i32> %r307_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p308 = icmp ult <8 x i32> %r306, %r307 | |
%r309_1 = insertelement <8 x i32> undef, i32 30, i32 0 | |
%r309 = shufflevector <8 x i32> %r309_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%rd310_1 = insertelement <8 x i64> undef, i64 6364136223846793005, i32 0 | |
%rd310 = shufflevector <8 x i64> %rd310_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd311_0 = mul <8 x i64> %rd261, %rd310 | |
%rd311 = add <8 x i64> %rd311_0, %rd112 | |
%rd312_1 = insertelement <8 x i64> undef, i64 18, i32 0 | |
%rd312 = shufflevector <8 x i64> %rd312_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd313 = lshr <8 x i64> %rd261, %rd312 | |
%rd314 = xor <8 x i64> %rd313, %rd261 | |
%rd315_1 = insertelement <8 x i64> undef, i64 27, i32 0 | |
%rd315 = shufflevector <8 x i64> %rd315_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd316 = lshr <8 x i64> %rd314, %rd315 | |
%r317 = trunc <8 x i64> %rd316 to <8 x i32> | |
%rd318_1 = insertelement <8 x i64> undef, i64 59, i32 0 | |
%rd318 = shufflevector <8 x i64> %rd318_1, <8 x i64> undef, <8 x i32> zeroinitializer | |
%rd319 = lshr <8 x i64> %rd261, %rd318 | |
%r320 = trunc <8 x i64> %rd319 to <8 x i32> | |
%r321 = lshr <8 x i32> %r317, %r320 | |
%r322 = bitcast <8 x i32> %r320 to <8 x i32> | |
%r323 = sub <8 x i32> zeroinitializer, %r322 | |
%r324_1 = insertelement <8 x i32> undef, i32 31, i32 0 | |
%r324 = shufflevector <8 x i32> %r324_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r325 = and <8 x i32> %r323, %r324 | |
%r326 = bitcast <8 x i32> %r325 to <8 x i32> | |
%r327 = shl <8 x i32> %r317, %r326 | |
%r328 = or <8 x i32> %r321, %r327 | |
%r329_1 = insertelement <8 x i32> undef, i32 9, i32 0 | |
%r329 = shufflevector <8 x i32> %r329_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r330 = lshr <8 x i32> %r328, %r329 | |
%r331_1 = insertelement <8 x i32> undef, i32 1065353216, i32 0 | |
%r331 = shufflevector <8 x i32> %r331_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r332 = or <8 x i32> %r330, %r331 | |
%f333 = bitcast <8 x i32> %r332 to <8 x float> | |
%f334 = fsub <8 x float> %f333, %f297 | |
%rd335_p1 = getelementptr inbounds i8*, i8** %params, i32 36 | |
%rd335 = load i8*, i8** %rd335_p1, align 8, !alias.scope !2 | |
%f336_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f336 = shufflevector <8 x float> %f336_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f337_1 = insertelement <8 x float> undef, float 0x3f17700000000000, i32 0 | |
%f337 = shufflevector <8 x float> %f337_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f338_1 = insertelement <8 x float> undef, float 0x3feff8ad00000000, i32 0 | |
%f338 = shufflevector <8 x float> %f338_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%rd339_p1 = getelementptr inbounds i8*, i8** %params, i32 37 | |
%rd339 = load i8*, i8** %rd339_p1, align 8, !alias.scope !2 | |
%f340_1 = insertelement <8 x float> undef, float 0x3fee666660000000, i32 0 | |
%f340 = shufflevector <8 x float> %f340_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r341_1 = insertelement <8 x i32> undef, i32 5, i32 0 | |
%r341 = shufflevector <8 x i32> %r341_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
; -------- Ray trace ------- | |
%u342_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u342_in_0_1 = bitcast i8* %u342_in_0_0 to <8 x i32> * | |
store <8 x i32> %r293, <8 x i32>* %u342_in_0_1, align 32 | |
%u342_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u342_in_1_1 = bitcast i8* %u342_in_1_0 to <8 x float> * | |
store <8 x float> %f262, <8 x float>* %u342_in_1_1, align 32 | |
%u342_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u342_in_2_1 = bitcast i8* %u342_in_2_0 to <8 x float> * | |
store <8 x float> %f263, <8 x float>* %u342_in_2_1, align 32 | |
%u342_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u342_in_3_1 = bitcast i8* %u342_in_3_0 to <8 x float> * | |
store <8 x float> %f264, <8 x float>* %u342_in_3_1, align 32 | |
%u342_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u342_in_4_1 = bitcast i8* %u342_in_4_0 to <8 x float> * | |
store <8 x float> %f290, <8 x float>* %u342_in_4_1, align 32 | |
%u342_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u342_in_5_1 = bitcast i8* %u342_in_5_0 to <8 x float> * | |
store <8 x float> %f265, <8 x float>* %u342_in_5_1, align 32 | |
%u342_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u342_in_6_1 = bitcast i8* %u342_in_6_0 to <8 x float> * | |
store <8 x float> %f266, <8 x float>* %u342_in_6_1, align 32 | |
%u342_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224 | |
%u342_in_7_1 = bitcast i8* %u342_in_7_0 to <8 x float> * | |
store <8 x float> %f267, <8 x float>* %u342_in_7_1, align 32 | |
%u342_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256 | |
%u342_in_8_1 = bitcast i8* %u342_in_8_0 to <8 x float> * | |
store <8 x float> %f229, <8 x float>* %u342_in_8_1, align 32 | |
%u342_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u342_in_9_1 = bitcast i8* %u342_in_9_0 to <8 x float> * | |
store <8 x float> %f268, <8 x float>* %u342_in_9_1, align 32 | |
%u342_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320 | |
%u342_in_10_1 = bitcast i8* %u342_in_10_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u342_in_10_1, align 32 | |
%u342_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352 | |
%u342_in_11_1 = bitcast i8* %u342_in_11_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u342_in_11_1, align 32 | |
%u342_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384 | |
%u342_in_12_1 = bitcast i8* %u342_in_12_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u342_in_12_1, align 32 | |
%u342_in_geomid_0 = getelementptr inbounds i8, i8* %buffer, i32 608 | |
%u342_in_geomid_1 = bitcast i8* %u342_in_geomid_0 to <8 x i32> * | |
store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %u342_in_geomid_1, align 32 | |
%u342_in_ctx_0 = getelementptr inbounds i8, i8* %buffer, i32 672 | |
%u342_in_ctx_1 = bitcast i8* %u342_in_ctx_0 to <6 x i32> * | |
store <6 x i32> <i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0>, <6 x i32>* %u342_in_ctx_1, align 4 | |
%u342_func = bitcast i8* %rd287 to void (i8*, i8*, i8*, i8*)* | |
call void %u342_func(i8* %u342_in_0_0, i8* %rd288, i8* %u342_in_ctx_0, i8* %u342_in_1_0) | |
%u342_out_0_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u342_out_0_1 = bitcast i8* %u342_out_0_0 to <8 x float> * | |
%u342_out_0 = load <8 x float>, <8 x float>* %u342_out_0_1, align 32 | |
%u342_out_1_0 = getelementptr inbounds i8, i8* %buffer, i32 512 | |
%u342_out_1_1 = bitcast i8* %u342_out_1_0 to <8 x float> * | |
%u342_out_1 = load <8 x float>, <8 x float>* %u342_out_1_1, align 32 | |
%u342_out_2_0 = getelementptr inbounds i8, i8* %buffer, i32 544 | |
%u342_out_2_1 = bitcast i8* %u342_out_2_0 to <8 x float> * | |
%u342_out_2 = load <8 x float>, <8 x float>* %u342_out_2_1, align 32 | |
%u342_out_3_0 = getelementptr inbounds i8, i8* %buffer, i32 576 | |
%u342_out_3_1 = bitcast i8* %u342_out_3_0 to <8 x i32> * | |
%u342_out_3 = load <8 x i32>, <8 x i32>* %u342_out_3_1, align 32 | |
%u342_out_4_0 = getelementptr inbounds i8, i8* %buffer, i32 608 | |
%u342_out_4_1 = bitcast i8* %u342_out_4_0 to <8 x i32> * | |
%u342_out_4 = load <8 x i32>, <8 x i32>* %u342_out_4_1, align 32 | |
%u342_out_5_0 = getelementptr inbounds i8, i8* %buffer, i32 640 | |
%u342_out_5_1 = bitcast i8* %u342_out_5_0 to <8 x i32> * | |
%u342_out_5 = load <8 x i32>, <8 x i32>* %u342_out_5_1, align 32 | |
; ------------------- | |
%f343 = bitcast <8 x float> %u342_out_0 to <8 x float> | |
%f344 = bitcast <8 x float> %u342_out_1 to <8 x float> | |
%f345 = bitcast <8 x float> %u342_out_2 to <8 x float> | |
%r346 = bitcast <8 x i32> %u342_out_3 to <8 x i32> | |
%r347 = bitcast <8 x i32> %u342_out_4 to <8 x i32> | |
%r348 = bitcast <8 x i32> %u342_out_5 to <8 x i32> | |
%p349 = fcmp one <8 x float> %f343, %f268 | |
%f350 = select <8 x i1> %p349, <8 x float> %f343, <8 x float> %f294 | |
%p351 = icmp ne <8 x i32> %r348, %r295 | |
%p352 = and <8 x i1> %p349, %p351 | |
%r353 = select <8 x i1> %p352, <8 x i32> %r348, <8 x i32> %r347 | |
%p354 = and <8 x i1> %p349, %p285 | |
%r355_0 = bitcast i8* %rd296 to i32* | |
%r355_1 = getelementptr i32, i32* %r355_0, <8 x i32> %r353 | |
%r355 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r355_1, i32 4, <8 x i1> %p354, <8 x i32> zeroinitializer) | |
%r356 = select <8 x i1> %p352, <8 x i32> %r355, <8 x i32> zeroinitializer | |
%p357 = xor <8 x i1> %p352, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%r358 = select <8 x i1> %p357, <8 x i32> %r355, <8 x i32> zeroinitializer | |
%p359 = fcmp one <8 x float> %f350, %f294 | |
%p360 = icmp eq <8 x i32> %r356, %r289 | |
%r361 = select <8 x i1> %p360, <8 x i32> %r358, <8 x i32> %r356 | |
%p362 = icmp ne <8 x i32> %r361, %r289 | |
%p363 = and <8 x i1> %p359, %p362 | |
%p364 = and <8 x i1> %p363, %p285 | |
%f365 = bitcast <8 x float> %f350 to <8 x float> | |
%rd366_p1 = getelementptr inbounds i8*, i8** %params, i32 38 | |
%rd366 = load i8*, i8** %rd366_p1, align 8, !alias.scope !2 | |
%rd367_p1 = getelementptr inbounds i8*, i8** %params, i32 39 | |
%rd367 = load i8*, i8** %rd367_p1, align 8, !alias.scope !2 | |
br label %l368_start | |
l368_start: | |
; VCall: mitsuba::Shape::compute_surface_interaction() | |
; - target 1 = @func_bac3cacf8aebcc87b13f4c43e6cdd822; | |
; - target 2 = @func_570d1d9103e40ae9598d8f351c2e5c2a; | |
; - target 3 = @func_e17fd895e12cc8bec8c4e55075a069b3; | |
; - target 4 = @func_dcfca7b5b523c613a93412a4c52b1f7a; | |
%u368_self_ptr_0 = bitcast i8* %rd366 to i64* | |
%u368_self_ptr = getelementptr i64, i64* %u368_self_ptr_0, <8 x i32> %r361 | |
%u368_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u368_self_ptr, i32 8, <8 x i1> %p364, <8 x i64> zeroinitializer) | |
%u368_self_initial = trunc <8 x i64> %u368_self_combined to <8 x i32> | |
%u368_offset_1 = lshr <8 x i64> %u368_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u368_offset = trunc <8 x i64> %u368_offset_1 to <8 x i32> | |
%u368_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u368_in_0_1 = bitcast i8* %u368_in_0_0 to <8 x float> * | |
store <8 x float> %f262, <8 x float>* %u368_in_0_1, align 32 | |
%u368_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u368_in_1_1 = bitcast i8* %u368_in_1_0 to <8 x float> * | |
store <8 x float> %f263, <8 x float>* %u368_in_1_1, align 32 | |
%u368_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u368_in_2_1 = bitcast i8* %u368_in_2_0 to <8 x float> * | |
store <8 x float> %f264, <8 x float>* %u368_in_2_1, align 32 | |
%u368_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u368_in_3_1 = bitcast i8* %u368_in_3_0 to <8 x float> * | |
store <8 x float> %f265, <8 x float>* %u368_in_3_1, align 32 | |
%u368_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u368_in_4_1 = bitcast i8* %u368_in_4_0 to <8 x float> * | |
store <8 x float> %f266, <8 x float>* %u368_in_4_1, align 32 | |
%u368_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u368_in_5_1 = bitcast i8* %u368_in_5_0 to <8 x float> * | |
store <8 x float> %f267, <8 x float>* %u368_in_5_1, align 32 | |
%u368_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u368_in_6_1 = bitcast i8* %u368_in_6_0 to <8 x float> * | |
store <8 x float> %f350, <8 x float>* %u368_in_6_1, align 32 | |
%u368_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224 | |
%u368_in_7_1 = bitcast i8* %u368_in_7_0 to <8 x float> * | |
store <8 x float> %f344, <8 x float>* %u368_in_7_1, align 32 | |
%u368_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256 | |
%u368_in_8_1 = bitcast i8* %u368_in_8_0 to <8 x float> * | |
store <8 x float> %f345, <8 x float>* %u368_in_8_1, align 32 | |
%u368_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u368_in_9_1 = bitcast i8* %u368_in_9_0 to <8 x i32> * | |
store <8 x i32> %r346, <8 x i32>* %u368_in_9_1, align 32 | |
%u368_out = getelementptr i8, i8* %buffer, i32 320 | |
%u368_tmp_0_0 = getelementptr inbounds i8, i8* %u368_out, i64 0 | |
%u368_tmp_0_1 = bitcast i8* %u368_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_0_1, align 32 | |
%u368_tmp_2_0 = getelementptr inbounds i8, i8* %u368_out, i64 32 | |
%u368_tmp_2_1 = bitcast i8* %u368_tmp_2_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_2_1, align 32 | |
%u368_tmp_3_0 = getelementptr inbounds i8, i8* %u368_out, i64 64 | |
%u368_tmp_3_1 = bitcast i8* %u368_tmp_3_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_3_1, align 32 | |
%u368_tmp_4_0 = getelementptr inbounds i8, i8* %u368_out, i64 96 | |
%u368_tmp_4_1 = bitcast i8* %u368_tmp_4_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_4_1, align 32 | |
%u368_tmp_5_0 = getelementptr inbounds i8, i8* %u368_out, i64 128 | |
%u368_tmp_5_1 = bitcast i8* %u368_tmp_5_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_5_1, align 32 | |
%u368_tmp_6_0 = getelementptr inbounds i8, i8* %u368_out, i64 160 | |
%u368_tmp_6_1 = bitcast i8* %u368_tmp_6_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_6_1, align 32 | |
%u368_tmp_7_0 = getelementptr inbounds i8, i8* %u368_out, i64 192 | |
%u368_tmp_7_1 = bitcast i8* %u368_tmp_7_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_7_1, align 32 | |
%u368_tmp_8_0 = getelementptr inbounds i8, i8* %u368_out, i64 224 | |
%u368_tmp_8_1 = bitcast i8* %u368_tmp_8_0 to <8 x i32> * | |
store <8 x i32> zeroinitializer, <8 x i32>* %u368_tmp_8_1, align 32 | |
%u368_tmp_13_0 = getelementptr inbounds i8, i8* %u368_out, i64 256 | |
%u368_tmp_13_1 = bitcast i8* %u368_tmp_13_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_13_1, align 32 | |
%u368_tmp_14_0 = getelementptr inbounds i8, i8* %u368_out, i64 288 | |
%u368_tmp_14_1 = bitcast i8* %u368_tmp_14_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_14_1, align 32 | |
%u368_tmp_15_0 = getelementptr inbounds i8, i8* %u368_out, i64 320 | |
%u368_tmp_15_1 = bitcast i8* %u368_tmp_15_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_15_1, align 32 | |
%u368_tmp_16_0 = getelementptr inbounds i8, i8* %u368_out, i64 352 | |
%u368_tmp_16_1 = bitcast i8* %u368_tmp_16_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_16_1, align 32 | |
%u368_tmp_17_0 = getelementptr inbounds i8, i8* %u368_out, i64 384 | |
%u368_tmp_17_1 = bitcast i8* %u368_tmp_17_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_17_1, align 32 | |
br label %l368_check | |
l368_check: | |
%u368_self = phi <8 x i32> [ %u368_self_initial, %l368_start ], [ %u368_self_next, %l368_call ] | |
%u368_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u368_self) | |
%u368_valid = icmp ne i32 %u368_next, 0 | |
br i1 %u368_valid, label %l368_call, label %l368_end | |
l368_call: | |
%u368_bcast_0 = insertelement <8 x i32> undef, i32 %u368_next, i32 0 | |
%u368_bcast = shufflevector <8 x i32> %u368_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u368_active = icmp eq <8 x i32> %u368_self, %u368_bcast | |
%u368_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u368_next | |
%u368_func_1 = load i8*, i8** %u368_func_0 | |
%u368_func = bitcast i8* %u368_func_1 to void (<8 x i1>, <8 x i32>, i8*, i8*, <8 x i32>)* | |
call void %u368_func(<8 x i1> %u368_active, <8 x i32> %r361, i8* %buffer, i8* %rd367, <8 x i32> %u368_offset) | |
%u368_self_next = select <8 x i1> %u368_active, <8 x i32> zeroinitializer, <8 x i32> %u368_self | |
br label %l368_check | |
l368_end: | |
%u368_out_0_0 = getelementptr inbounds i8, i8* %u368_out, i64 0 | |
%u368_out_0_1 = bitcast i8* %u368_out_0_0 to <8 x float> * | |
%f381 = load <8 x float>, <8 x float>* %u368_out_0_1, align 32 | |
%u368_out_2_0 = getelementptr inbounds i8, i8* %u368_out, i64 32 | |
%u368_out_2_1 = bitcast i8* %u368_out_2_0 to <8 x float> * | |
%f370 = load <8 x float>, <8 x float>* %u368_out_2_1, align 32 | |
%u368_out_3_0 = getelementptr inbounds i8, i8* %u368_out, i64 64 | |
%u368_out_3_1 = bitcast i8* %u368_out_3_0 to <8 x float> * | |
%f371 = load <8 x float>, <8 x float>* %u368_out_3_1, align 32 | |
%u368_out_4_0 = getelementptr inbounds i8, i8* %u368_out, i64 96 | |
%u368_out_4_1 = bitcast i8* %u368_out_4_0 to <8 x float> * | |
%f372 = load <8 x float>, <8 x float>* %u368_out_4_1, align 32 | |
%u368_out_5_0 = getelementptr inbounds i8, i8* %u368_out, i64 128 | |
%u368_out_5_1 = bitcast i8* %u368_out_5_0 to <8 x float> * | |
%f373 = load <8 x float>, <8 x float>* %u368_out_5_1, align 32 | |
%u368_out_6_0 = getelementptr inbounds i8, i8* %u368_out, i64 160 | |
%u368_out_6_1 = bitcast i8* %u368_out_6_0 to <8 x float> * | |
%f374 = load <8 x float>, <8 x float>* %u368_out_6_1, align 32 | |
%u368_out_7_0 = getelementptr inbounds i8, i8* %u368_out, i64 192 | |
%u368_out_7_1 = bitcast i8* %u368_out_7_0 to <8 x float> * | |
%f375 = load <8 x float>, <8 x float>* %u368_out_7_1, align 32 | |
%u368_out_8_0 = getelementptr inbounds i8, i8* %u368_out, i64 224 | |
%u368_out_8_1 = bitcast i8* %u368_out_8_0 to <8 x i32> * | |
%r376 = load <8 x i32>, <8 x i32>* %u368_out_8_1, align 32 | |
%u368_out_13_0 = getelementptr inbounds i8, i8* %u368_out, i64 256 | |
%u368_out_13_1 = bitcast i8* %u368_out_13_0 to <8 x float> * | |
%f382 = load <8 x float>, <8 x float>* %u368_out_13_1, align 32 | |
%u368_out_14_0 = getelementptr inbounds i8, i8* %u368_out, i64 288 | |
%u368_out_14_1 = bitcast i8* %u368_out_14_0 to <8 x float> * | |
%f380 = load <8 x float>, <8 x float>* %u368_out_14_1, align 32 | |
%u368_out_15_0 = getelementptr inbounds i8, i8* %u368_out, i64 320 | |
%u368_out_15_1 = bitcast i8* %u368_out_15_0 to <8 x float> * | |
%f379 = load <8 x float>, <8 x float>* %u368_out_15_1, align 32 | |
%u368_out_16_0 = getelementptr inbounds i8, i8* %u368_out, i64 352 | |
%u368_out_16_1 = bitcast i8* %u368_out_16_0 to <8 x float> * | |
%f378 = load <8 x float>, <8 x float>* %u368_out_16_1, align 32 | |
%u368_out_17_0 = getelementptr inbounds i8, i8* %u368_out, i64 384 | |
%u368_out_17_1 = bitcast i8* %u368_out_17_0 to <8 x float> * | |
%f377 = load <8 x float>, <8 x float>* %u368_out_17_1, align 32 | |
br label %l368_done | |
l368_done: | |
%f369 = select <8 x i1> %p364, <8 x float> %f365, <8 x float> zeroinitializer | |
%p383 = xor <8 x i1> %p359, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%f384 = select <8 x i1> %p383, <8 x float> %f294, <8 x float> %f369 | |
%p385 = fcmp one <8 x float> %f384, %f294 | |
%p386 = and <8 x i1> %p359, %p385 | |
%p387 = xor <8 x i1> %p386, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%r388 = select <8 x i1> %p387, <8 x i32> %r289, <8 x i32> %r376 | |
%f389 = fmul <8 x float> %f377, %f380 | |
%f390 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f381, <8 x float> %f389) | |
%f391 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f382, <8 x float> %f390) | |
%f392 = fneg <8 x float> %f391 | |
%f393 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f392, <8 x float> %f380) | |
%f394 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f392, <8 x float> %f381) | |
%f395 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f392, <8 x float> %f382) | |
%f396 = fmul <8 x float> %f393, %f393 | |
%f397 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f394, <8 x float> %f394, <8 x float> %f396) | |
%f398 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f395, <8 x float> %f395, <8 x float> %f397) | |
%f399 = fdiv <8 x float> %f297, %f398 | |
%f400 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f399) | |
%f401 = fmul <8 x float> %f393, %f400 | |
%f402 = fmul <8 x float> %f394, %f400 | |
%f403 = fmul <8 x float> %f395, %f400 | |
%p404 = fcmp oeq <8 x float> %f380, %f290 | |
%p405 = fcmp oeq <8 x float> %f381, %f290 | |
%p406 = fcmp oeq <8 x float> %f382, %f290 | |
%p407 = and <8 x i1> %p404, %p405 | |
%p408 = and <8 x i1> %p407, %p406 | |
%p409 = fcmp oge <8 x float> %f379, %f290 | |
%f410 = select <8 x i1> %p409, <8 x float> %f297, <8 x float> %f298 | |
%f411 = fadd <8 x float> %f410, %f379 | |
%f412 = fdiv <8 x float> %f297, %f411 | |
%f413 = fneg <8 x float> %f412 | |
%f414 = fmul <8 x float> %f377, %f378 | |
%f415 = fmul <8 x float> %f414, %f413 | |
%f416 = fmul <8 x float> %f377, %f377 | |
%f417 = fmul <8 x float> %f416, %f413 | |
%f418 = fneg <8 x float> %f417 | |
%f419 = select <8 x i1> %p409, <8 x float> %f417, <8 x float> %f418 | |
%f420 = fadd <8 x float> %f419, %f297 | |
%f421 = fneg <8 x float> %f415 | |
%f422 = select <8 x i1> %p409, <8 x float> %f415, <8 x float> %f421 | |
%f423 = fneg <8 x float> %f377 | |
%f424 = select <8 x i1> %p409, <8 x float> %f423, <8 x float> %f377 | |
%f425 = select <8 x i1> %p408, <8 x float> %f420, <8 x float> %f401 | |
%f426 = select <8 x i1> %p408, <8 x float> %f422, <8 x float> %f402 | |
%f427 = select <8 x i1> %p408, <8 x float> %f424, <8 x float> %f403 | |
%f428 = fmul <8 x float> %f379, %f426 | |
%f429 = fmul <8 x float> %f377, %f427 | |
%f430 = fmul <8 x float> %f378, %f425 | |
%f431 = fneg <8 x float> %f428 | |
%f432 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f427, <8 x float> %f431) | |
%f433 = fneg <8 x float> %f429 | |
%f434 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f425, <8 x float> %f433) | |
%f435 = fneg <8 x float> %f430 | |
%f436 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f426, <8 x float> %f435) | |
%f437 = fmul <8 x float> %f299, %f425 | |
%f438 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f426, <8 x float> %f437) | |
%f439 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f427, <8 x float> %f438) | |
%f440 = fmul <8 x float> %f299, %f432 | |
%f441 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f434, <8 x float> %f440) | |
%f442 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f436, <8 x float> %f441) | |
%f443 = fmul <8 x float> %f299, %f377 | |
%f444 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f378, <8 x float> %f443) | |
%f445 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f379, <8 x float> %f444) | |
%f446 = select <8 x i1> %p386, <8 x float> %f439, <8 x float> %f299 | |
%f447 = select <8 x i1> %p386, <8 x float> %f442, <8 x float> %f300 | |
%f448 = select <8 x i1> %p386, <8 x float> %f445, <8 x float> %f301 | |
%p449 = and <8 x i1> %p385, %p282 | |
%p450 = icmp ne <8 x i32> %r388, %r289 | |
%f451 = fsub <8 x float> %f370, %f277 | |
%f452 = fsub <8 x float> %f371, %f278 | |
%f453 = fsub <8 x float> %f372, %f279 | |
%f454 = fmul <8 x float> %f451, %f451 | |
%f455 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f452, <8 x float> %f452, <8 x float> %f454) | |
%f456 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f453, <8 x float> %f453, <8 x float> %f455) | |
%f457 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f456) | |
%f458 = fdiv <8 x float> %f297, %f457 | |
%f459 = fmul <8 x float> %f451, %f458 | |
%f460 = fmul <8 x float> %f452, %f458 | |
%f461 = fmul <8 x float> %f453, %f458 | |
%f462 = fneg <8 x float> %f446 | |
%f463 = fneg <8 x float> %f447 | |
%f464 = fneg <8 x float> %f448 | |
%f465 = select <8 x i1> %p385, <8 x float> %f459, <8 x float> %f462 | |
%f466 = select <8 x i1> %p385, <8 x float> %f460, <8 x float> %f463 | |
%f467 = select <8 x i1> %p385, <8 x float> %f461, <8 x float> %f464 | |
%p468 = and <8 x i1> %p450, %p285 | |
%rd469_p1 = getelementptr inbounds i8*, i8** %params, i32 40 | |
%rd469 = load i8*, i8** %rd469_p1, align 8, !alias.scope !2 | |
%r470_0 = bitcast i8* %rd469 to i32* | |
%r470_1 = getelementptr i32, i32* %r470_0, <8 x i32> %r388 | |
%r470 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r470_1, i32 4, <8 x i1> %p468, <8 x i32> zeroinitializer) | |
%p471 = icmp ne <8 x i32> %r470, %r289 | |
%p472 = and <8 x i1> %p302, %p471 | |
%p473 = and <8 x i1> %p472, %p285 | |
%rd474_p1 = getelementptr inbounds i8*, i8** %params, i32 41 | |
%rd474 = load i8*, i8** %rd474_p1, align 8, !alias.scope !2 | |
%rd475_p1 = getelementptr inbounds i8*, i8** %params, i32 42 | |
%rd475 = load i8*, i8** %rd475_p1, align 8, !alias.scope !2 | |
br label %l476_start | |
l476_start: | |
; VCall: mitsuba::Emitter::pdf_direction() | |
; - target 1 = @func_bf888b7a279d5208759c322da4df8aa9; | |
%u476_self_ptr_0 = bitcast i8* %rd474 to i64* | |
%u476_self_ptr = getelementptr i64, i64* %u476_self_ptr_0, <8 x i32> %r470 | |
%u476_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u476_self_ptr, i32 8, <8 x i1> %p473, <8 x i64> zeroinitializer) | |
%u476_self_initial = trunc <8 x i64> %u476_self_combined to <8 x i32> | |
%u476_offset_1 = lshr <8 x i64> %u476_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u476_offset = trunc <8 x i64> %u476_offset_1 to <8 x i32> | |
%u476_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u476_in_0_1 = bitcast i8* %u476_in_0_0 to <8 x float> * | |
store <8 x float> %f377, <8 x float>* %u476_in_0_1, align 32 | |
%u476_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u476_in_1_1 = bitcast i8* %u476_in_1_0 to <8 x float> * | |
store <8 x float> %f378, <8 x float>* %u476_in_1_1, align 32 | |
%u476_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u476_in_2_1 = bitcast i8* %u476_in_2_0 to <8 x float> * | |
store <8 x float> %f379, <8 x float>* %u476_in_2_1, align 32 | |
%u476_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u476_in_3_1 = bitcast i8* %u476_in_3_0 to <8 x float> * | |
store <8 x float> %f465, <8 x float>* %u476_in_3_1, align 32 | |
%u476_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u476_in_4_1 = bitcast i8* %u476_in_4_0 to <8 x float> * | |
store <8 x float> %f466, <8 x float>* %u476_in_4_1, align 32 | |
%u476_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u476_in_5_1 = bitcast i8* %u476_in_5_0 to <8 x float> * | |
store <8 x float> %f467, <8 x float>* %u476_in_5_1, align 32 | |
%u476_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u476_in_6_1 = bitcast i8* %u476_in_6_0 to <8 x float> * | |
store <8 x float> %f457, <8 x float>* %u476_in_6_1, align 32 | |
%u476_out = getelementptr i8, i8* %buffer, i32 224 | |
%u476_tmp_0_0 = getelementptr inbounds i8, i8* %u476_out, i64 0 | |
%u476_tmp_0_1 = bitcast i8* %u476_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u476_tmp_0_1, align 32 | |
br label %l476_check | |
l476_check: | |
%u476_self = phi <8 x i32> [ %u476_self_initial, %l476_start ], [ %u476_self_next, %l476_call ] | |
%u476_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u476_self) | |
%u476_valid = icmp ne i32 %u476_next, 0 | |
br i1 %u476_valid, label %l476_call, label %l476_end | |
l476_call: | |
%u476_bcast_0 = insertelement <8 x i32> undef, i32 %u476_next, i32 0 | |
%u476_bcast = shufflevector <8 x i32> %u476_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u476_active = icmp eq <8 x i32> %u476_self, %u476_bcast | |
%u476_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u476_next | |
%u476_func_1 = load i8*, i8** %u476_func_0 | |
%u476_func = bitcast i8* %u476_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)* | |
call void %u476_func(<8 x i1> %u476_active, i8* %buffer, i8* %rd475, <8 x i32> %u476_offset) | |
%u476_self_next = select <8 x i1> %u476_active, <8 x i32> zeroinitializer, <8 x i32> %u476_self | |
br label %l476_check | |
l476_end: | |
%u476_out_0_0 = getelementptr inbounds i8, i8* %u476_out, i64 0 | |
%u476_out_0_1 = bitcast i8* %u476_out_0_0 to <8 x float> * | |
%f477 = load <8 x float>, <8 x float>* %u476_out_0_1, align 32 | |
br label %l476_done | |
l476_done: | |
%f478 = fmul <8 x float> %f477, %f477 | |
%f479 = fadd <8 x float> %f303, %f478 | |
%f480 = fdiv <8 x float> %f303, %f479 | |
%f481 = select <8 x i1> %p304, <8 x float> %f480, <8 x float> zeroinitializer | |
%p482 = and <8 x i1> %p304, %p471 | |
%p483 = and <8 x i1> %p482, %p285 | |
%rd484_p1 = getelementptr inbounds i8*, i8** %params, i32 43 | |
%rd484 = load i8*, i8** %rd484_p1, align 8, !alias.scope !2 | |
%rd485_p1 = getelementptr inbounds i8*, i8** %params, i32 44 | |
%rd485 = load i8*, i8** %rd485_p1, align 8, !alias.scope !2 | |
br label %l486_start | |
l486_start: | |
; VCall: mitsuba::Emitter::eval() | |
; - target 1 = @func_972f6c492a7ec6284cf60b520b91aba3; | |
%u486_self_ptr_0 = bitcast i8* %rd484 to i64* | |
%u486_self_ptr = getelementptr i64, i64* %u486_self_ptr_0, <8 x i32> %r470 | |
%u486_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u486_self_ptr, i32 8, <8 x i1> %p483, <8 x i64> zeroinitializer) | |
%u486_self_initial = trunc <8 x i64> %u486_self_combined to <8 x i32> | |
%u486_offset_1 = lshr <8 x i64> %u486_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u486_offset = trunc <8 x i64> %u486_offset_1 to <8 x i32> | |
%u486_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u486_in_0_1 = bitcast i8* %u486_in_0_0 to <8 x float> * | |
store <8 x float> %f377, <8 x float>* %u486_in_0_1, align 32 | |
%u486_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u486_in_1_1 = bitcast i8* %u486_in_1_0 to <8 x float> * | |
store <8 x float> %f378, <8 x float>* %u486_in_1_1, align 32 | |
%u486_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u486_in_2_1 = bitcast i8* %u486_in_2_0 to <8 x float> * | |
store <8 x float> %f379, <8 x float>* %u486_in_2_1, align 32 | |
%u486_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u486_in_3_1 = bitcast i8* %u486_in_3_0 to <8 x float> * | |
store <8 x float> %f448, <8 x float>* %u486_in_3_1, align 32 | |
%u486_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u486_in_4_1 = bitcast i8* %u486_in_4_0 to <8 x i32> * | |
store <8 x i32> %r346, <8 x i32>* %u486_in_4_1, align 32 | |
%u486_out = getelementptr i8, i8* %buffer, i32 160 | |
%u486_tmp_0_0 = getelementptr inbounds i8, i8* %u486_out, i64 0 | |
%u486_tmp_0_1 = bitcast i8* %u486_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_0_1, align 32 | |
%u486_tmp_1_0 = getelementptr inbounds i8, i8* %u486_out, i64 32 | |
%u486_tmp_1_1 = bitcast i8* %u486_tmp_1_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_1_1, align 32 | |
%u486_tmp_2_0 = getelementptr inbounds i8, i8* %u486_out, i64 64 | |
%u486_tmp_2_1 = bitcast i8* %u486_tmp_2_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_2_1, align 32 | |
br label %l486_check | |
l486_check: | |
%u486_self = phi <8 x i32> [ %u486_self_initial, %l486_start ], [ %u486_self_next, %l486_call ] | |
%u486_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u486_self) | |
%u486_valid = icmp ne i32 %u486_next, 0 | |
br i1 %u486_valid, label %l486_call, label %l486_end | |
l486_call: | |
%u486_bcast_0 = insertelement <8 x i32> undef, i32 %u486_next, i32 0 | |
%u486_bcast = shufflevector <8 x i32> %u486_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u486_active = icmp eq <8 x i32> %u486_self, %u486_bcast | |
%u486_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u486_next | |
%u486_func_1 = load i8*, i8** %u486_func_0 | |
%u486_func = bitcast i8* %u486_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)* | |
call void %u486_func(<8 x i1> %u486_active, i8* %buffer, i8* %rd485, <8 x i32> %u486_offset) | |
%u486_self_next = select <8 x i1> %u486_active, <8 x i32> zeroinitializer, <8 x i32> %u486_self | |
br label %l486_check | |
l486_end: | |
%u486_out_0_0 = getelementptr inbounds i8, i8* %u486_out, i64 0 | |
%u486_out_0_1 = bitcast i8* %u486_out_0_0 to <8 x float> * | |
%f487 = load <8 x float>, <8 x float>* %u486_out_0_1, align 32 | |
%u486_out_1_0 = getelementptr inbounds i8, i8* %u486_out, i64 32 | |
%u486_out_1_1 = bitcast i8* %u486_out_1_0 to <8 x float> * | |
%f488 = load <8 x float>, <8 x float>* %u486_out_1_1, align 32 | |
%u486_out_2_0 = getelementptr inbounds i8, i8* %u486_out, i64 64 | |
%u486_out_2_1 = bitcast i8* %u486_out_2_0 to <8 x float> * | |
%f489 = load <8 x float>, <8 x float>* %u486_out_2_1, align 32 | |
br label %l486_done | |
l486_done: | |
%f490 = fmul <8 x float> %f487, %f481 | |
%f491 = fmul <8 x float> %f488, %f481 | |
%f492 = fmul <8 x float> %f489, %f481 | |
%f493 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f269, <8 x float> %f490, <8 x float> %f272) | |
%f494 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f270, <8 x float> %f491, <8 x float> %f273) | |
%f495 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f271, <8 x float> %f492, <8 x float> %f274) | |
%p496 = and <8 x i1> %p308, %p385 | |
%rd497_p1 = getelementptr inbounds i8*, i8** %params, i32 45 | |
%rd497 = load i8*, i8** %rd497_p1, align 8, !alias.scope !2 | |
%r498_0 = bitcast i8* %rd497 to i32* | |
%r498_1 = getelementptr i32, i32* %r498_0, <8 x i32> %r388 | |
%r498 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r498_1, i32 4, <8 x i1> %p468, <8 x i32> zeroinitializer) | |
%p499 = icmp ne <8 x i32> %r498, %r289 | |
%p500 = and <8 x i1> %p499, %p285 | |
%rd501_p1 = getelementptr inbounds i8*, i8** %params, i32 46 | |
%rd501 = load i8*, i8** %rd501_p1, align 8, !alias.scope !2 | |
%r502_0 = bitcast i8* %rd501 to i32* | |
%r502_1 = getelementptr i32, i32* %r502_0, <8 x i32> %r498 | |
%r502 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r502_1, i32 4, <8 x i1> %p500, <8 x i32> zeroinitializer) | |
%r503 = and <8 x i32> %r502, %r309 | |
%p504 = icmp ne <8 x i32> %r503, %r289 | |
%p505 = and <8 x i1> %p496, %p504 | |
%rd506 = select <8 x i1> %p505, <8 x i64> %rd311, <8 x i64> %rd261 | |
%rd507_0 = mul <8 x i64> %rd506, %rd310 | |
%rd507 = add <8 x i64> %rd507_0, %rd112 | |
%rd508 = select <8 x i1> %p505, <8 x i64> %rd507, <8 x i64> %rd506 | |
%rd509 = lshr <8 x i64> %rd506, %rd312 | |
%rd510 = xor <8 x i64> %rd509, %rd506 | |
%rd511 = lshr <8 x i64> %rd510, %rd315 | |
%r512 = trunc <8 x i64> %rd511 to <8 x i32> | |
%rd513 = lshr <8 x i64> %rd506, %rd318 | |
%r514 = trunc <8 x i64> %rd513 to <8 x i32> | |
%r515 = lshr <8 x i32> %r512, %r514 | |
%r516 = bitcast <8 x i32> %r514 to <8 x i32> | |
%r517 = sub <8 x i32> zeroinitializer, %r516 | |
%r518 = and <8 x i32> %r517, %r324 | |
%r519 = bitcast <8 x i32> %r518 to <8 x i32> | |
%r520 = shl <8 x i32> %r512, %r519 | |
%r521 = or <8 x i32> %r515, %r520 | |
%r522 = lshr <8 x i32> %r521, %r329 | |
%r523 = or <8 x i32> %r522, %r331 | |
%f524 = bitcast <8 x i32> %r523 to <8 x float> | |
%f525 = fsub <8 x float> %f524, %f297 | |
%p526 = and <8 x i1> %p505, %p285 | |
%r527_0 = bitcast i8* %rd335 to i32* | |
%r527_1 = getelementptr i32, i32* %r527_0, <8 x i32> %r289 | |
%r527 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r527_1, i32 4, <8 x i1> %p526, <8 x i32> zeroinitializer) | |
%p528 = icmp ne <8 x i32> %r527, %r289 | |
%p529 = and <8 x i1> %p505, %p528 | |
%p530 = and <8 x i1> %p529, %p285 | |
%rd531_p1 = getelementptr inbounds i8*, i8** %params, i32 47 | |
%rd531 = load i8*, i8** %rd531_p1, align 8, !alias.scope !2 | |
%rd532_p1 = getelementptr inbounds i8*, i8** %params, i32 48 | |
%rd532 = load i8*, i8** %rd532_p1, align 8, !alias.scope !2 | |
br label %l533_start | |
l533_start: | |
; VCall: mitsuba::Emitter::sample_direction() | |
; - target 1 = @func_19ee132d0bd6c9e577b7250c22c38555; | |
%u533_self_ptr_0 = bitcast i8* %rd531 to i64* | |
%u533_self_ptr = getelementptr i64, i64* %u533_self_ptr_0, <8 x i32> %r527 | |
%u533_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u533_self_ptr, i32 8, <8 x i1> %p530, <8 x i64> zeroinitializer) | |
%u533_self_initial = trunc <8 x i64> %u533_self_combined to <8 x i32> | |
%u533_offset_1 = lshr <8 x i64> %u533_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u533_offset = trunc <8 x i64> %u533_offset_1 to <8 x i32> | |
%u533_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u533_in_0_1 = bitcast i8* %u533_in_0_0 to <8 x float> * | |
store <8 x float> %f370, <8 x float>* %u533_in_0_1, align 32 | |
%u533_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u533_in_1_1 = bitcast i8* %u533_in_1_0 to <8 x float> * | |
store <8 x float> %f371, <8 x float>* %u533_in_1_1, align 32 | |
%u533_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u533_in_2_1 = bitcast i8* %u533_in_2_0 to <8 x float> * | |
store <8 x float> %f372, <8 x float>* %u533_in_2_1, align 32 | |
%u533_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u533_in_3_1 = bitcast i8* %u533_in_3_0 to <8 x float> * | |
store <8 x float> %f334, <8 x float>* %u533_in_3_1, align 32 | |
%u533_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u533_in_4_1 = bitcast i8* %u533_in_4_0 to <8 x float> * | |
store <8 x float> %f525, <8 x float>* %u533_in_4_1, align 32 | |
%u533_out = getelementptr i8, i8* %buffer, i32 160 | |
%u533_tmp_0_0 = getelementptr inbounds i8, i8* %u533_out, i64 0 | |
%u533_tmp_0_1 = bitcast i8* %u533_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_0_1, align 32 | |
%u533_tmp_1_0 = getelementptr inbounds i8, i8* %u533_out, i64 32 | |
%u533_tmp_1_1 = bitcast i8* %u533_tmp_1_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_1_1, align 32 | |
%u533_tmp_2_0 = getelementptr inbounds i8, i8* %u533_out, i64 64 | |
%u533_tmp_2_1 = bitcast i8* %u533_tmp_2_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_2_1, align 32 | |
%u533_tmp_8_0 = getelementptr inbounds i8, i8* %u533_out, i64 96 | |
%u533_tmp_8_1 = bitcast i8* %u533_tmp_8_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_8_1, align 32 | |
%u533_tmp_9_0 = getelementptr inbounds i8, i8* %u533_out, i64 128 | |
%u533_tmp_9_1 = bitcast i8* %u533_tmp_9_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_9_1, align 32 | |
%u533_tmp_10_0 = getelementptr inbounds i8, i8* %u533_out, i64 160 | |
%u533_tmp_10_1 = bitcast i8* %u533_tmp_10_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_10_1, align 32 | |
%u533_tmp_11_0 = getelementptr inbounds i8, i8* %u533_out, i64 192 | |
%u533_tmp_11_1 = bitcast i8* %u533_tmp_11_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_11_1, align 32 | |
%u533_tmp_14_0 = getelementptr inbounds i8, i8* %u533_out, i64 224 | |
%u533_tmp_14_1 = bitcast i8* %u533_tmp_14_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_14_1, align 32 | |
%u533_tmp_15_0 = getelementptr inbounds i8, i8* %u533_out, i64 256 | |
%u533_tmp_15_1 = bitcast i8* %u533_tmp_15_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_15_1, align 32 | |
%u533_tmp_16_0 = getelementptr inbounds i8, i8* %u533_out, i64 288 | |
%u533_tmp_16_1 = bitcast i8* %u533_tmp_16_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_16_1, align 32 | |
br label %l533_check | |
l533_check: | |
%u533_self = phi <8 x i32> [ %u533_self_initial, %l533_start ], [ %u533_self_next, %l533_call ] | |
%u533_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u533_self) | |
%u533_valid = icmp ne i32 %u533_next, 0 | |
br i1 %u533_valid, label %l533_call, label %l533_end | |
l533_call: | |
%u533_bcast_0 = insertelement <8 x i32> undef, i32 %u533_next, i32 0 | |
%u533_bcast = shufflevector <8 x i32> %u533_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u533_active = icmp eq <8 x i32> %u533_self, %u533_bcast | |
%u533_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u533_next | |
%u533_func_1 = load i8*, i8** %u533_func_0 | |
%u533_func = bitcast i8* %u533_func_1 to void (<8 x i1>, <8 x i32>, i8*, i8*, <8 x i32>)* | |
call void %u533_func(<8 x i1> %u533_active, <8 x i32> %r527, i8* %buffer, i8* %rd532, <8 x i32> %u533_offset) | |
%u533_self_next = select <8 x i1> %u533_active, <8 x i32> zeroinitializer, <8 x i32> %u533_self | |
br label %l533_check | |
l533_end: | |
%u533_out_0_0 = getelementptr inbounds i8, i8* %u533_out, i64 0 | |
%u533_out_0_1 = bitcast i8* %u533_out_0_0 to <8 x float> * | |
%f534 = load <8 x float>, <8 x float>* %u533_out_0_1, align 32 | |
%u533_out_1_0 = getelementptr inbounds i8, i8* %u533_out, i64 32 | |
%u533_out_1_1 = bitcast i8* %u533_out_1_0 to <8 x float> * | |
%f535 = load <8 x float>, <8 x float>* %u533_out_1_1, align 32 | |
%u533_out_2_0 = getelementptr inbounds i8, i8* %u533_out, i64 64 | |
%u533_out_2_1 = bitcast i8* %u533_out_2_0 to <8 x float> * | |
%f536 = load <8 x float>, <8 x float>* %u533_out_2_1, align 32 | |
%u533_out_8_0 = getelementptr inbounds i8, i8* %u533_out, i64 96 | |
%u533_out_8_1 = bitcast i8* %u533_out_8_0 to <8 x float> * | |
%f537 = load <8 x float>, <8 x float>* %u533_out_8_1, align 32 | |
%u533_out_9_0 = getelementptr inbounds i8, i8* %u533_out, i64 128 | |
%u533_out_9_1 = bitcast i8* %u533_out_9_0 to <8 x float> * | |
%f538 = load <8 x float>, <8 x float>* %u533_out_9_1, align 32 | |
%u533_out_10_0 = getelementptr inbounds i8, i8* %u533_out, i64 160 | |
%u533_out_10_1 = bitcast i8* %u533_out_10_0 to <8 x float> * | |
%f539 = load <8 x float>, <8 x float>* %u533_out_10_1, align 32 | |
%u533_out_11_0 = getelementptr inbounds i8, i8* %u533_out, i64 192 | |
%u533_out_11_1 = bitcast i8* %u533_out_11_0 to <8 x float> * | |
%f540 = load <8 x float>, <8 x float>* %u533_out_11_1, align 32 | |
%u533_out_14_0 = getelementptr inbounds i8, i8* %u533_out, i64 224 | |
%u533_out_14_1 = bitcast i8* %u533_out_14_0 to <8 x float> * | |
%f541 = load <8 x float>, <8 x float>* %u533_out_14_1, align 32 | |
%u533_out_15_0 = getelementptr inbounds i8, i8* %u533_out, i64 256 | |
%u533_out_15_1 = bitcast i8* %u533_out_15_0 to <8 x float> * | |
%f542 = load <8 x float>, <8 x float>* %u533_out_15_1, align 32 | |
%u533_out_16_0 = getelementptr inbounds i8, i8* %u533_out, i64 288 | |
%u533_out_16_1 = bitcast i8* %u533_out_16_0 to <8 x float> * | |
%f543 = load <8 x float>, <8 x float>* %u533_out_16_1, align 32 | |
br label %l533_done | |
l533_done: | |
%p544 = fcmp one <8 x float> %f537, %f290 | |
%p545 = and <8 x i1> %p505, %p544 | |
%f546 = fsub <8 x float> %f534, %f370 | |
%f547 = fsub <8 x float> %f535, %f371 | |
%f548 = fsub <8 x float> %f536, %f372 | |
%f549_0 = bitcast <8 x float> %f370 to <8 x i32> | |
%f549_1 = bitcast <8 x float> %f336 to <8 x i32> | |
%f549_2 = and <8 x i32> %f549_0, %f549_1 | |
%f549 = bitcast <8 x i32> %f549_2 to <8 x float> | |
%f550_0 = bitcast <8 x float> %f371 to <8 x i32> | |
%f550_1 = bitcast <8 x float> %f336 to <8 x i32> | |
%f550_2 = and <8 x i32> %f550_0, %f550_1 | |
%f550 = bitcast <8 x i32> %f550_2 to <8 x float> | |
%f551_0 = bitcast <8 x float> %f372 to <8 x i32> | |
%f551_1 = bitcast <8 x float> %f336 to <8 x i32> | |
%f551_2 = and <8 x i32> %f551_0, %f551_1 | |
%f551 = bitcast <8 x i32> %f551_2 to <8 x float> | |
%f552 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f549, <8 x float> %f550) | |
%f553 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f552, <8 x float> %f551) | |
%f554 = fadd <8 x float> %f297, %f553 | |
%f555 = fmul <8 x float> %f554, %f337 | |
%f556 = fmul <8 x float> %f373, %f546 | |
%f557 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f374, <8 x float> %f547, <8 x float> %f556) | |
%f558 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f375, <8 x float> %f548, <8 x float> %f557) | |
%p559 = fcmp oge <8 x float> %f558, %f290 | |
%f560 = fneg <8 x float> %f555 | |
%f561 = select <8 x i1> %p559, <8 x float> %f555, <8 x float> %f560 | |
%f562 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f373, <8 x float> %f370) | |
%f563 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f374, <8 x float> %f371) | |
%f564 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f375, <8 x float> %f372) | |
%f565 = fsub <8 x float> %f534, %f562 | |
%f566 = fsub <8 x float> %f535, %f563 | |
%f567 = fsub <8 x float> %f536, %f564 | |
%f568 = fmul <8 x float> %f565, %f565 | |
%f569 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f566, <8 x float> %f566, <8 x float> %f568) | |
%f570 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f567, <8 x float> %f567, <8 x float> %f569) | |
%f571 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f570) | |
%f572 = fdiv <8 x float> %f297, %f571 | |
%f573 = fmul <8 x float> %f565, %f572 | |
%f574 = fmul <8 x float> %f566, %f572 | |
%f575 = fmul <8 x float> %f567, %f572 | |
%f576 = fmul <8 x float> %f571, %f338 | |
%p577 = and <8 x i1> %p545, %p285 | |
%r578 = select <8 x i1> %p577, <8 x i32> %r291, <8 x i32> zeroinitializer | |
; -------- Ray test ------- | |
%u579_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u579_in_0_1 = bitcast i8* %u579_in_0_0 to <8 x i32> * | |
store <8 x i32> %r578, <8 x i32>* %u579_in_0_1, align 32 | |
%u579_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u579_in_1_1 = bitcast i8* %u579_in_1_0 to <8 x float> * | |
store <8 x float> %f562, <8 x float>* %u579_in_1_1, align 32 | |
%u579_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u579_in_2_1 = bitcast i8* %u579_in_2_0 to <8 x float> * | |
store <8 x float> %f563, <8 x float>* %u579_in_2_1, align 32 | |
%u579_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u579_in_3_1 = bitcast i8* %u579_in_3_0 to <8 x float> * | |
store <8 x float> %f564, <8 x float>* %u579_in_3_1, align 32 | |
%u579_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u579_in_4_1 = bitcast i8* %u579_in_4_0 to <8 x float> * | |
store <8 x float> %f290, <8 x float>* %u579_in_4_1, align 32 | |
%u579_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u579_in_5_1 = bitcast i8* %u579_in_5_0 to <8 x float> * | |
store <8 x float> %f573, <8 x float>* %u579_in_5_1, align 32 | |
%u579_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u579_in_6_1 = bitcast i8* %u579_in_6_0 to <8 x float> * | |
store <8 x float> %f574, <8 x float>* %u579_in_6_1, align 32 | |
%u579_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224 | |
%u579_in_7_1 = bitcast i8* %u579_in_7_0 to <8 x float> * | |
store <8 x float> %f575, <8 x float>* %u579_in_7_1, align 32 | |
%u579_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256 | |
%u579_in_8_1 = bitcast i8* %u579_in_8_0 to <8 x float> * | |
store <8 x float> %f229, <8 x float>* %u579_in_8_1, align 32 | |
%u579_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u579_in_9_1 = bitcast i8* %u579_in_9_0 to <8 x float> * | |
store <8 x float> %f576, <8 x float>* %u579_in_9_1, align 32 | |
%u579_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320 | |
%u579_in_10_1 = bitcast i8* %u579_in_10_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u579_in_10_1, align 32 | |
%u579_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352 | |
%u579_in_11_1 = bitcast i8* %u579_in_11_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u579_in_11_1, align 32 | |
%u579_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384 | |
%u579_in_12_1 = bitcast i8* %u579_in_12_0 to <8 x i32> * | |
store <8 x i32> %r289, <8 x i32>* %u579_in_12_1, align 32 | |
%u579_in_ctx_0 = getelementptr inbounds i8, i8* %buffer, i32 416 | |
%u579_in_ctx_1 = bitcast i8* %u579_in_ctx_0 to <6 x i32> * | |
store <6 x i32> <i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0>, <6 x i32>* %u579_in_ctx_1, align 4 | |
%u579_func = bitcast i8* %rd339 to void (i8*, i8*, i8*, i8*)* | |
call void %u579_func(i8* %u579_in_0_0, i8* %rd288, i8* %u579_in_ctx_0, i8* %u579_in_1_0) | |
%u579_out_0_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u579_out_0_1 = bitcast i8* %u579_out_0_0 to <8 x float> * | |
%u579_out_0 = load <8 x float>, <8 x float>* %u579_out_0_1, align 32 | |
; ------------------- | |
%f580 = bitcast <8 x float> %u579_out_0 to <8 x float> | |
%p581 = fcmp one <8 x float> %f580, %f576 | |
%p582 = and <8 x i1> %p545, %p581 | |
%f583 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f541 | |
%f584 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f542 | |
%f585 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f543 | |
%f586 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f537 | |
%p587 = fcmp one <8 x float> %f586, %f290 | |
%p588 = and <8 x i1> %p505, %p587 | |
%f589 = fmul <8 x float> %f538, %f377 | |
%f590 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f539, <8 x float> %f378, <8 x float> %f589) | |
%f591 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f540, <8 x float> %f379, <8 x float> %f590) | |
%p592 = and <8 x i1> %p588, %p499 | |
%p593 = and <8 x i1> %p592, %p285 | |
%rd594_p1 = getelementptr inbounds i8*, i8** %params, i32 49 | |
%rd594 = load i8*, i8** %rd594_p1, align 8, !alias.scope !2 | |
%rd595_p1 = getelementptr inbounds i8*, i8** %params, i32 50 | |
%rd595 = load i8*, i8** %rd595_p1, align 8, !alias.scope !2 | |
br label %l596_start | |
l596_start: | |
; VCall: mitsuba::BSDF::eval_pdf() | |
; - target 1 = @func_e9d5179176fa4aaf3f8d2cb1ff4d2c84; | |
; - target 2 = @func_9d53124ccfd48ba20820a9ee3e480cd9; | |
; - target 3 = @func_4448b4670e928f754bf33368168c3f24; | |
%u596_self_ptr_0 = bitcast i8* %rd594 to i64* | |
%u596_self_ptr = getelementptr i64, i64* %u596_self_ptr_0, <8 x i32> %r498 | |
%u596_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u596_self_ptr, i32 8, <8 x i1> %p593, <8 x i64> zeroinitializer) | |
%u596_self_initial = trunc <8 x i64> %u596_self_combined to <8 x i32> | |
%u596_offset_1 = lshr <8 x i64> %u596_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u596_offset = trunc <8 x i64> %u596_offset_1 to <8 x i32> | |
%u596_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u596_in_0_1 = bitcast i8* %u596_in_0_0 to <8 x float> * | |
store <8 x float> %f370, <8 x float>* %u596_in_0_1, align 32 | |
%u596_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u596_in_1_1 = bitcast i8* %u596_in_1_0 to <8 x float> * | |
store <8 x float> %f371, <8 x float>* %u596_in_1_1, align 32 | |
%u596_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u596_in_2_1 = bitcast i8* %u596_in_2_0 to <8 x float> * | |
store <8 x float> %f372, <8 x float>* %u596_in_2_1, align 32 | |
%u596_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u596_in_3_1 = bitcast i8* %u596_in_3_0 to <8 x float> * | |
store <8 x float> %f377, <8 x float>* %u596_in_3_1, align 32 | |
%u596_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u596_in_4_1 = bitcast i8* %u596_in_4_0 to <8 x float> * | |
store <8 x float> %f378, <8 x float>* %u596_in_4_1, align 32 | |
%u596_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u596_in_5_1 = bitcast i8* %u596_in_5_0 to <8 x float> * | |
store <8 x float> %f379, <8 x float>* %u596_in_5_1, align 32 | |
%u596_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u596_in_6_1 = bitcast i8* %u596_in_6_0 to <8 x float> * | |
store <8 x float> %f448, <8 x float>* %u596_in_6_1, align 32 | |
%u596_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224 | |
%u596_in_7_1 = bitcast i8* %u596_in_7_0 to <8 x i32> * | |
store <8 x i32> %r346, <8 x i32>* %u596_in_7_1, align 32 | |
%u596_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256 | |
%u596_in_8_1 = bitcast i8* %u596_in_8_0 to <8 x float> * | |
store <8 x float> %f591, <8 x float>* %u596_in_8_1, align 32 | |
%u596_out = getelementptr i8, i8* %buffer, i32 288 | |
%u596_tmp_0_0 = getelementptr inbounds i8, i8* %u596_out, i64 0 | |
%u596_tmp_0_1 = bitcast i8* %u596_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_0_1, align 32 | |
%u596_tmp_1_0 = getelementptr inbounds i8, i8* %u596_out, i64 32 | |
%u596_tmp_1_1 = bitcast i8* %u596_tmp_1_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_1_1, align 32 | |
%u596_tmp_2_0 = getelementptr inbounds i8, i8* %u596_out, i64 64 | |
%u596_tmp_2_1 = bitcast i8* %u596_tmp_2_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_2_1, align 32 | |
%u596_tmp_3_0 = getelementptr inbounds i8, i8* %u596_out, i64 96 | |
%u596_tmp_3_1 = bitcast i8* %u596_tmp_3_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_3_1, align 32 | |
br label %l596_check | |
l596_check: | |
%u596_self = phi <8 x i32> [ %u596_self_initial, %l596_start ], [ %u596_self_next, %l596_call ] | |
%u596_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u596_self) | |
%u596_valid = icmp ne i32 %u596_next, 0 | |
br i1 %u596_valid, label %l596_call, label %l596_end | |
l596_call: | |
%u596_bcast_0 = insertelement <8 x i32> undef, i32 %u596_next, i32 0 | |
%u596_bcast = shufflevector <8 x i32> %u596_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u596_active = icmp eq <8 x i32> %u596_self, %u596_bcast | |
%u596_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u596_next | |
%u596_func_1 = load i8*, i8** %u596_func_0 | |
%u596_func = bitcast i8* %u596_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)* | |
call void %u596_func(<8 x i1> %u596_active, i8* %buffer, i8* %rd595, <8 x i32> %u596_offset) | |
%u596_self_next = select <8 x i1> %u596_active, <8 x i32> zeroinitializer, <8 x i32> %u596_self | |
br label %l596_check | |
l596_end: | |
%u596_out_0_0 = getelementptr inbounds i8, i8* %u596_out, i64 0 | |
%u596_out_0_1 = bitcast i8* %u596_out_0_0 to <8 x float> * | |
%f597 = load <8 x float>, <8 x float>* %u596_out_0_1, align 32 | |
%u596_out_1_0 = getelementptr inbounds i8, i8* %u596_out, i64 32 | |
%u596_out_1_1 = bitcast i8* %u596_out_1_0 to <8 x float> * | |
%f598 = load <8 x float>, <8 x float>* %u596_out_1_1, align 32 | |
%u596_out_2_0 = getelementptr inbounds i8, i8* %u596_out, i64 64 | |
%u596_out_2_1 = bitcast i8* %u596_out_2_0 to <8 x float> * | |
%f599 = load <8 x float>, <8 x float>* %u596_out_2_1, align 32 | |
%u596_out_3_0 = getelementptr inbounds i8, i8* %u596_out, i64 96 | |
%u596_out_3_1 = bitcast i8* %u596_out_3_0 to <8 x float> * | |
%f600 = load <8 x float>, <8 x float>* %u596_out_3_1, align 32 | |
br label %l596_done | |
l596_done: | |
%f601 = fmul <8 x float> %f586, %f586 | |
%f602 = fmul <8 x float> %f600, %f600 | |
%p603 = fcmp ogt <8 x float> %f601, %f290 | |
%f604 = fadd <8 x float> %f601, %f602 | |
%f605 = fdiv <8 x float> %f601, %f604 | |
%f606 = select <8 x i1> %p603, <8 x float> %f605, <8 x float> zeroinitializer | |
%f607 = fmul <8 x float> %f583, %f597 | |
%f608 = fmul <8 x float> %f584, %f598 | |
%f609 = fmul <8 x float> %f585, %f599 | |
%f610 = fmul <8 x float> %f607, %f606 | |
%f611 = fmul <8 x float> %f608, %f606 | |
%f612 = fmul <8 x float> %f609, %f606 | |
%f613 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f269, <8 x float> %f610, <8 x float> %f493) | |
%f614 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f270, <8 x float> %f611, <8 x float> %f494) | |
%f615 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f271, <8 x float> %f612, <8 x float> %f495) | |
%f616 = select <8 x i1> %p588, <8 x float> %f613, <8 x float> %f493 | |
%f617 = select <8 x i1> %p588, <8 x float> %f614, <8 x float> %f494 | |
%f618 = select <8 x i1> %p588, <8 x float> %f615, <8 x float> %f495 | |
%rd619_0 = mul <8 x i64> %rd508, %rd310 | |
%rd619 = add <8 x i64> %rd619_0, %rd112 | |
%rd620 = select <8 x i1> %p449, <8 x i64> %rd619, <8 x i64> %rd508 | |
%rd621 = lshr <8 x i64> %rd508, %rd312 | |
%rd622 = xor <8 x i64> %rd621, %rd508 | |
%rd623 = lshr <8 x i64> %rd622, %rd315 | |
%r624 = trunc <8 x i64> %rd623 to <8 x i32> | |
%rd625 = lshr <8 x i64> %rd508, %rd318 | |
%r626 = trunc <8 x i64> %rd625 to <8 x i32> | |
%r627 = lshr <8 x i32> %r624, %r626 | |
%r628 = bitcast <8 x i32> %r626 to <8 x i32> | |
%r629 = sub <8 x i32> zeroinitializer, %r628 | |
%r630 = and <8 x i32> %r629, %r324 | |
%r631 = bitcast <8 x i32> %r630 to <8 x i32> | |
%r632 = shl <8 x i32> %r624, %r631 | |
%r633 = or <8 x i32> %r627, %r632 | |
%r634 = lshr <8 x i32> %r633, %r329 | |
%r635 = or <8 x i32> %r634, %r331 | |
%f636 = bitcast <8 x i32> %r635 to <8 x float> | |
%f637 = fsub <8 x float> %f636, %f297 | |
%rd638_0 = mul <8 x i64> %rd620, %rd310 | |
%rd638 = add <8 x i64> %rd638_0, %rd112 | |
%rd639 = select <8 x i1> %p449, <8 x i64> %rd638, <8 x i64> %rd620 | |
%rd640 = lshr <8 x i64> %rd620, %rd312 | |
%rd641 = xor <8 x i64> %rd640, %rd620 | |
%rd642 = lshr <8 x i64> %rd641, %rd315 | |
%r643 = trunc <8 x i64> %rd642 to <8 x i32> | |
%rd644 = lshr <8 x i64> %rd620, %rd318 | |
%r645 = trunc <8 x i64> %rd644 to <8 x i32> | |
%r646 = lshr <8 x i32> %r643, %r645 | |
%r647 = bitcast <8 x i32> %r645 to <8 x i32> | |
%r648 = sub <8 x i32> zeroinitializer, %r647 | |
%r649 = and <8 x i32> %r648, %r324 | |
%r650 = bitcast <8 x i32> %r649 to <8 x i32> | |
%r651 = shl <8 x i32> %r643, %r650 | |
%r652 = or <8 x i32> %r646, %r651 | |
%r653 = lshr <8 x i32> %r652, %r329 | |
%r654 = or <8 x i32> %r653, %r331 | |
%f655 = bitcast <8 x i32> %r654 to <8 x float> | |
%f656 = fsub <8 x float> %f655, %f297 | |
%rd657_0 = mul <8 x i64> %rd639, %rd310 | |
%rd657 = add <8 x i64> %rd657_0, %rd112 | |
%rd658 = select <8 x i1> %p449, <8 x i64> %rd657, <8 x i64> %rd639 | |
%rd659 = lshr <8 x i64> %rd639, %rd312 | |
%rd660 = xor <8 x i64> %rd659, %rd639 | |
%rd661 = lshr <8 x i64> %rd660, %rd315 | |
%r662 = trunc <8 x i64> %rd661 to <8 x i32> | |
%rd663 = lshr <8 x i64> %rd639, %rd318 | |
%r664 = trunc <8 x i64> %rd663 to <8 x i32> | |
%r665 = lshr <8 x i32> %r662, %r664 | |
%r666 = bitcast <8 x i32> %r664 to <8 x i32> | |
%r667 = sub <8 x i32> zeroinitializer, %r666 | |
%r668 = and <8 x i32> %r667, %r324 | |
%r669 = bitcast <8 x i32> %r668 to <8 x i32> | |
%r670 = shl <8 x i32> %r662, %r669 | |
%r671 = or <8 x i32> %r665, %r670 | |
%r672 = lshr <8 x i32> %r671, %r329 | |
%r673 = or <8 x i32> %r672, %r331 | |
%f674 = bitcast <8 x i32> %r673 to <8 x float> | |
%f675 = fsub <8 x float> %f674, %f297 | |
%p676 = and <8 x i1> %p449, %p499 | |
%p677 = and <8 x i1> %p676, %p285 | |
%rd678_p1 = getelementptr inbounds i8*, i8** %params, i32 51 | |
%rd678 = load i8*, i8** %rd678_p1, align 8, !alias.scope !2 | |
%rd679_p1 = getelementptr inbounds i8*, i8** %params, i32 52 | |
%rd679 = load i8*, i8** %rd679_p1, align 8, !alias.scope !2 | |
br label %l680_start | |
l680_start: | |
; VCall: mitsuba::BSDF::sample() | |
; - target 1 = @func_60a1a348aca7bdb47e851f6cedce38a2; | |
; - target 2 = @func_e6fff8d6b222436f71518443ce068358; | |
; - target 3 = @func_48d4a1fb2376a0011da8c81578897b84; | |
%u680_self_ptr_0 = bitcast i8* %rd678 to i64* | |
%u680_self_ptr = getelementptr i64, i64* %u680_self_ptr_0, <8 x i32> %r498 | |
%u680_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u680_self_ptr, i32 8, <8 x i1> %p677, <8 x i64> zeroinitializer) | |
%u680_self_initial = trunc <8 x i64> %u680_self_combined to <8 x i32> | |
%u680_offset_1 = lshr <8 x i64> %u680_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> | |
%u680_offset = trunc <8 x i64> %u680_offset_1 to <8 x i32> | |
%u680_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0 | |
%u680_in_0_1 = bitcast i8* %u680_in_0_0 to <8 x float> * | |
store <8 x float> %f370, <8 x float>* %u680_in_0_1, align 32 | |
%u680_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32 | |
%u680_in_1_1 = bitcast i8* %u680_in_1_0 to <8 x float> * | |
store <8 x float> %f371, <8 x float>* %u680_in_1_1, align 32 | |
%u680_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64 | |
%u680_in_2_1 = bitcast i8* %u680_in_2_0 to <8 x float> * | |
store <8 x float> %f372, <8 x float>* %u680_in_2_1, align 32 | |
%u680_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96 | |
%u680_in_3_1 = bitcast i8* %u680_in_3_0 to <8 x float> * | |
store <8 x float> %f377, <8 x float>* %u680_in_3_1, align 32 | |
%u680_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128 | |
%u680_in_4_1 = bitcast i8* %u680_in_4_0 to <8 x float> * | |
store <8 x float> %f378, <8 x float>* %u680_in_4_1, align 32 | |
%u680_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160 | |
%u680_in_5_1 = bitcast i8* %u680_in_5_0 to <8 x float> * | |
store <8 x float> %f379, <8 x float>* %u680_in_5_1, align 32 | |
%u680_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192 | |
%u680_in_6_1 = bitcast i8* %u680_in_6_0 to <8 x float> * | |
store <8 x float> %f446, <8 x float>* %u680_in_6_1, align 32 | |
%u680_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224 | |
%u680_in_7_1 = bitcast i8* %u680_in_7_0 to <8 x float> * | |
store <8 x float> %f447, <8 x float>* %u680_in_7_1, align 32 | |
%u680_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256 | |
%u680_in_8_1 = bitcast i8* %u680_in_8_0 to <8 x float> * | |
store <8 x float> %f448, <8 x float>* %u680_in_8_1, align 32 | |
%u680_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288 | |
%u680_in_9_1 = bitcast i8* %u680_in_9_0 to <8 x i32> * | |
store <8 x i32> %r346, <8 x i32>* %u680_in_9_1, align 32 | |
%u680_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320 | |
%u680_in_10_1 = bitcast i8* %u680_in_10_0 to <8 x float> * | |
store <8 x float> %f637, <8 x float>* %u680_in_10_1, align 32 | |
%u680_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352 | |
%u680_in_11_1 = bitcast i8* %u680_in_11_0 to <8 x float> * | |
store <8 x float> %f656, <8 x float>* %u680_in_11_1, align 32 | |
%u680_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384 | |
%u680_in_12_1 = bitcast i8* %u680_in_12_0 to <8 x float> * | |
store <8 x float> %f675, <8 x float>* %u680_in_12_1, align 32 | |
%u680_out = getelementptr i8, i8* %buffer, i32 416 | |
%u680_tmp_0_0 = getelementptr inbounds i8, i8* %u680_out, i64 0 | |
%u680_tmp_0_1 = bitcast i8* %u680_tmp_0_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_0_1, align 32 | |
%u680_tmp_1_0 = getelementptr inbounds i8, i8* %u680_out, i64 32 | |
%u680_tmp_1_1 = bitcast i8* %u680_tmp_1_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_1_1, align 32 | |
%u680_tmp_2_0 = getelementptr inbounds i8, i8* %u680_out, i64 64 | |
%u680_tmp_2_1 = bitcast i8* %u680_tmp_2_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_2_1, align 32 | |
%u680_tmp_3_0 = getelementptr inbounds i8, i8* %u680_out, i64 96 | |
%u680_tmp_3_1 = bitcast i8* %u680_tmp_3_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_3_1, align 32 | |
%u680_tmp_4_0 = getelementptr inbounds i8, i8* %u680_out, i64 128 | |
%u680_tmp_4_1 = bitcast i8* %u680_tmp_4_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_4_1, align 32 | |
%u680_tmp_5_0 = getelementptr inbounds i8, i8* %u680_out, i64 160 | |
%u680_tmp_5_1 = bitcast i8* %u680_tmp_5_0 to <8 x i32> * | |
store <8 x i32> zeroinitializer, <8 x i32>* %u680_tmp_5_1, align 32 | |
%u680_tmp_7_0 = getelementptr inbounds i8, i8* %u680_out, i64 192 | |
%u680_tmp_7_1 = bitcast i8* %u680_tmp_7_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_7_1, align 32 | |
%u680_tmp_8_0 = getelementptr inbounds i8, i8* %u680_out, i64 224 | |
%u680_tmp_8_1 = bitcast i8* %u680_tmp_8_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_8_1, align 32 | |
%u680_tmp_9_0 = getelementptr inbounds i8, i8* %u680_out, i64 256 | |
%u680_tmp_9_1 = bitcast i8* %u680_tmp_9_0 to <8 x float> * | |
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_9_1, align 32 | |
br label %l680_check | |
l680_check: | |
%u680_self = phi <8 x i32> [ %u680_self_initial, %l680_start ], [ %u680_self_next, %l680_call ] | |
%u680_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u680_self) | |
%u680_valid = icmp ne i32 %u680_next, 0 | |
br i1 %u680_valid, label %l680_call, label %l680_end | |
l680_call: | |
%u680_bcast_0 = insertelement <8 x i32> undef, i32 %u680_next, i32 0 | |
%u680_bcast = shufflevector <8 x i32> %u680_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer | |
%u680_active = icmp eq <8 x i32> %u680_self, %u680_bcast | |
%u680_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u680_next | |
%u680_func_1 = load i8*, i8** %u680_func_0 | |
%u680_func = bitcast i8* %u680_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)* | |
call void %u680_func(<8 x i1> %u680_active, i8* %buffer, i8* %rd679, <8 x i32> %u680_offset) | |
%u680_self_next = select <8 x i1> %u680_active, <8 x i32> zeroinitializer, <8 x i32> %u680_self | |
br label %l680_check | |
l680_end: | |
%u680_out_0_0 = getelementptr inbounds i8, i8* %u680_out, i64 0 | |
%u680_out_0_1 = bitcast i8* %u680_out_0_0 to <8 x float> * | |
%f681 = load <8 x float>, <8 x float>* %u680_out_0_1, align 32 | |
%u680_out_1_0 = getelementptr inbounds i8, i8* %u680_out, i64 32 | |
%u680_out_1_1 = bitcast i8* %u680_out_1_0 to <8 x float> * | |
%f682 = load <8 x float>, <8 x float>* %u680_out_1_1, align 32 | |
%u680_out_2_0 = getelementptr inbounds i8, i8* %u680_out, i64 64 | |
%u680_out_2_1 = bitcast i8* %u680_out_2_0 to <8 x float> * | |
%f683 = load <8 x float>, <8 x float>* %u680_out_2_1, align 32 | |
%u680_out_3_0 = getelementptr inbounds i8, i8* %u680_out, i64 96 | |
%u680_out_3_1 = bitcast i8* %u680_out_3_0 to <8 x float> * | |
%f684 = load <8 x float>, <8 x float>* %u680_out_3_1, align 32 | |
%u680_out_4_0 = getelementptr inbounds i8, i8* %u680_out, i64 128 | |
%u680_out_4_1 = bitcast i8* %u680_out_4_0 to <8 x float> * | |
%f685 = load <8 x float>, <8 x float>* %u680_out_4_1, align 32 | |
%u680_out_5_0 = getelementptr inbounds i8, i8* %u680_out, i64 160 | |
%u680_out_5_1 = bitcast i8* %u680_out_5_0 to <8 x i32> * | |
%r686 = load <8 x i32>, <8 x i32>* %u680_out_5_1, align 32 | |
%u680_out_7_0 = getelementptr inbounds i8, i8* %u680_out, i64 192 | |
%u680_out_7_1 = bitcast i8* %u680_out_7_0 to <8 x float> * | |
%f687 = load <8 x float>, <8 x float>* %u680_out_7_1, align 32 | |
%u680_out_8_0 = getelementptr inbounds i8, i8* %u680_out, i64 224 | |
%u680_out_8_1 = bitcast i8* %u680_out_8_0 to <8 x float> * | |
%f688 = load <8 x float>, <8 x float>* %u680_out_8_1, align 32 | |
%u680_out_9_0 = getelementptr inbounds i8, i8* %u680_out, i64 256 | |
%u680_out_9_1 = bitcast i8* %u680_out_9_0 to <8 x float> * | |
%f689 = load <8 x float>, <8 x float>* %u680_out_9_1, align 32 | |
br label %l680_done | |
l680_done: | |
%f690 = fmul <8 x float> %f425, %f681 | |
%f691 = fmul <8 x float> %f426, %f681 | |
%f692 = fmul <8 x float> %f427, %f681 | |
%f693 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f432, <8 x float> %f682, <8 x float> %f690) | |
%f694 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f434, <8 x float> %f682, <8 x float> %f691) | |
%f695 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f436, <8 x float> %f682, <8 x float> %f692) | |
%f696 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f683, <8 x float> %f693) | |
%f697 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f683, <8 x float> %f694) | |
%f698 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f683, <8 x float> %f695) | |
%f699 = fmul <8 x float> %f373, %f696 | |
%f700 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f374, <8 x float> %f697, <8 x float> %f699) | |
%f701 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f375, <8 x float> %f698, <8 x float> %f700) | |
%p702 = fcmp oge <8 x float> %f701, %f290 | |
%f703 = select <8 x i1> %p702, <8 x float> %f555, <8 x float> %f560 | |
%f704 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f373, <8 x float> %f370) | |
%f705 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f374, <8 x float> %f371) | |
%f706 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f375, <8 x float> %f372) | |
%f707_1 = insertelement <8 x float> undef, float 0x47efffffe0000000, i32 0 | |
%f707 = shufflevector <8 x float> %f707_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f708 = fmul <8 x float> %f269, %f687 | |
%f709 = fmul <8 x float> %f270, %f688 | |
%f710 = fmul <8 x float> %f271, %f689 | |
%f711 = fmul <8 x float> %f275, %f685 | |
%r712_1 = insertelement <8 x i32> undef, i32 97, i32 0 | |
%r712 = shufflevector <8 x i32> %r712_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r713 = and <8 x i32> %r686, %r712 | |
%p714 = icmp ne <8 x i32> %r713, %r289 | |
%r715 = select <8 x i1> %p385, <8 x i32> %r306, <8 x i32> %r276 | |
%f716 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f708, <8 x float> %f709) | |
%f717 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f716, <8 x float> %f710) | |
%f718 = fmul <8 x float> %f711, %f711 | |
%f719 = fmul <8 x float> %f717, %f718 | |
%f720 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %f719, <8 x float> %f340) | |
%p721 = icmp uge <8 x i32> %r715, %r341 | |
%rd722_0 = mul <8 x i64> %rd658, %rd310 | |
%rd722 = add <8 x i64> %rd722_0, %rd112 | |
%rd723 = lshr <8 x i64> %rd658, %rd312 | |
%rd724 = xor <8 x i64> %rd723, %rd658 | |
%rd725 = lshr <8 x i64> %rd724, %rd315 | |
%r726 = trunc <8 x i64> %rd725 to <8 x i32> | |
%rd727 = lshr <8 x i64> %rd658, %rd318 | |
%r728 = trunc <8 x i64> %rd727 to <8 x i32> | |
%r729 = lshr <8 x i32> %r726, %r728 | |
%r730 = bitcast <8 x i32> %r728 to <8 x i32> | |
%r731 = sub <8 x i32> zeroinitializer, %r730 | |
%r732 = and <8 x i32> %r731, %r324 | |
%r733 = bitcast <8 x i32> %r732 to <8 x i32> | |
%r734 = shl <8 x i32> %r726, %r733 | |
%r735 = or <8 x i32> %r729, %r734 | |
%r736 = lshr <8 x i32> %r735, %r329 | |
%r737 = or <8 x i32> %r736, %r331 | |
%f738 = bitcast <8 x i32> %r737 to <8 x float> | |
%f739 = fsub <8 x float> %f738, %f297 | |
%p740 = fcmp olt <8 x float> %f739, %f720 | |
%f741 = fdiv <8 x float> %f297, %f720 | |
%f742 = fmul <8 x float> %f708, %f741 | |
%f743 = fmul <8 x float> %f709, %f741 | |
%f744 = fmul <8 x float> %f710, %f741 | |
%f745 = select <8 x i1> %p721, <8 x float> %f742, <8 x float> %f708 | |
%f746 = select <8 x i1> %p721, <8 x float> %f743, <8 x float> %f709 | |
%f747 = select <8 x i1> %p721, <8 x float> %f744, <8 x float> %f710 | |
%p748 = xor <8 x i1> %p721, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%p749 = or <8 x i1> %p748, %p740 | |
%p750 = and <8 x i1> %p496, %p749 | |
%p751 = fcmp one <8 x float> %f717, %f290 | |
%p752 = and <8 x i1> %p750, %p751 | |
br label %l_236_tail | |
l_236_tail: | |
%rd237_final = select <8 x i1> %p259, <8 x i64> %rd722, <8 x i64> %rd237 | |
%f238_final = select <8 x i1> %p259, <8 x float> %f704, <8 x float> %f238 | |
%f239_final = select <8 x i1> %p259, <8 x float> %f705, <8 x float> %f239 | |
%f240_final = select <8 x i1> %p259, <8 x float> %f706, <8 x float> %f240 | |
%f241_final = select <8 x i1> %p259, <8 x float> %f696, <8 x float> %f241 | |
%f242_final = select <8 x i1> %p259, <8 x float> %f697, <8 x float> %f242 | |
%f243_final = select <8 x i1> %p259, <8 x float> %f698, <8 x float> %f243 | |
%f244_final = select <8 x i1> %p259, <8 x float> %f707, <8 x float> %f244 | |
%f245_final = select <8 x i1> %p259, <8 x float> %f745, <8 x float> %f245 | |
%f246_final = select <8 x i1> %p259, <8 x float> %f746, <8 x float> %f246 | |
%f247_final = select <8 x i1> %p259, <8 x float> %f747, <8 x float> %f247 | |
%f248_final = select <8 x i1> %p259, <8 x float> %f616, <8 x float> %f248 | |
%f249_final = select <8 x i1> %p259, <8 x float> %f617, <8 x float> %f249 | |
%f250_final = select <8 x i1> %p259, <8 x float> %f618, <8 x float> %f250 | |
%f251_final = select <8 x i1> %p259, <8 x float> %f711, <8 x float> %f251 | |
%r252_final = select <8 x i1> %p259, <8 x i32> %r715, <8 x i32> %r252 | |
%f254_final = select <8 x i1> %p259, <8 x float> %f370, <8 x float> %f254 | |
%f255_final = select <8 x i1> %p259, <8 x float> %f371, <8 x float> %f255 | |
%f256_final = select <8 x i1> %p259, <8 x float> %f372, <8 x float> %f256 | |
%f257_final = select <8 x i1> %p259, <8 x float> %f684, <8 x float> %f257 | |
%p258_final = select <8 x i1> %p259, <8 x i1> %p714, <8 x i1> %p258 | |
%p259_final = select <8 x i1> %p259, <8 x i1> %p752, <8 x i1> %p259 | |
br label %l_236_cond; | |
l_236_done: | |
%f754 = bitcast <8 x float> %f248 to <8 x float> | |
%f755 = bitcast <8 x float> %f249 to <8 x float> | |
%f756 = bitcast <8 x float> %f250 to <8 x float> | |
%f757 = bitcast <8 x float> %f253 to <8 x float> | |
%p758 = fcmp one <8 x float> %f757, %f220 | |
%f759 = select <8 x i1> %p758, <8 x float> %f757, <8 x float> zeroinitializer | |
%f760_1 = insertelement <8 x float> undef, float 0xbfe0000000000000, i32 0 | |
%f760 = shufflevector <8 x float> %f760_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f761 = fadd <8 x float> %f168, %f760 | |
%f762 = fadd <8 x float> %f169, %f760 | |
%f763 = fsub <8 x float> %f761, %f30 | |
%f764 = fsub <8 x float> %f762, %f30 | |
%f765 = fadd <8 x float> %f761, %f30 | |
%f766 = fadd <8 x float> %f762, %f30 | |
%f767 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %f763) | |
%r768 = fptosi <8 x float> %f767 to <8 x i32> | |
%f769 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %f764) | |
%r770 = fptosi <8 x float> %f769 to <8 x i32> | |
%r771_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r771 = shufflevector <8 x i32> %r771_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r772_0 = icmp sgt <8 x i32> %r768, %r771 | |
%r772 = select <8 x i1> %r772_0, <8 x i32> %r768, <8 x i32> %r771 | |
%r773_0 = icmp sgt <8 x i32> %r770, %r771 | |
%r773 = select <8 x i1> %r773_0, <8 x i32> %r770, <8 x i32> %r771 | |
%r774 = bitcast <8 x i32> %r772 to <8 x i32> | |
%r775 = bitcast <8 x i32> %r773 to <8 x i32> | |
%f776 = call <8 x float> @llvm.floor.v8f32(<8 x float> %f765) | |
%r777 = fptosi <8 x float> %f776 to <8 x i32> | |
%f778 = call <8 x float> @llvm.floor.v8f32(<8 x float> %f766) | |
%r779 = fptosi <8 x float> %f778 to <8 x i32> | |
%r780_1 = insertelement <8 x i32> undef, i32 255, i32 0 | |
%r780 = shufflevector <8 x i32> %r780_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r781_0 = icmp slt <8 x i32> %r777, %r780 | |
%r781 = select <8 x i1> %r781_0, <8 x i32> %r777, <8 x i32> %r780 | |
%r782_0 = icmp slt <8 x i32> %r779, %r780 | |
%r782 = select <8 x i1> %r782_0, <8 x i32> %r779, <8 x i32> %r780 | |
%r783 = bitcast <8 x i32> %r781 to <8 x i32> | |
%r784 = bitcast <8 x i32> %r782 to <8 x i32> | |
%r785_0 = mul <8 x i32> %r775, %r121 | |
%r785 = add <8 x i32> %r785_0, %r774 | |
%r786 = mul <8 x i32> %r785, %r40 | |
%p787 = icmp ule <8 x i32> %r774, %r783 | |
%p788 = icmp ule <8 x i32> %r775, %r784 | |
%p789 = and <8 x i1> %p787, %p788 | |
%f790 = uitofp <8 x i32> %r774 to <8 x float> | |
%f791 = uitofp <8 x i32> %r775 to <8 x float> | |
%f792 = fsub <8 x float> %f790, %f761 | |
%f793 = fsub <8 x float> %f791, %f762 | |
br label %l_794_start | |
l_794_start: | |
br label %l_794_cond | |
l_794_cond: ; Loop (ImageBlock::put() [1]) | |
%r795 = phi <8 x i32> [ %r795_final, %l_794_tail ], [ %r219, %l_794_start ] | |
%r796 = phi <8 x i32> [ %r796_final, %l_794_tail ], [ %r786, %l_794_start ] | |
%r797_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r797 = shufflevector <8 x i32> %r797_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p798 = icmp ult <8 x i32> %r795, %r797 | |
%p794 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p798) | |
br i1 %p794, label %l_794_body, label %l_794_done | |
l_794_body: | |
%r800 = phi <8 x i32> [ %r795, %l_794_cond ] | |
%r801 = phi <8 x i32> [ %r796, %l_794_cond ] | |
%r802_0 = trunc i64 %index to i32 | |
%r802_1 = insertelement <8 x i32> undef, i32 %r802_0, i32 0 | |
%r802_2 = shufflevector <8 x i32> %r802_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r802 = add <8 x i32> %r802_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
%p803_0 = trunc i64 %end to i32 | |
%p803_1 = insertelement <8 x i32> undef, i32 %p803_0, i32 0 | |
%p803_2 = shufflevector <8 x i32> %p803_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p803 = icmp ult <8 x i32> %r802, %p803_2 | |
%p804 = and <8 x i1> %p803, %p798 | |
%f805 = uitofp <8 x i32> %r800 to <8 x float> | |
%f806 = fadd <8 x float> %f793, %f805 | |
%f807_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f807 = shufflevector <8 x float> %f807_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f808_0 = bitcast <8 x float> %f806 to <8 x i32> | |
%f808_1 = bitcast <8 x float> %f807 to <8 x i32> | |
%f808_2 = and <8 x i32> %f808_0, %f808_1 | |
%f808 = bitcast <8 x i32> %f808_2 to <8 x float> | |
%f809_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f809 = shufflevector <8 x float> %f809_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f810 = fsub <8 x float> %f809, %f808 | |
%f811_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f811 = shufflevector <8 x float> %f811_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f812 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f811, <8 x float> %f810) | |
%r813 = add <8 x i32> %r775, %r800 | |
%p814 = icmp ule <8 x i32> %r813, %r784 | |
%p815 = and <8 x i1> %p789, %p814 | |
%r816_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r816 = shufflevector <8 x i32> %r816_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
br label %l_817_start | |
l_817_start: | |
br label %l_817_cond | |
l_817_cond: ; Loop (ImageBlock::put() [2]) | |
%r818 = phi <8 x i32> [ %r818_final, %l_817_tail ], [ %r816, %l_817_start ] | |
%r819 = phi <8 x i32> [ %r819_final, %l_817_tail ], [ %r801, %l_817_start ] | |
%r820_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r820 = shufflevector <8 x i32> %r820_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p821 = icmp ult <8 x i32> %r818, %r820 | |
%p817 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p821) | |
br i1 %p817, label %l_817_body, label %l_817_done | |
l_817_body: | |
%r823 = phi <8 x i32> [ %r818, %l_817_cond ] | |
%r824 = phi <8 x i32> [ %r819, %l_817_cond ] | |
%p825 = and <8 x i1> %p804, %p821 | |
%f826 = uitofp <8 x i32> %r823 to <8 x float> | |
%f827 = fadd <8 x float> %f792, %f826 | |
%f828_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f828 = shufflevector <8 x float> %f828_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f829_0 = bitcast <8 x float> %f827 to <8 x i32> | |
%f829_1 = bitcast <8 x float> %f828 to <8 x i32> | |
%f829_2 = and <8 x i32> %f829_0, %f829_1 | |
%f829 = bitcast <8 x i32> %f829_2 to <8 x float> | |
%f830_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f830 = shufflevector <8 x float> %f830_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f831 = fsub <8 x float> %f830, %f829 | |
%f832_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f832 = shufflevector <8 x float> %f832_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f833 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f832, <8 x float> %f831) | |
%f834 = fmul <8 x float> %f833, %f812 | |
%r835 = add <8 x i32> %r774, %r823 | |
%p836 = icmp ule <8 x i32> %r835, %r783 | |
%p837 = and <8 x i1> %p815, %p836 | |
%f838 = fmul <8 x float> %f754, %f834 | |
%rd839_p1 = getelementptr inbounds i8*, i8** %params, i32 53 | |
%rd839 = load i8*, i8** %rd839_p1, align 8, !alias.scope !2 | |
%p840 = and <8 x i1> %p837, %p825 | |
%u841_0 = bitcast i8* %rd839 to float* | |
%u841_1 = getelementptr float, float* %u841_0, <8 x i32> %r824 | |
call void @ek.scatter_add.v8f32(<8 x float*> %u841_1, <8 x float> %f838, <8 x i1> %p840) | |
%r842_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r842 = shufflevector <8 x i32> %r842_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r843 = add <8 x i32> %r824, %r842 | |
%f844 = fmul <8 x float> %f755, %f834 | |
%u845_0 = bitcast i8* %rd839 to float* | |
%u845_1 = getelementptr float, float* %u845_0, <8 x i32> %r843 | |
call void @ek.scatter_add.v8f32(<8 x float*> %u845_1, <8 x float> %f844, <8 x i1> %p840) | |
%r846 = add <8 x i32> %r843, %r842 | |
%f847 = fmul <8 x float> %f756, %f834 | |
%u848_0 = bitcast i8* %rd839 to float* | |
%u848_1 = getelementptr float, float* %u848_0, <8 x i32> %r846 | |
call void @ek.scatter_add.v8f32(<8 x float*> %u848_1, <8 x float> %f847, <8 x i1> %p840) | |
%r849 = add <8 x i32> %r846, %r842 | |
%u850_0 = bitcast i8* %rd839 to float* | |
%u850_1 = getelementptr float, float* %u850_0, <8 x i32> %r849 | |
call void @ek.scatter_add.v8f32(<8 x float*> %u850_1, <8 x float> %f834, <8 x i1> %p840) | |
%r851 = add <8 x i32> %r849, %r842 | |
%f852 = fmul <8 x float> %f759, %f834 | |
%u853_0 = bitcast i8* %rd839 to float* | |
%u853_1 = getelementptr float, float* %u853_0, <8 x i32> %r851 | |
call void @ek.scatter_add.v8f32(<8 x float*> %u853_1, <8 x float> %f852, <8 x i1> %p840) | |
%r854 = add <8 x i32> %r851, %r842 | |
%r855 = add <8 x i32> %r823, %r842 | |
br label %l_817_tail | |
l_817_tail: | |
%r818_final = select <8 x i1> %p821, <8 x i32> %r855, <8 x i32> %r818 | |
%r819_final = select <8 x i1> %p821, <8 x i32> %r854, <8 x i32> %r819 | |
br label %l_817_cond; | |
l_817_done: | |
%r858 = bitcast <8 x i32> %r819 to <8 x i32> | |
%r859_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r859 = shufflevector <8 x i32> %r859_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r860 = add <8 x i32> %r800, %r859 | |
%r861_1 = insertelement <8 x i32> undef, i32 1270, i32 0 | |
%r861 = shufflevector <8 x i32> %r861_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r862 = add <8 x i32> %r858, %r861 | |
br label %l_794_tail | |
l_794_tail: | |
%r795_final = select <8 x i1> %p798, <8 x i32> %r860, <8 x i32> %r795 | |
%r796_final = select <8 x i1> %p798, <8 x i32> %r862, <8 x i32> %r796 | |
br label %l_794_cond; | |
l_794_done: | |
br label %suffix | |
suffix: | |
%index_next = add i64 %index, 8 | |
%cond = icmp uge i64 %index_next, %end | |
br i1 %cond, label %done, label %body, !llvm.loop !2 | |
done: | |
ret void | |
} | |
define void @func_bac3cacf8aebcc87b13f4c43e6cdd822(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Shape::compute_surface_interaction() | |
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 12 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets | |
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*> | |
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 72 | |
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets | |
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*> | |
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f6_p1 = getelementptr inbounds i8, i8* %data, i32 84 | |
%f6_p2 = getelementptr inbounds i8, i8* %f6_p1, <8 x i32> %offsets | |
%f6_p3 = bitcast <8 x i8*> %f6_p2 to <8 x float*> | |
%f6 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f6_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f7_p1 = getelementptr inbounds i8, i8* %data, i32 96 | |
%f7_p2 = getelementptr inbounds i8, i8* %f7_p1, <8 x i32> %offsets | |
%f7_p3 = bitcast <8 x i8*> %f7_p2 to <8 x float*> | |
%f7 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f7_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f8_p1 = getelementptr inbounds i8, i8* %data, i32 68 | |
%f8_p2 = getelementptr inbounds i8, i8* %f8_p1, <8 x i32> %offsets | |
%f8_p3 = bitcast <8 x i8*> %f8_p2 to <8 x float*> | |
%f8 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f8_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f9_p1 = getelementptr inbounds i8, i8* %data, i32 80 | |
%f9_p2 = getelementptr inbounds i8, i8* %f9_p1, <8 x i32> %offsets | |
%f9_p3 = bitcast <8 x i8*> %f9_p2 to <8 x float*> | |
%f9 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f9_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f10_p1 = getelementptr inbounds i8, i8* %data, i32 92 | |
%f10_p2 = getelementptr inbounds i8, i8* %f10_p1, <8 x i32> %offsets | |
%f10_p3 = bitcast <8 x i8*> %f10_p2 to <8 x float*> | |
%f10 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f10_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f11_p1 = getelementptr inbounds i8, i8* %data, i32 64 | |
%f11_p2 = getelementptr inbounds i8, i8* %f11_p1, <8 x i32> %offsets | |
%f11_p3 = bitcast <8 x i8*> %f11_p2 to <8 x float*> | |
%f11 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f11_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f12_p1 = getelementptr inbounds i8, i8* %data, i32 76 | |
%f12_p2 = getelementptr inbounds i8, i8* %f12_p1, <8 x i32> %offsets | |
%f12_p3 = bitcast <8 x i8*> %f12_p2 to <8 x float*> | |
%f12 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f12_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f13_p1 = getelementptr inbounds i8, i8* %data, i32 88 | |
%f13_p2 = getelementptr inbounds i8, i8* %f13_p1, <8 x i32> %offsets | |
%f13_p3 = bitcast <8 x i8*> %f13_p2 to <8 x float*> | |
%f13 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f13_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f14_p1 = getelementptr inbounds i8, i8* %data, i32 40 | |
%f14_p2 = getelementptr inbounds i8, i8* %f14_p1, <8 x i32> %offsets | |
%f14_p3 = bitcast <8 x i8*> %f14_p2 to <8 x float*> | |
%f14 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f14_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f15_p1 = getelementptr inbounds i8, i8* %data, i32 24 | |
%f15_p2 = getelementptr inbounds i8, i8* %f15_p1, <8 x i32> %offsets | |
%f15_p3 = bitcast <8 x i8*> %f15_p2 to <8 x float*> | |
%f15 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f15_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f16_p1 = getelementptr inbounds i8, i8* %data, i32 36 | |
%f16_p2 = getelementptr inbounds i8, i8* %f16_p1, <8 x i32> %offsets | |
%f16_p3 = bitcast <8 x i8*> %f16_p2 to <8 x float*> | |
%f16 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f16_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f17_p1 = getelementptr inbounds i8, i8* %data, i32 20 | |
%f17_p2 = getelementptr inbounds i8, i8* %f17_p1, <8 x i32> %offsets | |
%f17_p3 = bitcast <8 x i8*> %f17_p2 to <8 x float*> | |
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f18_p1 = getelementptr inbounds i8, i8* %data, i32 32 | |
%f18_p2 = getelementptr inbounds i8, i8* %f18_p1, <8 x i32> %offsets | |
%f18_p3 = bitcast <8 x i8*> %f18_p2 to <8 x float*> | |
%f18 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f18_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f19_p1 = getelementptr inbounds i8, i8* %data, i32 16 | |
%f19_p2 = getelementptr inbounds i8, i8* %f19_p1, <8 x i32> %offsets | |
%f19_p3 = bitcast <8 x i8*> %f19_p2 to <8 x float*> | |
%f19 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f19_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f20_p1 = getelementptr inbounds i8, i8* %data, i32 44 | |
%f20_p2 = getelementptr inbounds i8, i8* %f20_p1, <8 x i32> %offsets | |
%f20_p3 = bitcast <8 x i8*> %f20_p2 to <8 x float*> | |
%f20 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f20_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f21_p1 = getelementptr inbounds i8, i8* %data, i32 28 | |
%f21_p2 = getelementptr inbounds i8, i8* %f21_p1, <8 x i32> %offsets | |
%f21_p3 = bitcast <8 x i8*> %f21_p2 to <8 x float*> | |
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f22_i0 = getelementptr inbounds i8, i8* %params, i64 0 | |
%f22_i1 = bitcast i8* %f22_i0 to <8 x float>* | |
%f22 = load <8 x float>, <8 x float>* %f22_i1, align 32 | |
%f23_i0 = getelementptr inbounds i8, i8* %params, i64 32 | |
%f23_i1 = bitcast i8* %f23_i0 to <8 x float>* | |
%f23 = load <8 x float>, <8 x float>* %f23_i1, align 32 | |
%f24_i0 = getelementptr inbounds i8, i8* %params, i64 64 | |
%f24_i1 = bitcast i8* %f24_i0 to <8 x float>* | |
%f24 = load <8 x float>, <8 x float>* %f24_i1, align 32 | |
%f25_i0 = getelementptr inbounds i8, i8* %params, i64 96 | |
%f25_i1 = bitcast i8* %f25_i0 to <8 x float>* | |
%f25 = load <8 x float>, <8 x float>* %f25_i1, align 32 | |
%f26_i0 = getelementptr inbounds i8, i8* %params, i64 128 | |
%f26_i1 = bitcast i8* %f26_i0 to <8 x float>* | |
%f26 = load <8 x float>, <8 x float>* %f26_i1, align 32 | |
%f27_i0 = getelementptr inbounds i8, i8* %params, i64 160 | |
%f27_i1 = bitcast i8* %f27_i0 to <8 x float>* | |
%f27 = load <8 x float>, <8 x float>* %f27_i1, align 32 | |
%f28_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f28_i1 = bitcast i8* %f28_i0 to <8 x float>* | |
%f28 = load <8 x float>, <8 x float>* %f28_i1, align 32 | |
%r29 = bitcast <8 x i32> %self to <8 x i32> | |
%f30_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f31 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f28, <8 x float> %f22) | |
%f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f28, <8 x float> %f23) | |
%f33 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f28, <8 x float> %f24) | |
%f34 = fsub <8 x float> %f31, %f2 | |
%f35 = fsub <8 x float> %f32, %f3 | |
%f36 = fsub <8 x float> %f33, %f4 | |
%f37 = fmul <8 x float> %f34, %f34 | |
%f38 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f35, <8 x float> %f35, <8 x float> %f37) | |
%f39 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f36, <8 x float> %f36, <8 x float> %f38) | |
%f40_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f40 = shufflevector <8 x float> %f40_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f41 = fdiv <8 x float> %f40, %f39 | |
%f42 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f41) | |
%f43 = fmul <8 x float> %f34, %f42 | |
%f44 = fmul <8 x float> %f35, %f42 | |
%f45 = fmul <8 x float> %f36, %f42 | |
%f46 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f43, <8 x float> %f1, <8 x float> %f2) | |
%f47 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f44, <8 x float> %f1, <8 x float> %f3) | |
%f48 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f45, <8 x float> %f1, <8 x float> %f4) | |
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f46, <8 x float> %f20) | |
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f46, <8 x float> %f21) | |
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f47, <8 x float> %f49) | |
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f47, <8 x float> %f50) | |
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f48, <8 x float> %f51) | |
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f48, <8 x float> %f52) | |
%f55_1 = insertelement <8 x float> undef, float 0x401921fb60000000, i32 0 | |
%f55 = shufflevector <8 x float> %f55_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f56 = fneg <8 x float> %f54 | |
%f57 = fmul <8 x float> %f5, %f56 | |
%f58 = fmul <8 x float> %f6, %f56 | |
%f59 = fmul <8 x float> %f7, %f56 | |
%f60 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f53, <8 x float> %f57) | |
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f53, <8 x float> %f58) | |
%f62 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f10, <8 x float> %f53, <8 x float> %f59) | |
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f30, <8 x float> %f60) | |
%f64 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f12, <8 x float> %f30, <8 x float> %f61) | |
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f30, <8 x float> %f62) | |
%f66 = fmul <8 x float> %f63, %f55 | |
%f67 = fmul <8 x float> %f64, %f55 | |
%f68 = fmul <8 x float> %f65, %f55 | |
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f67, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f47, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f48, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>* | |
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x float> %f43, <8 x float> %out_5_2 | |
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32 | |
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>* | |
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32 | |
%out_6_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_6_2 | |
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>* | |
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r29, <8 x i32> %out_8_2 | |
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32 | |
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>* | |
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32 | |
%out_13_3 = select <8 x i1> %mask, <8 x float> %f68, <8 x float> %out_13_2 | |
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32 | |
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>* | |
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32 | |
%out_14_3 = select <8 x i1> %mask, <8 x float> %f66, <8 x float> %out_14_2 | |
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32 | |
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>* | |
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32 | |
%out_15_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_15_2 | |
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32 | |
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>* | |
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32 | |
%out_16_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_16_2 | |
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32 | |
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704 | |
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>* | |
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32 | |
%out_17_3 = select <8 x i1> %mask, <8 x float> %f43, <8 x float> %out_17_2 | |
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32 | |
ret void; | |
} | |
; func_570d1d9103e40ae9598d8f351c2e5c2a
;
; Dr.Jit-generated callable, 8 lanes wide, tagged below as an implementation
; of the virtual call mitsuba::Shape::compute_surface_interaction().
;
; Arguments:
;   %mask    - per-lane predicate; passed to every gather and used to blend
;              every output store.
;   %self    - per-lane 32-bit value, copied unchanged to output slot 544.
;   %params  - byte buffer shared with the caller. Inputs are read at byte
;              offsets 224, 256 and 288; outputs are written at offsets
;              320..704. Every slot is one 32-byte-aligned 8-lane vector.
;   %data    - byte buffer from which three 64-bit pointers are gathered at
;              byte offsets 0, 8 and 16 (each plus the per-lane %offsets).
;   %offsets - per-lane byte offsets added to the %data accesses.
;
; Returns nothing. Each result is merged into %params with
; load / select-on-%mask / store, so inactive lanes keep their old contents.
;
; NOTE(review): the interpretation in the comments below (barycentric
; coordinates, triangle vertices, normals, tangent frame) is inferred from
; the arithmetic and the VCall tag only - confirm against the Mitsuba source.
; NOTE(review): the trailing " | |" on each line is not LLVM IR; presumably
; table residue from the gist page this dump was copied from.
define void @func_570d1d9103e40ae9598d8f351c2e5c2a(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Shape::compute_surface_interaction() | |
; ---- Inputs from %params ---------------------------------------------------
; %f1, %f2: two float vectors (offsets 224, 256). They act as interpolation
; weights below - presumably barycentric coordinates.
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>* | |
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32 | |
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>* | |
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32 | |
; %r3: one i32 vector (offset 288), used as a record index with stride 3 -
; presumably the primitive (face) index.
%r3_i0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%r3_i1 = bitcast i8* %r3_i0 to <8 x i32>* | |
%r3 = load <8 x i32>, <8 x i32>* %r3_i1, align 32 | |
; ---- Broadcast constants: %f4 = 0.0, %f5 = 1.0, %f6 = -1.0 -----------------
%f4_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f5_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f6_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; Same-type bitcasts: %r7 is just %self and %p8 is just %mask.
%r7 = bitcast <8 x i32> %self to <8 x i32> | |
%p8 = bitcast <8 x i1> %mask to <8 x i1> | |
; ---- Index arithmetic: %r10 = 3*%r3, %r12 = %r10 + 1, %r14 = %r10 + 2 ------
%r9_1 = insertelement <8 x i32> undef, i32 3, i32 0 | |
%r9 = shufflevector <8 x i32> %r9_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r10 = mul <8 x i32> %r3, %r9 | |
%r11_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r11 = shufflevector <8 x i32> %r11_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r12 = add <8 x i32> %r10, %r11 | |
%r13_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r13 = shufflevector <8 x i32> %r13_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r14 = add <8 x i32> %r10, %r13 | |
; ---- %rd15: per-lane pointer gathered from %data + 8 + %offsets ------------
; It is indexed as an i32 array below - presumably the face index buffer.
%rd15_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%rd15_p2 = getelementptr inbounds i8, i8* %rd15_p1, <8 x i32> %offsets | |
%rd15_p3 = bitcast <8 x i8*> %rd15_p2 to <8 x i64*> | |
%rd15_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd15_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd15 = inttoptr <8 x i64> %rd15_p4 to <8 x i8*> | |
; Three consecutive i32 entries per lane: %r16, %r17, %r18 (presumably the
; three vertex indices of the face). Masked-off lanes read as 0.
%r16_0 = bitcast <8 x i8*> %rd15 to <8 x i32*> | |
%r16_1 = getelementptr i32, <8 x i32*> %r16_0, <8 x i32> %r10 | |
%r16 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r16_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer) | |
%r17_0 = bitcast <8 x i8*> %rd15 to <8 x i32*> | |
%r17_1 = getelementptr i32, <8 x i32*> %r17_0, <8 x i32> %r12 | |
%r17 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r17_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer) | |
%r18_0 = bitcast <8 x i8*> %rd15 to <8 x i32*> | |
%r18_1 = getelementptr i32, <8 x i32*> %r18_0, <8 x i32> %r14 | |
%r18 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r18_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer) | |
; Float offsets of the first 3-vector: %r19 = 3*%r16, then +1 and +2.
%r19 = mul <8 x i32> %r16, %r9 | |
%r20 = add <8 x i32> %r19, %r11 | |
%r21 = add <8 x i32> %r19, %r13 | |
; ---- %rd22: per-lane pointer gathered from %data + 0 + %offsets ------------
; It is indexed as a float array below - presumably the vertex positions.
%rd22_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%rd22_p2 = getelementptr inbounds i8, i8* %rd22_p1, <8 x i32> %offsets | |
%rd22_p3 = bitcast <8 x i8*> %rd22_p2 to <8 x i64*> | |
%rd22_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd22_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd22 = inttoptr <8 x i64> %rd22_p4 to <8 x i8*> | |
; A = (%f23, %f24, %f25): three floats at offsets %r19, %r20, %r21.
%f23_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f23_1 = getelementptr float, <8 x float*> %f23_0, <8 x i32> %r19 | |
%f23 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f23_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f24_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f24_1 = getelementptr float, <8 x float*> %f24_0, <8 x i32> %r20 | |
%f24 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f24_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f25_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f25_1 = getelementptr float, <8 x float*> %f25_0, <8 x i32> %r21 | |
%f25 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f25_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; B = (%f29, %f30, %f31): three floats at offsets 3*%r17 + {0, 1, 2}.
%r26 = mul <8 x i32> %r17, %r9 | |
%r27 = add <8 x i32> %r26, %r11 | |
%r28 = add <8 x i32> %r26, %r13 | |
%f29_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f29_1 = getelementptr float, <8 x float*> %f29_0, <8 x i32> %r26 | |
%f29 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f29_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f30_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f30_1 = getelementptr float, <8 x float*> %f30_0, <8 x i32> %r27 | |
%f30 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f30_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f31_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f31_1 = getelementptr float, <8 x float*> %f31_0, <8 x i32> %r28 | |
%f31 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f31_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; C = (%f35, %f36, %f37): three floats at offsets 3*%r18 + {0, 1, 2}.
%r32 = mul <8 x i32> %r18, %r9 | |
%r33 = add <8 x i32> %r32, %r11 | |
%r34 = add <8 x i32> %r32, %r13 | |
%f35_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f35_1 = getelementptr float, <8 x float*> %f35_0, <8 x i32> %r32 | |
%f35 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f35_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f36_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f36_1 = getelementptr float, <8 x float*> %f36_0, <8 x i32> %r33 | |
%f36 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f36_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f37_0 = bitcast <8 x i8*> %rd22 to <8 x float*> | |
%f37_1 = getelementptr float, <8 x float*> %f37_0, <8 x i32> %r34 | |
%f37 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f37_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; ---- Third weight: %f39 = 1 - %f1 - %f2 ------------------------------------
%f38 = fsub <8 x float> %f5, %f1 | |
%f39 = fsub <8 x float> %f38, %f2 | |
; ---- Difference vectors: (%f40..%f42) = B - A, (%f43..%f45) = C - A --------
%f40 = fsub <8 x float> %f29, %f23 | |
%f41 = fsub <8 x float> %f30, %f24 | |
%f42 = fsub <8 x float> %f31, %f25 | |
%f43 = fsub <8 x float> %f35, %f23 | |
%f44 = fsub <8 x float> %f36, %f24 | |
%f45 = fsub <8 x float> %f37, %f25 | |
; ---- Weighted sum (%f52, %f53, %f54) = A*%f39 + B*%f1 + C*%f2 --------------
; Built as a multiply followed by two fused multiply-adds per component.
%f46 = fmul <8 x float> %f35, %f2 | |
%f47 = fmul <8 x float> %f36, %f2 | |
%f48 = fmul <8 x float> %f37, %f2 | |
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f29, <8 x float> %f1, <8 x float> %f46) | |
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f30, <8 x float> %f1, <8 x float> %f47) | |
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f31, <8 x float> %f1, <8 x float> %f48) | |
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f23, <8 x float> %f39, <8 x float> %f49) | |
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f24, <8 x float> %f39, <8 x float> %f50) | |
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f39, <8 x float> %f51) | |
; ---- Cross product (%f59, %f61, %f63) = (B - A) x (C - A) ------------------
; Each component is one fmul, one fneg and one fma.
%f55 = fmul <8 x float> %f42, %f44 | |
%f56 = fmul <8 x float> %f40, %f45 | |
%f57 = fmul <8 x float> %f41, %f43 | |
%f58 = fneg <8 x float> %f55 | |
%f59 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f41, <8 x float> %f45, <8 x float> %f58) | |
%f60 = fneg <8 x float> %f56 | |
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f42, <8 x float> %f43, <8 x float> %f60) | |
%f62 = fneg <8 x float> %f57 | |
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f40, <8 x float> %f44, <8 x float> %f62) | |
; ---- Normalize the cross product: n = (%f69, %f70, %f71) -------------------
; %f66 is the squared length; the scale factor is sqrt(1 / %f66).
; NOTE(review): %f66 == 0 is not guarded here (the divide yields inf) -
; presumably degenerate inputs are excluded upstream; verify.
%f64 = fmul <8 x float> %f59, %f59 | |
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f61, <8 x float> %f64) | |
%f66 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f63, <8 x float> %f65) | |
%f67 = fdiv <8 x float> %f5, %f66 | |
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67) | |
%f69 = fmul <8 x float> %f59, %f68 | |
%f70 = fmul <8 x float> %f61, %f68 | |
%f71 = fmul <8 x float> %f63, %f68 | |
; ---- Derive a second vector (%f83, %f85, %f87) from n ----------------------
; With s = (n.z >= 0 ? 1 : -1) and a = -1 / (s + n.z):
;   %f83 = 1 + s * n.x * n.x * a
;   %f85 = s * n.x * n.y * a
;   %f87 = -s * n.x
; NOTE(review): this looks like the first tangent of a branchless
; orthonormal-basis construction (Duff et al. style) - confirm.
%p72 = fcmp oge <8 x float> %f71, %f4 | |
%f73 = select <8 x i1> %p72, <8 x float> %f5, <8 x float> %f6 | |
%f74 = fadd <8 x float> %f73, %f71 | |
%f75 = fdiv <8 x float> %f5, %f74 | |
%f76 = fneg <8 x float> %f75 | |
%f77 = fmul <8 x float> %f69, %f70 | |
%f78 = fmul <8 x float> %f77, %f76 | |
%f79 = fmul <8 x float> %f69, %f69 | |
%f80 = fmul <8 x float> %f79, %f76 | |
%f81 = fneg <8 x float> %f80 | |
%f82 = select <8 x i1> %p72, <8 x float> %f80, <8 x float> %f81 | |
%f83 = fadd <8 x float> %f82, %f5 | |
%f84 = fneg <8 x float> %f78 | |
%f85 = select <8 x i1> %p72, <8 x float> %f78, <8 x float> %f84 | |
%f86 = fneg <8 x float> %f69 | |
%f87 = select <8 x i1> %p72, <8 x float> %f86, <8 x float> %f69 | |
; ---- %rd88: per-lane pointer gathered from %data + 16 + %offsets -----------
; It is indexed with the same nine float offsets as %rd22 - presumably the
; per-vertex normals.
%rd88_p1 = getelementptr inbounds i8, i8* %data, i32 16 | |
%rd88_p2 = getelementptr inbounds i8, i8* %rd88_p1, <8 x i32> %offsets | |
%rd88_p3 = bitcast <8 x i8*> %rd88_p2 to <8 x i64*> | |
%rd88_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd88_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd88 = inttoptr <8 x i64> %rd88_p4 to <8 x i8*> | |
; First 3-vector (%f89, %f90, %f91) at offsets %r19, %r20, %r21.
%f89_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f89_1 = getelementptr float, <8 x float*> %f89_0, <8 x i32> %r19 | |
%f89 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f89_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f90_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f90_1 = getelementptr float, <8 x float*> %f90_0, <8 x i32> %r20 | |
%f90 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f90_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f91_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f91_1 = getelementptr float, <8 x float*> %f91_0, <8 x i32> %r21 | |
%f91 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f91_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; Second 3-vector (%f92, %f93, %f94) at offsets %r26, %r27, %r28.
%f92_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f92_1 = getelementptr float, <8 x float*> %f92_0, <8 x i32> %r26 | |
%f92 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f92_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f93_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f93_1 = getelementptr float, <8 x float*> %f93_0, <8 x i32> %r27 | |
%f93 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f93_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f94_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f94_1 = getelementptr float, <8 x float*> %f94_0, <8 x i32> %r28 | |
%f94 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f94_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; Third 3-vector (%f95, %f96, %f97) at offsets %r32, %r33, %r34.
%f95_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f95_1 = getelementptr float, <8 x float*> %f95_0, <8 x i32> %r32 | |
%f95 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f95_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f96_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f96_1 = getelementptr float, <8 x float*> %f96_0, <8 x i32> %r33 | |
%f96 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f96_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
%f97_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f97_1 = getelementptr float, <8 x float*> %f97_0, <8 x i32> %r34 | |
%f97 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f97_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer) | |
; ---- Weighted sum of the three vectors with weights (%f39, %f1, %f2) -------
; Result before normalization: (%f104, %f105, %f106).
%f98 = fmul <8 x float> %f89, %f39 | |
%f99 = fmul <8 x float> %f90, %f39 | |
%f100 = fmul <8 x float> %f91, %f39 | |
%f101 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f92, <8 x float> %f1, <8 x float> %f98) | |
%f102 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f93, <8 x float> %f1, <8 x float> %f99) | |
%f103 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f94, <8 x float> %f1, <8 x float> %f100) | |
%f104 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f95, <8 x float> %f2, <8 x float> %f101) | |
%f105 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f2, <8 x float> %f102) | |
%f106 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f2, <8 x float> %f103) | |
; ---- Normalize it: (%f112, %f113, %f114), scale = sqrt(1 / squared length) -
%f107 = fmul <8 x float> %f104, %f104 | |
%f108 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f105, <8 x float> %f105, <8 x float> %f107) | |
%f109 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f106, <8 x float> %f106, <8 x float> %f108) | |
%f110 = fdiv <8 x float> %f5, %f109 | |
%f111 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f110) | |
%f112 = fmul <8 x float> %f104, %f111 | |
%f113 = fmul <8 x float> %f105, %f111 | |
%f114 = fmul <8 x float> %f106, %f111 | |
; ---- Outputs ----------------------------------------------------------------
; Each slot is read, blended with the new value under %mask, and written back.
; Slot 320 <- %f85 (middle component of the derived vector).
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f85, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
; Slots 352 / 384 / 416 <- weighted sum of A, B, C (%f52, %f53, %f54).
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f54, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
; Slots 448 / 480 / 512 <- normalized cross product n (%f69, %f70, %f71).
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>* | |
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x float> %f69, <8 x float> %out_5_2 | |
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32 | |
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>* | |
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32 | |
%out_6_3 = select <8 x i1> %mask, <8 x float> %f70, <8 x float> %out_6_2 | |
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f71, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
; Slot 544 <- %r7, i.e. the incoming %self value.
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>* | |
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r7, <8 x i32> %out_8_2 | |
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32 | |
; Slots 576 / 608 <- %f87 and %f83 (last and first component of the derived
; vector; the middle component went to slot 320 above).
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>* | |
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32 | |
%out_13_3 = select <8 x i1> %mask, <8 x float> %f87, <8 x float> %out_13_2 | |
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32 | |
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>* | |
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32 | |
%out_14_3 = select <8 x i1> %mask, <8 x float> %f83, <8 x float> %out_14_2 | |
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32 | |
; Slots 640 / 672 / 704 <- normalized weighted sum, stored in reverse
; component order (%f114, %f113, %f112).
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>* | |
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32 | |
%out_15_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_15_2 | |
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32 | |
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>* | |
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32 | |
%out_16_3 = select <8 x i1> %mask, <8 x float> %f113, <8 x float> %out_16_2 | |
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32 | |
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704 | |
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>* | |
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32 | |
%out_17_3 = select <8 x i1> %mask, <8 x float> %f112, <8 x float> %out_17_2 | |
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32 | |
ret void; | |
} | |
define void @func_e17fd895e12cc8bec8c4e55075a069b3(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Shape::compute_surface_interaction() | |
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 12 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets | |
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*> | |
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 72 | |
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets | |
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*> | |
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f6_p1 = getelementptr inbounds i8, i8* %data, i32 84 | |
%f6_p2 = getelementptr inbounds i8, i8* %f6_p1, <8 x i32> %offsets | |
%f6_p3 = bitcast <8 x i8*> %f6_p2 to <8 x float*> | |
%f6 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f6_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f7_p1 = getelementptr inbounds i8, i8* %data, i32 96 | |
%f7_p2 = getelementptr inbounds i8, i8* %f7_p1, <8 x i32> %offsets | |
%f7_p3 = bitcast <8 x i8*> %f7_p2 to <8 x float*> | |
%f7 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f7_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f8_p1 = getelementptr inbounds i8, i8* %data, i32 68 | |
%f8_p2 = getelementptr inbounds i8, i8* %f8_p1, <8 x i32> %offsets | |
%f8_p3 = bitcast <8 x i8*> %f8_p2 to <8 x float*> | |
%f8 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f8_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f9_p1 = getelementptr inbounds i8, i8* %data, i32 80 | |
%f9_p2 = getelementptr inbounds i8, i8* %f9_p1, <8 x i32> %offsets | |
%f9_p3 = bitcast <8 x i8*> %f9_p2 to <8 x float*> | |
%f9 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f9_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f10_p1 = getelementptr inbounds i8, i8* %data, i32 92 | |
%f10_p2 = getelementptr inbounds i8, i8* %f10_p1, <8 x i32> %offsets | |
%f10_p3 = bitcast <8 x i8*> %f10_p2 to <8 x float*> | |
%f10 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f10_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f11_p1 = getelementptr inbounds i8, i8* %data, i32 64 | |
%f11_p2 = getelementptr inbounds i8, i8* %f11_p1, <8 x i32> %offsets | |
%f11_p3 = bitcast <8 x i8*> %f11_p2 to <8 x float*> | |
%f11 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f11_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f12_p1 = getelementptr inbounds i8, i8* %data, i32 76 | |
%f12_p2 = getelementptr inbounds i8, i8* %f12_p1, <8 x i32> %offsets | |
%f12_p3 = bitcast <8 x i8*> %f12_p2 to <8 x float*> | |
%f12 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f12_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f13_p1 = getelementptr inbounds i8, i8* %data, i32 88 | |
%f13_p2 = getelementptr inbounds i8, i8* %f13_p1, <8 x i32> %offsets | |
%f13_p3 = bitcast <8 x i8*> %f13_p2 to <8 x float*> | |
%f13 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f13_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f14_p1 = getelementptr inbounds i8, i8* %data, i32 40 | |
%f14_p2 = getelementptr inbounds i8, i8* %f14_p1, <8 x i32> %offsets | |
%f14_p3 = bitcast <8 x i8*> %f14_p2 to <8 x float*> | |
%f14 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f14_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f15_p1 = getelementptr inbounds i8, i8* %data, i32 24 | |
%f15_p2 = getelementptr inbounds i8, i8* %f15_p1, <8 x i32> %offsets | |
%f15_p3 = bitcast <8 x i8*> %f15_p2 to <8 x float*> | |
%f15 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f15_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f16_p1 = getelementptr inbounds i8, i8* %data, i32 36 | |
%f16_p2 = getelementptr inbounds i8, i8* %f16_p1, <8 x i32> %offsets | |
%f16_p3 = bitcast <8 x i8*> %f16_p2 to <8 x float*> | |
%f16 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f16_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f17_p1 = getelementptr inbounds i8, i8* %data, i32 20 | |
%f17_p2 = getelementptr inbounds i8, i8* %f17_p1, <8 x i32> %offsets | |
%f17_p3 = bitcast <8 x i8*> %f17_p2 to <8 x float*> | |
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f18_p1 = getelementptr inbounds i8, i8* %data, i32 32 | |
%f18_p2 = getelementptr inbounds i8, i8* %f18_p1, <8 x i32> %offsets | |
%f18_p3 = bitcast <8 x i8*> %f18_p2 to <8 x float*> | |
%f18 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f18_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f19_p1 = getelementptr inbounds i8, i8* %data, i32 16 | |
%f19_p2 = getelementptr inbounds i8, i8* %f19_p1, <8 x i32> %offsets | |
%f19_p3 = bitcast <8 x i8*> %f19_p2 to <8 x float*> | |
%f19 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f19_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f20_p1 = getelementptr inbounds i8, i8* %data, i32 44 | |
%f20_p2 = getelementptr inbounds i8, i8* %f20_p1, <8 x i32> %offsets | |
%f20_p3 = bitcast <8 x i8*> %f20_p2 to <8 x float*> | |
%f20 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f20_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f21_p1 = getelementptr inbounds i8, i8* %data, i32 28 | |
%f21_p2 = getelementptr inbounds i8, i8* %f21_p1, <8 x i32> %offsets | |
%f21_p3 = bitcast <8 x i8*> %f21_p2 to <8 x float*> | |
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f22_i0 = getelementptr inbounds i8, i8* %params, i64 0 | |
%f22_i1 = bitcast i8* %f22_i0 to <8 x float>* | |
%f22 = load <8 x float>, <8 x float>* %f22_i1, align 32 | |
%f23_i0 = getelementptr inbounds i8, i8* %params, i64 32 | |
%f23_i1 = bitcast i8* %f23_i0 to <8 x float>* | |
%f23 = load <8 x float>, <8 x float>* %f23_i1, align 32 | |
%f24_i0 = getelementptr inbounds i8, i8* %params, i64 64 | |
%f24_i1 = bitcast i8* %f24_i0 to <8 x float>* | |
%f24 = load <8 x float>, <8 x float>* %f24_i1, align 32 | |
%f25_i0 = getelementptr inbounds i8, i8* %params, i64 96 | |
%f25_i1 = bitcast i8* %f25_i0 to <8 x float>* | |
%f25 = load <8 x float>, <8 x float>* %f25_i1, align 32 | |
%f26_i0 = getelementptr inbounds i8, i8* %params, i64 128 | |
%f26_i1 = bitcast i8* %f26_i0 to <8 x float>* | |
%f26 = load <8 x float>, <8 x float>* %f26_i1, align 32 | |
%f27_i0 = getelementptr inbounds i8, i8* %params, i64 160 | |
%f27_i1 = bitcast i8* %f27_i0 to <8 x float>* | |
%f27 = load <8 x float>, <8 x float>* %f27_i1, align 32 | |
%f28_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f28_i1 = bitcast i8* %f28_i0 to <8 x float>* | |
%f28 = load <8 x float>, <8 x float>* %f28_i1, align 32 | |
%f29_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f29 = shufflevector <8 x float> %f29_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f30 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f28, <8 x float> %f22) | |
%f31 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f28, <8 x float> %f23) | |
%f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f28, <8 x float> %f24) | |
%f33_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f33 = shufflevector <8 x float> %f33_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f34_1 = insertelement <8 x float> undef, float 0x401921fb60000000, i32 0 | |
%f34 = shufflevector <8 x float> %f34_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r35 = bitcast <8 x i32> %self to <8 x i32> | |
%f36 = fsub <8 x float> %f30, %f2 | |
%f37 = fsub <8 x float> %f31, %f3 | |
%f38 = fsub <8 x float> %f32, %f4 | |
%f39 = fmul <8 x float> %f36, %f36 | |
%f40 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f37, <8 x float> %f37, <8 x float> %f39) | |
%f41 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f38, <8 x float> %f38, <8 x float> %f40) | |
%f42 = fdiv <8 x float> %f33, %f41 | |
%f43 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f42) | |
%f44 = fmul <8 x float> %f36, %f43 | |
%f45 = fmul <8 x float> %f37, %f43 | |
%f46 = fmul <8 x float> %f38, %f43 | |
%f47 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f44, <8 x float> %f1, <8 x float> %f2) | |
%f48 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f45, <8 x float> %f1, <8 x float> %f3) | |
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f46, <8 x float> %f1, <8 x float> %f4) | |
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f47, <8 x float> %f20) | |
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f47, <8 x float> %f21) | |
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f48, <8 x float> %f50) | |
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f48, <8 x float> %f51) | |
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f49, <8 x float> %f52) | |
%f55 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f49, <8 x float> %f53) | |
%f56 = fneg <8 x float> %f55 | |
%f57 = fmul <8 x float> %f5, %f56 | |
%f58 = fmul <8 x float> %f6, %f56 | |
%f59 = fmul <8 x float> %f7, %f56 | |
%f60 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f54, <8 x float> %f57) | |
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f54, <8 x float> %f58) | |
%f62 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f10, <8 x float> %f54, <8 x float> %f59) | |
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f29, <8 x float> %f60) | |
%f64 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f12, <8 x float> %f29, <8 x float> %f61) | |
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f29, <8 x float> %f62) | |
%f66 = fmul <8 x float> %f63, %f34 | |
%f67 = fmul <8 x float> %f64, %f34 | |
%f68 = fmul <8 x float> %f65, %f34 | |
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f67, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f47, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f48, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f49, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>* | |
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_5_2 | |
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32 | |
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>* | |
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32 | |
%out_6_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_6_2 | |
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>* | |
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r35, <8 x i32> %out_8_2 | |
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32 | |
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>* | |
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32 | |
%out_13_3 = select <8 x i1> %mask, <8 x float> %f68, <8 x float> %out_13_2 | |
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32 | |
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>* | |
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32 | |
%out_14_3 = select <8 x i1> %mask, <8 x float> %f66, <8 x float> %out_14_2 | |
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32 | |
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>* | |
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32 | |
%out_15_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_15_2 | |
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32 | |
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>* | |
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32 | |
%out_16_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_16_2 | |
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32 | |
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704 | |
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>* | |
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32 | |
%out_17_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_17_2 | |
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32 | |
ret void; | |
} | |
; Dr.Jit-generated 8-lane callable; the VCall comment below labels it as
; mitsuba::Shape::compute_surface_interaction().
;
; Arguments:
;   %mask    - per-lane enable bits; used for every gather and every output select.
;   %self    - <8 x i32> value that is copied unchanged into the output slot at byte 544.
;   %params  - byte buffer: inputs are read at offsets 224, 256 and 288, results are
;              written at offsets 320..704 (one 32-byte vector per slot).
;   %data    - base pointer; per-lane %offsets are added to it before 64-bit pointers
;              are gathered from +0, +8 and +16.
;   %offsets - per-lane byte offsets into %data.
;
; Every output is written as load / select-on-%mask / store, so disabled lanes keep
; whatever the slot held before the call.
;
; NOTE(review): %f1/%f2 look like barycentric coordinates and %r3 like a primitive
; index of a triangle mesh (index table at data+8, positions at data+0, normals at
; data+16) - this is inferred from the arithmetic, confirm against the Mitsuba source.
define void @func_dcfca7b5b523c613a93412a4c52b1f7a(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Shape::compute_surface_interaction() | |
; --- Inputs: two float vectors (%f1, %f2) and one i32 vector (%r3) from %params.
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>* | |
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32 | |
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>* | |
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32 | |
%r3_i0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%r3_i1 = bitcast i8* %r3_i0 to <8 x i32>* | |
%r3 = load <8 x i32>, <8 x i32>* %r3_i1, align 32 | |
; --- Broadcast constants: %f4 = 0.0, %f5 = 1.0, %f6 = -1.0 (insertelement + splat shuffle).
%f4_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f5_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f6_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; %p7 is a same-type bitcast of %mask, i.e. the identical predicate under another name.
%p7 = bitcast <8 x i1> %mask to <8 x i1> | |
; --- Index arithmetic: %r9 = 3*%r3, %r11 = %r9 + 1, %r13 = %r9 + 2.
%r8_1 = insertelement <8 x i32> undef, i32 3, i32 0 | |
%r8 = shufflevector <8 x i32> %r8_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r9 = mul <8 x i32> %r3, %r8 | |
%r10_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r10 = shufflevector <8 x i32> %r10_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r11 = add <8 x i32> %r9, %r10 | |
%r12_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r13 = add <8 x i32> %r9, %r12 | |
; %f15 = 1 - %f1 - %f2: the third weight, used together with %f1 and %f2 below.
%f14 = fsub <8 x float> %f5, %f1 | |
%f15 = fsub <8 x float> %f14, %f2 | |
; %r16 is %self under another name; it is only used for the output at byte 544.
%r16 = bitcast <8 x i32> %self to <8 x i32> | |
; --- Gather a per-lane 64-bit pointer from data+8 and read three consecutive i32
; entries (%r18, %r19, %r20) at indices %r9, %r11, %r13.
%rd17_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%rd17_p2 = getelementptr inbounds i8, i8* %rd17_p1, <8 x i32> %offsets | |
%rd17_p3 = bitcast <8 x i8*> %rd17_p2 to <8 x i64*> | |
%rd17_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd17_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd17 = inttoptr <8 x i64> %rd17_p4 to <8 x i8*> | |
%r18_0 = bitcast <8 x i8*> %rd17 to <8 x i32*> | |
%r18_1 = getelementptr i32, <8 x i32*> %r18_0, <8 x i32> %r9 | |
%r18 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r18_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer) | |
%r19_0 = bitcast <8 x i8*> %rd17 to <8 x i32*> | |
%r19_1 = getelementptr i32, <8 x i32*> %r19_0, <8 x i32> %r11 | |
%r19 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r19_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer) | |
%r20_0 = bitcast <8 x i8*> %rd17 to <8 x i32*> | |
%r20_1 = getelementptr i32, <8 x i32*> %r20_0, <8 x i32> %r13 | |
%r20 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r20_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer) | |
; --- First 3-float record: element indices 3*%r18 + {0,1,2}, read through the
; pointer gathered from data+0 -> (%f25, %f26, %f27).
%r21 = mul <8 x i32> %r18, %r8 | |
%r22 = add <8 x i32> %r21, %r10 | |
%r23 = add <8 x i32> %r21, %r12 | |
%rd24_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%rd24_p2 = getelementptr inbounds i8, i8* %rd24_p1, <8 x i32> %offsets | |
%rd24_p3 = bitcast <8 x i8*> %rd24_p2 to <8 x i64*> | |
%rd24_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd24_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd24 = inttoptr <8 x i64> %rd24_p4 to <8 x i8*> | |
%f25_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f25_1 = getelementptr float, <8 x float*> %f25_0, <8 x i32> %r21 | |
%f25 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f25_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f26_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f26_1 = getelementptr float, <8 x float*> %f26_0, <8 x i32> %r22 | |
%f26 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f26_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f27_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f27_1 = getelementptr float, <8 x float*> %f27_0, <8 x i32> %r23 | |
%f27 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f27_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
; --- Second 3-float record: indices 3*%r19 + {0,1,2} -> (%f31, %f32, %f33).
%r28 = mul <8 x i32> %r19, %r8 | |
%r29 = add <8 x i32> %r28, %r10 | |
%r30 = add <8 x i32> %r28, %r12 | |
%f31_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f31_1 = getelementptr float, <8 x float*> %f31_0, <8 x i32> %r28 | |
%f31 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f31_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f32_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f32_1 = getelementptr float, <8 x float*> %f32_0, <8 x i32> %r29 | |
%f32 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f32_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f33_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f33_1 = getelementptr float, <8 x float*> %f33_0, <8 x i32> %r30 | |
%f33 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f33_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
; --- Third 3-float record: indices 3*%r20 + {0,1,2} -> (%f37, %f38, %f39).
%r34 = mul <8 x i32> %r20, %r8 | |
%r35 = add <8 x i32> %r34, %r10 | |
%r36 = add <8 x i32> %r34, %r12 | |
%f37_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f37_1 = getelementptr float, <8 x float*> %f37_0, <8 x i32> %r34 | |
%f37 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f37_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f38_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f38_1 = getelementptr float, <8 x float*> %f38_0, <8 x i32> %r35 | |
%f38 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f38_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f39_0 = bitcast <8 x i8*> %rd24 to <8 x float*> | |
%f39_1 = getelementptr float, <8 x float*> %f39_0, <8 x i32> %r36 | |
%f39 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f39_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
; --- Difference vectors relative to the first record:
; (%f40,%f41,%f42) = second - first, (%f43,%f44,%f45) = third - first.
%f40 = fsub <8 x float> %f31, %f25 | |
%f41 = fsub <8 x float> %f32, %f26 | |
%f42 = fsub <8 x float> %f33, %f27 | |
%f43 = fsub <8 x float> %f37, %f25 | |
%f44 = fsub <8 x float> %f38, %f26 | |
%f45 = fsub <8 x float> %f39, %f27 | |
; --- Weighted sum of the three records:
; (%f52,%f53,%f54) = first*%f15 + second*%f1 + third*%f2 (written to 352/384/416).
%f46 = fmul <8 x float> %f37, %f2 | |
%f47 = fmul <8 x float> %f38, %f2 | |
%f48 = fmul <8 x float> %f39, %f2 | |
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f31, <8 x float> %f1, <8 x float> %f46) | |
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f32, <8 x float> %f1, <8 x float> %f47) | |
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f33, <8 x float> %f1, <8 x float> %f48) | |
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f15, <8 x float> %f49) | |
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f15, <8 x float> %f50) | |
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f15, <8 x float> %f51) | |
; --- Cross product of the two difference vectors:
; %f59 = %f41*%f45 - %f42*%f44, %f61 = %f42*%f43 - %f40*%f45, %f63 = %f40*%f44 - %f41*%f43.
%f55 = fmul <8 x float> %f42, %f44 | |
%f56 = fmul <8 x float> %f40, %f45 | |
%f57 = fmul <8 x float> %f41, %f43 | |
%f58 = fneg <8 x float> %f55 | |
%f59 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f41, <8 x float> %f45, <8 x float> %f58) | |
%f60 = fneg <8 x float> %f56 | |
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f42, <8 x float> %f43, <8 x float> %f60) | |
%f62 = fneg <8 x float> %f57 | |
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f40, <8 x float> %f44, <8 x float> %f62) | |
; --- Normalise the cross product: %f66 is its squared length, %f68 = sqrt(1 / %f66),
; and (%f69,%f70,%f71) is the scaled vector (written to 448/480/512).
; NOTE(review): no guard against %f66 == 0 is visible in this function.
%f64 = fmul <8 x float> %f59, %f59 | |
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f61, <8 x float> %f64) | |
%f66 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f63, <8 x float> %f65) | |
%f67 = fdiv <8 x float> %f5, %f66 | |
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67) | |
%f69 = fmul <8 x float> %f59, %f68 | |
%f70 = fmul <8 x float> %f61, %f68 | |
%f71 = fmul <8 x float> %f63, %f68 | |
; --- Derive a second vector from the normalised one without branching.
; With s = (%f71 >= 0 ? 1 : -1) and a = -1 / (s + %f71):
;   %f83 = s * %f69*%f69 * a + 1   (stored at 608)
;   %f85 = s * %f69*%f70 * a       (stored at 320)
;   %f87 = -s * %f69               (stored at 576)
; NOTE(review): this has the shape of a branchless orthonormal-basis (tangent frame)
; construction - confirm against Mitsuba's coordinate_system().
%p72 = fcmp oge <8 x float> %f71, %f4 | |
%f73 = select <8 x i1> %p72, <8 x float> %f5, <8 x float> %f6 | |
%f74 = fadd <8 x float> %f73, %f71 | |
%f75 = fdiv <8 x float> %f5, %f74 | |
%f76 = fneg <8 x float> %f75 | |
%f77 = fmul <8 x float> %f69, %f70 | |
%f78 = fmul <8 x float> %f77, %f76 | |
%f79 = fmul <8 x float> %f69, %f69 | |
%f80 = fmul <8 x float> %f79, %f76 | |
%f81 = fneg <8 x float> %f80 | |
%f82 = select <8 x i1> %p72, <8 x float> %f80, <8 x float> %f81 | |
%f83 = fadd <8 x float> %f82, %f5 | |
%f84 = fneg <8 x float> %f78 | |
%f85 = select <8 x i1> %p72, <8 x float> %f78, <8 x float> %f84 | |
%f86 = fneg <8 x float> %f69 | |
%f87 = select <8 x i1> %p72, <8 x float> %f86, <8 x float> %f69 | |
; --- Second per-record attribute: pointer gathered from data+16, read with the same
; nine element indices (%r21..%r23, %r28..%r30, %r34..%r36) as the data+0 reads above.
%rd88_p1 = getelementptr inbounds i8, i8* %data, i32 16 | |
%rd88_p2 = getelementptr inbounds i8, i8* %rd88_p1, <8 x i32> %offsets | |
%rd88_p3 = bitcast <8 x i8*> %rd88_p2 to <8 x i64*> | |
%rd88_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd88_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd88 = inttoptr <8 x i64> %rd88_p4 to <8 x i8*> | |
%f89_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f89_1 = getelementptr float, <8 x float*> %f89_0, <8 x i32> %r21 | |
%f89 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f89_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f90_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f90_1 = getelementptr float, <8 x float*> %f90_0, <8 x i32> %r22 | |
%f90 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f90_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f91_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f91_1 = getelementptr float, <8 x float*> %f91_0, <8 x i32> %r23 | |
%f91 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f91_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f92_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f92_1 = getelementptr float, <8 x float*> %f92_0, <8 x i32> %r28 | |
%f92 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f92_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f93_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f93_1 = getelementptr float, <8 x float*> %f93_0, <8 x i32> %r29 | |
%f93 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f93_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f94_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f94_1 = getelementptr float, <8 x float*> %f94_0, <8 x i32> %r30 | |
%f94 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f94_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f95_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f95_1 = getelementptr float, <8 x float*> %f95_0, <8 x i32> %r34 | |
%f95 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f95_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f96_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f96_1 = getelementptr float, <8 x float*> %f96_0, <8 x i32> %r35 | |
%f96 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f96_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
%f97_0 = bitcast <8 x i8*> %rd88 to <8 x float*> | |
%f97_1 = getelementptr float, <8 x float*> %f97_0, <8 x i32> %r36 | |
%f97 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f97_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer) | |
; --- Same weighted sum as above, applied to the data+16 records:
; (%f104,%f105,%f106) = first*%f15 + second*%f1 + third*%f2.
%f98 = fmul <8 x float> %f89, %f15 | |
%f99 = fmul <8 x float> %f90, %f15 | |
%f100 = fmul <8 x float> %f91, %f15 | |
%f101 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f92, <8 x float> %f1, <8 x float> %f98) | |
%f102 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f93, <8 x float> %f1, <8 x float> %f99) | |
%f103 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f94, <8 x float> %f1, <8 x float> %f100) | |
%f104 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f95, <8 x float> %f2, <8 x float> %f101) | |
%f105 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f2, <8 x float> %f102) | |
%f106 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f2, <8 x float> %f103) | |
; --- Normalise the weighted sum: %f109 is the squared length, %f111 = sqrt(1 / %f109).
; NOTE(review): as above, no guard against a zero squared length is visible here.
%f107 = fmul <8 x float> %f104, %f104 | |
%f108 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f105, <8 x float> %f105, <8 x float> %f107) | |
%f109 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f106, <8 x float> %f106, <8 x float> %f108) | |
%f110 = fdiv <8 x float> %f5, %f109 | |
%f111 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f110) | |
%f112 = fmul <8 x float> %f104, %f111 | |
%f113 = fmul <8 x float> %f105, %f111 | |
%f114 = fmul <8 x float> %f106, %f111 | |
; --- Outputs. Each slot is load / select(%mask, new, old) / store.
; Slot map (byte offset in %params <- value):
;   320 <- %f85    352 <- %f52    384 <- %f53    416 <- %f54
;   448 <- %f69    480 <- %f70    512 <- %f71    544 <- %r16 (i32, = %self)
;   576 <- %f87    608 <- %f83    640 <- %f114   672 <- %f113   704 <- %f112
; Note the last three slots receive the normalised data+16 vector in reverse
; component order (%f114, %f113, %f112).
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f85, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f54, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>* | |
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x float> %f69, <8 x float> %out_5_2 | |
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32 | |
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>* | |
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32 | |
%out_6_3 = select <8 x i1> %mask, <8 x float> %f70, <8 x float> %out_6_2 | |
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f71, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>* | |
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r16, <8 x i32> %out_8_2 | |
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32 | |
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>* | |
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32 | |
%out_13_3 = select <8 x i1> %mask, <8 x float> %f87, <8 x float> %out_13_2 | |
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32 | |
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>* | |
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32 | |
%out_14_3 = select <8 x i1> %mask, <8 x float> %f83, <8 x float> %out_14_2 | |
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32 | |
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>* | |
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32 | |
%out_15_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_15_2 | |
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32 | |
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>* | |
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32 | |
%out_16_3 = select <8 x i1> %mask, <8 x float> %f113, <8 x float> %out_16_2 | |
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32 | |
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704 | |
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>* | |
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32 | |
%out_17_3 = select <8 x i1> %mask, <8 x float> %f112, <8 x float> %out_17_2 | |
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32 | |
ret void; | |
} | |
; Dr.Jit-generated 8-lane callable; the VCall comment below labels it as
; mitsuba::Emitter::pdf_direction().
;
; Arguments:
;   %mask    - per-lane enable bits for the gather and for the output select.
;   %params  - byte buffer: seven float vectors are read at offsets 0..192 and the
;              single result is written at offset 224.
;   %data    - base pointer; one float per lane is gathered from %data + %offsets.
;   %offsets - per-lane byte offsets into %data.
;
; Result per lane, with d = %f5*%f2 + %f6*%f3 + %f7*%f4:
;   d < 0 and |d| != 0  ->  %f1 * (%f8 * %f8) / |d|
;   otherwise           ->  0
;
; NOTE(review): this looks like an area-to-solid-angle density conversion (%f8 as a
; distance, d as a cosine, %f1 as a per-instance area density) - inferred from the
; arithmetic only, confirm against the emitter implementation.
define void @func_bf888b7a279d5208759c322da4df8aa9(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Emitter::pdf_direction() | |
; --- Per-instance scalar: one float gathered from %data at byte offset 0 + %offsets.
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; --- Call inputs: (%f2,%f3,%f4) from offsets 0/32/64, (%f5,%f6,%f7) from 96/128/160,
; and %f8 from offset 192.
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 0 | |
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>* | |
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32 | |
%f3_i0 = getelementptr inbounds i8, i8* %params, i64 32 | |
%f3_i1 = bitcast i8* %f3_i0 to <8 x float>* | |
%f3 = load <8 x float>, <8 x float>* %f3_i1, align 32 | |
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 64 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 96 | |
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>* | |
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32 | |
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 128 | |
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>* | |
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32 | |
%f7_i0 = getelementptr inbounds i8, i8* %params, i64 160 | |
%f7_i1 = bitcast i8* %f7_i0 to <8 x float>* | |
%f7 = load <8 x float>, <8 x float>* %f7_i1, align 32 | |
%f8_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f8_i1 = bitcast i8* %f8_i0 to <8 x float>* | |
%f8 = load <8 x float>, <8 x float>* %f8_i1, align 32 | |
; --- %f11 = dot((%f5,%f6,%f7), (%f2,%f3,%f4)).
%f9 = fmul <8 x float> %f5, %f2 | |
%f10 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f6, <8 x float> %f3, <8 x float> %f9) | |
%f11 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f7, <8 x float> %f4, <8 x float> %f10) | |
; %p13: lanes where the dot product is strictly negative (ordered compare, so NaN fails).
%f12_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f12 = shufflevector <8 x float> %f12_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p13 = fcmp olt <8 x float> %f11, %f12 | |
; --- %f15 = |%f11|: the constant's float bit pattern is 0x7fffffff, so the integer
; AND clears only the sign bit.
%f14_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f14 = shufflevector <8 x float> %f14_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f15_0 = bitcast <8 x float> %f11 to <8 x i32> | |
%f15_1 = bitcast <8 x float> %f14 to <8 x i32> | |
%f15_2 = and <8 x i32> %f15_0, %f15_1 | |
%f15 = bitcast <8 x i32> %f15_2 to <8 x float> | |
; --- %f19 = %f8^2 / |dot|, forced to 0 where |dot| == 0 so the division result is
; discarded on those lanes.
%p16 = fcmp one <8 x float> %f15, %f12 | |
%f17 = fmul <8 x float> %f8, %f8 | |
%f18 = fdiv <8 x float> %f17, %f15 | |
%f19 = select <8 x i1> %p16, <8 x float> %f18, <8 x float> zeroinitializer | |
; --- Scale by the per-instance value and zero lanes whose dot product was not negative.
%f20 = fmul <8 x float> %f1, %f19 | |
%f21 = select <8 x i1> %p13, <8 x float> %f20, <8 x float> zeroinitializer | |
; --- Output at byte 224: load / select(%mask, new, old) / store, so disabled lanes
; keep the previous slot contents.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
ret void; | |
} | |
; Dr.Jit-generated 8-lane callable; the VCall comment below labels it as
; mitsuba::Emitter::eval().
;
; Arguments:
;   %mask    - per-lane enable bits for the gathers and for the output selects.
;   %params  - byte buffer: one float vector is read at offset 96 and three results
;              are written at offsets 160, 192 and 224.
;   %data    - base pointer; three consecutive floats per lane are gathered from
;              %data + %offsets + {0, 4, 8}.
;   %offsets - per-lane byte offsets into %data.
;
; Result per lane: the three gathered floats where %f4 > 0, otherwise (0, 0, 0).
;
; NOTE(review): the three floats are presumably a per-instance RGB value and %f4 a
; facing/cosine term - inferred, confirm against the emitter implementation.
define void @func_972f6c492a7ec6284cf60b520b91aba3(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Emitter::eval() | |
; --- Per-instance triple (%f1, %f2, %f3) gathered from %data at +0, +4, +8.
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; --- Call input %f4 from %params offset 96, tested against 0.0.
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 96 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; %p6: lanes where %f4 is strictly positive (ordered compare, so NaN fails).
%p6 = fcmp ogt <8 x float> %f4, %f5 | |
; --- Keep the gathered triple on passing lanes, zero it elsewhere.
%f7 = select <8 x i1> %p6, <8 x float> %f1, <8 x float> zeroinitializer | |
%f8 = select <8 x i1> %p6, <8 x float> %f2, <8 x float> zeroinitializer | |
%f9 = select <8 x i1> %p6, <8 x float> %f3, <8 x float> zeroinitializer | |
; --- Outputs at bytes 160 / 192 / 224: load / select(%mask, new, old) / store, so
; disabled lanes keep the previous slot contents.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 160 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f7, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f8, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f9, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
ret void; | |
} | |
define void @func_19ee132d0bd6c9e577b7250c22c38555(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::Emitter::sample_direction() | |
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 48 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 52 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 56 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 24 | |
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets | |
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*> | |
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 28 | |
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets | |
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*> | |
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 96 | |
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>* | |
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32 | |
%f7_i0 = getelementptr inbounds i8, i8* %params, i64 0 | |
%f7_i1 = bitcast i8* %f7_i0 to <8 x float>* | |
%f7 = load <8 x float>, <8 x float>* %f7_i1, align 32 | |
%f8_i0 = getelementptr inbounds i8, i8* %params, i64 32 | |
%f8_i1 = bitcast i8* %f8_i0 to <8 x float>* | |
%f8 = load <8 x float>, <8 x float>* %f8_i1, align 32 | |
%f9_i0 = getelementptr inbounds i8, i8* %params, i64 64 | |
%f9_i1 = bitcast i8* %f9_i0 to <8 x float>* | |
%f9 = load <8 x float>, <8 x float>* %f9_i1, align 32 | |
%f10_i0 = getelementptr inbounds i8, i8* %params, i64 128 | |
%f10_i1 = bitcast i8* %f10_i0 to <8 x float>* | |
%f10 = load <8 x float>, <8 x float>* %f10_i1, align 32 | |
%p11 = bitcast <8 x i1> %mask to <8 x i1> | |
%r12_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%f13_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0 | |
%f13 = shufflevector <8 x float> %f13_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f14 = fmul <8 x float> %f10, %f4 | |
%r15_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r15 = shufflevector <8 x i32> %r15_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%rd16_p1 = getelementptr inbounds i8, i8* %data, i32 16 | |
%rd16_p2 = getelementptr inbounds i8, i8* %rd16_p1, <8 x i32> %offsets | |
%rd16_p3 = bitcast <8 x i8*> %rd16_p2 to <8 x i64*> | |
%rd16_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd16_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd16 = inttoptr <8 x i64> %rd16_p4 to <8 x i8*> | |
%f17_0 = bitcast <8 x i8*> %rd16 to <8 x float*> | |
%f17_1 = getelementptr float, <8 x float*> %f17_0, <8 x i32> %r12 | |
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%p18 = fcmp olt <8 x float> %f17, %f14 | |
%r19 = select <8 x i1> %p18, <8 x i32> %r15, <8 x i32> zeroinitializer | |
%rd20_p1 = getelementptr inbounds i8, i8* %data, i32 32 | |
%rd20_p2 = getelementptr inbounds i8, i8* %rd20_p1, <8 x i32> %offsets | |
%rd20_p3 = bitcast <8 x i8*> %rd20_p2 to <8 x i64*> | |
%rd20_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd20_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd20 = inttoptr <8 x i64> %rd20_p4 to <8 x i8*> | |
%f21_0 = bitcast <8 x i8*> %rd20 to <8 x float*> | |
%f21_1 = getelementptr float, <8 x float*> %f21_0, <8 x i32> %r19 | |
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f22 = fmul <8 x float> %f21, %f5 | |
%r23 = sub <8 x i32> %r19, %r15 | |
%p24 = icmp ugt <8 x i32> %r19, %r12 | |
%p25 = and <8 x i1> %p24, %p11 | |
%f26_0 = bitcast <8 x i8*> %rd16 to <8 x float*> | |
%f26_1 = getelementptr float, <8 x float*> %f26_0, <8 x i32> %r23 | |
%f26 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f26_1, i32 4, <8 x i1> %p25, <8 x float> zeroinitializer) | |
%f27 = fmul <8 x float> %f26, %f5 | |
%f28 = fsub <8 x float> %f10, %f27 | |
%f29 = fdiv <8 x float> %f28, %f22 | |
%r30_1 = insertelement <8 x i32> undef, i32 3, i32 0 | |
%r30 = shufflevector <8 x i32> %r30_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r31 = mul <8 x i32> %r19, %r30 | |
%r32 = add <8 x i32> %r31, %r15 | |
%r33_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r34 = add <8 x i32> %r31, %r33 | |
%rd35_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%rd35_p2 = getelementptr inbounds i8, i8* %rd35_p1, <8 x i32> %offsets | |
%rd35_p3 = bitcast <8 x i8*> %rd35_p2 to <8 x i64*> | |
%rd35_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd35_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd35 = inttoptr <8 x i64> %rd35_p4 to <8 x i8*> | |
%r36_0 = bitcast <8 x i8*> %rd35 to <8 x i32*> | |
%r36_1 = getelementptr i32, <8 x i32*> %r36_0, <8 x i32> %r31 | |
%r36 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r36_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer) | |
%r37_0 = bitcast <8 x i8*> %rd35 to <8 x i32*> | |
%r37_1 = getelementptr i32, <8 x i32*> %r37_0, <8 x i32> %r32 | |
%r37 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r37_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer) | |
%r38_0 = bitcast <8 x i8*> %rd35 to <8 x i32*> | |
%r38_1 = getelementptr i32, <8 x i32*> %r38_0, <8 x i32> %r34 | |
%r38 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r38_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer) | |
%r39 = mul <8 x i32> %r36, %r30 | |
%r40 = add <8 x i32> %r39, %r15 | |
%r41 = add <8 x i32> %r39, %r33 | |
%rd42_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%rd42_p2 = getelementptr inbounds i8, i8* %rd42_p1, <8 x i32> %offsets | |
%rd42_p3 = bitcast <8 x i8*> %rd42_p2 to <8 x i64*> | |
%rd42_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd42_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd42 = inttoptr <8 x i64> %rd42_p4 to <8 x i8*> | |
%f43_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f43_1 = getelementptr float, <8 x float*> %f43_0, <8 x i32> %r39 | |
%f43 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f43_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f44_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f44_1 = getelementptr float, <8 x float*> %f44_0, <8 x i32> %r40 | |
%f44 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f44_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f45_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f45_1 = getelementptr float, <8 x float*> %f45_0, <8 x i32> %r41 | |
%f45 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f45_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%r46 = mul <8 x i32> %r37, %r30 | |
%r47 = add <8 x i32> %r46, %r15 | |
%r48 = add <8 x i32> %r46, %r33 | |
%f49_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f49_1 = getelementptr float, <8 x float*> %f49_0, <8 x i32> %r46 | |
%f49 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f49_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f50_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f50_1 = getelementptr float, <8 x float*> %f50_0, <8 x i32> %r47 | |
%f50 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f50_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f51_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f51_1 = getelementptr float, <8 x float*> %f51_0, <8 x i32> %r48 | |
%f51 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f51_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%r52 = mul <8 x i32> %r38, %r30 | |
%r53 = add <8 x i32> %r52, %r15 | |
%r54 = add <8 x i32> %r52, %r33 | |
%f55_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f55_1 = getelementptr float, <8 x float*> %f55_0, <8 x i32> %r52 | |
%f55 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f55_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f56_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f56_1 = getelementptr float, <8 x float*> %f56_0, <8 x i32> %r53 | |
%f56 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f56_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f57_0 = bitcast <8 x i8*> %rd42 to <8 x float*> | |
%f57_1 = getelementptr float, <8 x float*> %f57_0, <8 x i32> %r54 | |
%f57 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f57_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f58 = fsub <8 x float> %f49, %f43 | |
%f59 = fsub <8 x float> %f50, %f44 | |
%f60 = fsub <8 x float> %f51, %f45 | |
%f61 = fsub <8 x float> %f55, %f43 | |
%f62 = fsub <8 x float> %f56, %f44 | |
%f63 = fsub <8 x float> %f57, %f45 | |
%f64_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f64 = shufflevector <8 x float> %f64_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f65 = fsub <8 x float> %f64, %f6 | |
%f66_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f66 = shufflevector <8 x float> %f66_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f67 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f65, <8 x float> %f66) | |
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67) | |
%f69 = fsub <8 x float> %f64, %f68 | |
%f70 = fmul <8 x float> %f68, %f29 | |
%f71 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f70, <8 x float> %f43) | |
%f72 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f62, <8 x float> %f70, <8 x float> %f44) | |
%f73 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f70, <8 x float> %f45) | |
%f74 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f58, <8 x float> %f69, <8 x float> %f71) | |
%f75 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f69, <8 x float> %f72) | |
%f76 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f60, <8 x float> %f69, <8 x float> %f73) | |
%rd77_p1 = getelementptr inbounds i8, i8* %data, i32 40 | |
%rd77_p2 = getelementptr inbounds i8, i8* %rd77_p1, <8 x i32> %offsets | |
%rd77_p3 = bitcast <8 x i8*> %rd77_p2 to <8 x i64*> | |
%rd77_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd77_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer) | |
%rd77 = inttoptr <8 x i64> %rd77_p4 to <8 x i8*> | |
%f78_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f78_1 = getelementptr float, <8 x float*> %f78_0, <8 x i32> %r39 | |
%f78 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f78_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f79_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f79_1 = getelementptr float, <8 x float*> %f79_0, <8 x i32> %r40 | |
%f79 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f79_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f80_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f80_1 = getelementptr float, <8 x float*> %f80_0, <8 x i32> %r41 | |
%f80 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f80_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f81_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f81_1 = getelementptr float, <8 x float*> %f81_0, <8 x i32> %r46 | |
%f81 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f81_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f82_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f82_1 = getelementptr float, <8 x float*> %f82_0, <8 x i32> %r47 | |
%f82 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f82_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f83_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f83_1 = getelementptr float, <8 x float*> %f83_0, <8 x i32> %r48 | |
%f83 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f83_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f84_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f84_1 = getelementptr float, <8 x float*> %f84_0, <8 x i32> %r52 | |
%f84 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f84_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f85_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f85_1 = getelementptr float, <8 x float*> %f85_0, <8 x i32> %r53 | |
%f85 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f85_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f86_0 = bitcast <8 x i8*> %rd77 to <8 x float*> | |
%f86_1 = getelementptr float, <8 x float*> %f86_0, <8 x i32> %r54 | |
%f86 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f86_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer) | |
%f87 = fsub <8 x float> %f64, %f69 | |
%f88 = fsub <8 x float> %f87, %f70 | |
%f89 = fmul <8 x float> %f84, %f70 | |
%f90 = fmul <8 x float> %f85, %f70 | |
%f91 = fmul <8 x float> %f86, %f70 | |
%f92 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f81, <8 x float> %f69, <8 x float> %f89) | |
%f93 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f82, <8 x float> %f69, <8 x float> %f90) | |
%f94 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f83, <8 x float> %f69, <8 x float> %f91) | |
%f95 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f78, <8 x float> %f88, <8 x float> %f92) | |
%f96 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f79, <8 x float> %f88, <8 x float> %f93) | |
%f97 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f80, <8 x float> %f88, <8 x float> %f94) | |
%f98 = fmul <8 x float> %f95, %f95 | |
%f99 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f96, <8 x float> %f98) | |
%f100 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f97, <8 x float> %f99) | |
%f101 = fdiv <8 x float> %f64, %f100 | |
%f102 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f101) | |
%f103 = fmul <8 x float> %f95, %f102 | |
%f104 = fmul <8 x float> %f96, %f102 | |
%f105 = fmul <8 x float> %f97, %f102 | |
%f106 = fsub <8 x float> %f74, %f7 | |
%f107 = fsub <8 x float> %f75, %f8 | |
%f108 = fsub <8 x float> %f76, %f9 | |
%f109 = fmul <8 x float> %f106, %f106 | |
%f110 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f107, <8 x float> %f107, <8 x float> %f109) | |
%f111 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f108, <8 x float> %f108, <8 x float> %f110) | |
%f112 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f111) | |
%f113 = fdiv <8 x float> %f64, %f112 | |
%f114 = fmul <8 x float> %f106, %f113 | |
%f115 = fmul <8 x float> %f107, %f113 | |
%f116 = fmul <8 x float> %f108, %f113 | |
%f117 = fmul <8 x float> %f114, %f103 | |
%f118 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f115, <8 x float> %f104, <8 x float> %f117) | |
%f119 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f116, <8 x float> %f105, <8 x float> %f118) | |
%f120_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f120 = shufflevector <8 x float> %f120_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f121_0 = bitcast <8 x float> %f119 to <8 x i32> | |
%f121_1 = bitcast <8 x float> %f120 to <8 x i32> | |
%f121_2 = and <8 x i32> %f121_0, %f121_1 | |
%f121 = bitcast <8 x i32> %f121_2 to <8 x float> | |
%f122 = fdiv <8 x float> %f111, %f121 | |
%f123_0 = bitcast <8 x float> %f122 to <8 x i32> | |
%f123_1 = bitcast <8 x float> %f120 to <8 x i32> | |
%f123_2 = and <8 x i32> %f123_0, %f123_1 | |
%f123 = bitcast <8 x i32> %f123_2 to <8 x float> | |
%p124 = fcmp olt <8 x float> %f123, %f13 | |
%f125 = select <8 x i1> %p124, <8 x float> %f122, <8 x float> zeroinitializer | |
%f126 = fmul <8 x float> %f5, %f125 | |
%p127 = fcmp olt <8 x float> %f119, %f66 | |
%p128 = fcmp one <8 x float> %f126, %f66 | |
%p129 = and <8 x i1> %p127, %p128 | |
%f130 = fdiv <8 x float> %f64, %f126 | |
%f131 = fmul <8 x float> %f1, %f130 | |
%f132 = fmul <8 x float> %f2, %f130 | |
%f133 = fmul <8 x float> %f3, %f130 | |
%f134 = select <8 x i1> %p129, <8 x float> %f131, <8 x float> zeroinitializer | |
%f135 = select <8 x i1> %p129, <8 x float> %f132, <8 x float> zeroinitializer | |
%f136 = select <8 x i1> %p129, <8 x float> %f133, <8 x float> zeroinitializer | |
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 160 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f74, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f75, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f76, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>* | |
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x float> %f126, <8 x float> %out_8_2 | |
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32 | |
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>* | |
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32 | |
%out_9_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_9_2 | |
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32 | |
%out_10_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_10_1 = bitcast i8* %out_10_0 to <8 x float>* | |
%out_10_2 = load <8 x float>, <8 x float>* %out_10_1, align 32 | |
%out_10_3 = select <8 x i1> %mask, <8 x float> %f115, <8 x float> %out_10_2 | |
store <8 x float> %out_10_3, <8 x float>* %out_10_1, align 32 | |
%out_11_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_11_1 = bitcast i8* %out_11_0 to <8 x float>* | |
%out_11_2 = load <8 x float>, <8 x float>* %out_11_1, align 32 | |
%out_11_3 = select <8 x i1> %mask, <8 x float> %f116, <8 x float> %out_11_2 | |
store <8 x float> %out_11_3, <8 x float>* %out_11_1, align 32 | |
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>* | |
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32 | |
%out_14_3 = select <8 x i1> %mask, <8 x float> %f134, <8 x float> %out_14_2 | |
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32 | |
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>* | |
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32 | |
%out_15_3 = select <8 x i1> %mask, <8 x float> %f135, <8 x float> %out_15_2 | |
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32 | |
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>* | |
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32 | |
%out_16_3 = select <8 x i1> %mask, <8 x float> %f136, <8 x float> %out_16_2 | |
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32 | |
ret void; | |
} | |
; Callable body generated for a vectorized virtual call; the VCall tag below
; names the target as mitsuba::BSDF::eval_pdf(). All values are 8 lanes wide.
;
; Arguments:
;   %mask    - per-lane activity mask; guards the gathers and the output blends.
;   %params  - byte buffer holding <8 x float> call inputs (read at +192, +256)
;              and outputs (written at +288, +320, +352, +384).
;   %data    - base of per-instance data; three floats are gathered from it.
;   %offsets - per-lane byte offsets added to %data before gathering.
;
; NOTE(review): the arithmetic (three coefficients * 1/pi * one input, plus
; 1/pi * the same input) looks like a diffuse BSDF value and cosine-weighted
; pdf -- presumably %f1..%f3 are an RGB reflectance and %f5 is cos(theta_o);
; confirm against the plugin that produced this trace.
define void @func_e9d5179176fa4aaf3f8d2cb1ff4d2c84(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::eval_pdf() | |
; Gather one float per lane from %data + 0 + %offsets (inactive lanes read as 0).
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; Same gather at byte offset 4.
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; Same gather at byte offset 8.
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; Load the two vector inputs stored in %params at byte offsets 192 and 256.
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>* | |
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32 | |
; %f6 = splat(0.0); %p9 is true only in lanes where both inputs are > 0.
%f6_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p7 = fcmp ogt <8 x float> %f4, %f6 | |
%p8 = fcmp ogt <8 x float> %f5, %f6 | |
%p9 = and <8 x i1> %p7, %p8 | |
; %f10 = splat(0x3fd45f3060000000), i.e. ~0.31830987 = 1/pi rounded to float.
%f10_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; Scale the three gathered values by 1/pi, then by %f5.
%f11 = fmul <8 x float> %f1, %f10 | |
%f12 = fmul <8 x float> %f2, %f10 | |
%f13 = fmul <8 x float> %f3, %f10 | |
%f14 = fmul <8 x float> %f11, %f5 | |
%f15 = fmul <8 x float> %f12, %f5 | |
%f16 = fmul <8 x float> %f13, %f5 | |
; Scalar companion term: %f5 / pi.
%f17 = fmul <8 x float> %f10, %f5 | |
; Zero every result in lanes where %p9 is false.
%f18 = select <8 x i1> %p9, <8 x float> %f14, <8 x float> zeroinitializer | |
%f19 = select <8 x i1> %p9, <8 x float> %f15, <8 x float> zeroinitializer | |
%f20 = select <8 x i1> %p9, <8 x float> %f16, <8 x float> zeroinitializer | |
%f21 = select <8 x i1> %p9, <8 x float> %f17, <8 x float> zeroinitializer | |
; Write-back: each output slot is loaded, blended with the new value under
; %mask, and stored again, so inactive lanes keep their previous contents.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f18, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f19, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f20, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
; Fourth slot (+384) receives the scalar term %f21.
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
ret void; | |
} | |
; Callable body generated for a vectorized virtual call; the VCall tag below
; names the target as mitsuba::BSDF::eval_pdf(). This variant reads no inputs:
; it writes 0.0 into the four <8 x float> output slots of %params (byte
; offsets 288, 320, 352, 384) for every lane enabled in %mask.
; %data and %offsets are part of the common callable signature but are unused.
;
; NOTE(review): presumably this is the instance for a BSDF whose eval/pdf is
; identically zero (e.g. a null or purely specular BSDF) -- confirm.
define void @func_9d53124ccfd48ba20820a9ee3e480cd9(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::eval_pdf() | |
; %f1 = splat(0.0), the value written to every output.
%f1_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f1 = shufflevector <8 x float> %f1_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; Write-back: load the slot, blend under %mask, store; inactive lanes keep
; whatever the slot already held.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
ret void; | |
} | |
; Callable body generated for a vectorized virtual call; the VCall tag below
; names the target as mitsuba::BSDF::eval_pdf(). All values are 8 lanes wide.
;
; Arguments:
;   %mask    - per-lane activity mask; guards the gathers and the output blends.
;   %params  - byte buffer holding <8 x float> call inputs (read at +192, +256)
;              and outputs (written at +288, +320, +352, +384).
;   %data    - base of per-instance data; three floats are gathered from it.
;   %offsets - per-lane byte offsets added to %data before gathering.
;
; The results are three gathered coefficients * 1/pi * %f5 (slots +288..+352)
; and 1/pi * %f5 (slot +384), all zeroed where either input is not > 0.
; NOTE(review): presumably a diffuse BSDF value plus cosine-weighted pdf, with
; %f5 being cos(theta_o) -- confirm against the plugin that produced this trace.
define void @func_4448b4670e928f754bf33368168c3f24(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::eval_pdf() | |
; Gather one float per lane from %data + {0, 4, 8} + %offsets; lanes disabled
; in %mask read as 0.
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; Load the two vector inputs stored in %params at byte offsets 192 and 256.
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>* | |
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32 | |
; %f6 = splat(0.0); %p9 is true only in lanes where both inputs are > 0.
%f6_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p7 = fcmp ogt <8 x float> %f4, %f6 | |
%p8 = fcmp ogt <8 x float> %f5, %f6 | |
%p9 = and <8 x i1> %p7, %p8 | |
; %f10 = splat(0x3fd45f3060000000), i.e. ~0.31830987 = 1/pi rounded to float.
%f10_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; Scalar term first: %f5 / pi, zeroed where %p9 is false.
%f11 = fmul <8 x float> %f10, %f5 | |
%f12 = select <8 x i1> %p9, <8 x float> %f11, <8 x float> zeroinitializer | |
; Three-component term: gathered values * 1/pi * %f5, zeroed where %p9 is false.
%f13 = fmul <8 x float> %f1, %f10 | |
%f14 = fmul <8 x float> %f2, %f10 | |
%f15 = fmul <8 x float> %f3, %f10 | |
%f16 = fmul <8 x float> %f13, %f5 | |
%f17 = fmul <8 x float> %f14, %f5 | |
%f18 = fmul <8 x float> %f15, %f5 | |
%f19 = select <8 x i1> %p9, <8 x float> %f16, <8 x float> zeroinitializer | |
%f20 = select <8 x i1> %p9, <8 x float> %f17, <8 x float> zeroinitializer | |
%f21 = select <8 x i1> %p9, <8 x float> %f18, <8 x float> zeroinitializer | |
; Write-back: each output slot is loaded, blended with the new value under
; %mask, and stored again, so inactive lanes keep their previous contents.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f19, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f20, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
; Fourth slot (+384) receives the scalar term %f12 computed above.
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f12, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
ret void; | |
} | |
define void @func_60a1a348aca7bdb47e851f6cedce38a2(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::sample() | |
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 352 | |
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>* | |
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32 | |
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 384 | |
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>* | |
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32 | |
%f7_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p8 = fcmp ogt <8 x float> %f4, %f7 | |
%f9_1 = insertelement <8 x float> undef, float 0x4000000000000000, i32 0 | |
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f10_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f11 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f5, <8 x float> %f10) | |
%f12 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f6, <8 x float> %f10) | |
%p13 = fcmp oeq <8 x float> %f11, %f7 | |
%p14 = fcmp oeq <8 x float> %f12, %f7 | |
%p15 = and <8 x i1> %p13, %p14 | |
%f16_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f16 = shufflevector <8 x float> %f16_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f17_0 = bitcast <8 x float> %f11 to <8 x i32> | |
%f17_1 = bitcast <8 x float> %f16 to <8 x i32> | |
%f17_2 = and <8 x i32> %f17_0, %f17_1 | |
%f17 = bitcast <8 x i32> %f17_2 to <8 x float> | |
%f18_0 = bitcast <8 x float> %f12 to <8 x i32> | |
%f18_1 = bitcast <8 x float> %f16 to <8 x i32> | |
%f18_2 = and <8 x i32> %f18_0, %f18_1 | |
%f18 = bitcast <8 x i32> %f18_2 to <8 x float> | |
%p19 = fcmp olt <8 x float> %f17, %f18 | |
%f20 = select <8 x i1> %p19, <8 x float> %f12, <8 x float> %f11 | |
%f21 = select <8 x i1> %p19, <8 x float> %f11, <8 x float> %f12 | |
%f22_1 = insertelement <8 x float> undef, float 0x3fe921fb60000000, i32 0 | |
%f22 = shufflevector <8 x float> %f22_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f23 = fmul <8 x float> %f22, %f21 | |
%f24 = fdiv <8 x float> %f23, %f20 | |
%f25_1 = insertelement <8 x float> undef, float 0x3ff921fb60000000, i32 0 | |
%f25 = shufflevector <8 x float> %f25_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f26 = fsub <8 x float> %f25, %f24 | |
%f27 = select <8 x i1> %p19, <8 x float> %f26, <8 x float> %f24 | |
%f28 = select <8 x i1> %p15, <8 x float> %f7, <8 x float> %f27 | |
%f29_0 = bitcast <8 x float> %f28 to <8 x i32> | |
%f29_1 = bitcast <8 x float> %f16 to <8 x i32> | |
%f29_2 = and <8 x i32> %f29_0, %f29_1 | |
%f29 = bitcast <8 x i32> %f29_2 to <8 x float> | |
%f30_1 = insertelement <8 x float> undef, float 0x3ff45f3060000000, i32 0 | |
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f31 = fmul <8 x float> %f29, %f30 | |
%r32 = fptosi <8 x float> %f31 to <8 x i32> | |
%r33_1 = insertelement <8 x i32> undef, i32 1, i32 0 | |
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r34 = add <8 x i32> %r32, %r33 | |
%r35_1 = insertelement <8 x i32> undef, i32 4294967294, i32 0 | |
%r35 = shufflevector <8 x i32> %r35_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r36 = and <8 x i32> %r34, %r35 | |
%f37 = sitofp <8 x i32> %r36 to <8 x float> | |
%r38_1 = insertelement <8 x i32> undef, i32 29, i32 0 | |
%r38 = shufflevector <8 x i32> %r38_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r39 = shl <8 x i32> %r36, %r38 | |
%f40 = bitcast <8 x i32> %r39 to <8 x float> | |
%f41_0 = bitcast <8 x float> %f40 to <8 x i32> | |
%f41_1 = bitcast <8 x float> %f28 to <8 x i32> | |
%f41_2 = xor <8 x i32> %f41_0, %f41_1 | |
%f41 = bitcast <8 x i32> %f41_2 to <8 x float> | |
%r42_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r43 = sub <8 x i32> %r36, %r42 | |
%r44 = xor <8 x i32> %r43, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> | |
%r45 = shl <8 x i32> %r44, %r38 | |
%f46 = bitcast <8 x i32> %r45 to <8 x float> | |
%f47_1 = insertelement <8 x float> undef, float 0x3fe9200000000000, i32 0 | |
%f47 = shufflevector <8 x float> %f47_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f48 = fmul <8 x float> %f37, %f47 | |
%f49 = fsub <8 x float> %f29, %f48 | |
%f50_1 = insertelement <8 x float> undef, float 0x3f2fb40000000000, i32 0 | |
%f50 = shufflevector <8 x float> %f50_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f51 = fmul <8 x float> %f37, %f50 | |
%f52 = fsub <8 x float> %f49, %f51 | |
%f53_1 = insertelement <8 x float> undef, float 0x3e64442d20000000, i32 0 | |
%f53 = shufflevector <8 x float> %f53_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f54 = fmul <8 x float> %f37, %f53 | |
%f55 = fsub <8 x float> %f52, %f54 | |
%f56 = fmul <8 x float> %f55, %f55 | |
%f57_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0 | |
%f57 = shufflevector <8 x float> %f57_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p58 = fcmp oeq <8 x float> %f29, %f57 | |
%f59_0 = sext <8 x i1> %p58 to <8 x i32> | |
%f59_1 = bitcast <8 x float> %f56 to <8 x i32> | |
%f59_2 = or <8 x i32> %f59_0, %f59_1 | |
%f59 = bitcast <8 x i32> %f59_2 to <8 x float> | |
%f60_1 = insertelement <8 x float> undef, float 0xbfc5555460000000, i32 0 | |
%f60 = shufflevector <8 x float> %f60_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f61_1 = insertelement <8 x float> undef, float 0x3f811073c0000000, i32 0 | |
%f61 = shufflevector <8 x float> %f61_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f62_1 = insertelement <8 x float> undef, float 0xbf29943f20000000, i32 0 | |
%f62 = shufflevector <8 x float> %f62_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f61, <8 x float> %f60) | |
%f64 = fmul <8 x float> %f59, %f59 | |
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f64, <8 x float> %f62, <8 x float> %f63) | |
%f66 = fmul <8 x float> %f65, %f59 | |
%f67_1 = insertelement <8 x float> undef, float 0x3fa55554a0000000, i32 0 | |
%f67 = shufflevector <8 x float> %f67_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f68_1 = insertelement <8 x float> undef, float 0xbf56c0c340000000, i32 0 | |
%f68 = shufflevector <8 x float> %f68_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f69_1 = insertelement <8 x float> undef, float 0x3ef99eb9c0000000, i32 0 | |
%f69 = shufflevector <8 x float> %f69_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f70 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f68, <8 x float> %f67) | |
%f71 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f64, <8 x float> %f69, <8 x float> %f70) | |
%f72 = fmul <8 x float> %f71, %f59 | |
%f73 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f66, <8 x float> %f55, <8 x float> %f55) | |
%f74_1 = insertelement <8 x float> undef, float 0xbfe0000000000000, i32 0 | |
%f74 = shufflevector <8 x float> %f74_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f75_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f75 = shufflevector <8 x float> %f75_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f76 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f74, <8 x float> %f75) | |
%f77 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f72, <8 x float> %f59, <8 x float> %f76) | |
%r78 = and <8 x i32> %r36, %r42 | |
%r79_1 = insertelement <8 x i32> undef, i32 0, i32 0 | |
%r79 = shufflevector <8 x i32> %r79_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p80 = icmp eq <8 x i32> %r78, %r79 | |
%f81 = select <8 x i1> %p80, <8 x float> %f73, <8 x float> %f77 | |
%f82_1 = insertelement <8 x float> undef, float 0x8000000000000000, i32 0 | |
%f82 = shufflevector <8 x float> %f82_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f83_0 = bitcast <8 x float> %f82 to <8 x i32> | |
%f83_1 = bitcast <8 x float> %f41 to <8 x i32> | |
%f83_2 = and <8 x i32> %f83_0, %f83_1 | |
%f83 = bitcast <8 x i32> %f83_2 to <8 x float> | |
%f84_0 = bitcast <8 x float> %f81 to <8 x i32> | |
%f84_1 = bitcast <8 x float> %f83 to <8 x i32> | |
%f84_2 = xor <8 x i32> %f84_0, %f84_1 | |
%f84 = bitcast <8 x i32> %f84_2 to <8 x float> | |
%f85 = select <8 x i1> %p80, <8 x float> %f77, <8 x float> %f73 | |
%f86_0 = bitcast <8 x float> %f82 to <8 x i32> | |
%f86_1 = bitcast <8 x float> %f46 to <8 x i32> | |
%f86_2 = and <8 x i32> %f86_0, %f86_1 | |
%f86 = bitcast <8 x i32> %f86_2 to <8 x float> | |
%f87_0 = bitcast <8 x float> %f85 to <8 x i32> | |
%f87_1 = bitcast <8 x float> %f86 to <8 x i32> | |
%f87_2 = xor <8 x i32> %f87_0, %f87_1 | |
%f87 = bitcast <8 x i32> %f87_2 to <8 x float> | |
%f88 = fmul <8 x float> %f20, %f87 | |
%f89 = fmul <8 x float> %f20, %f84 | |
%f90 = fmul <8 x float> %f88, %f88 | |
%f91 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f89, <8 x float> %f89, <8 x float> %f90) | |
%f92 = fsub <8 x float> %f75, %f91 | |
%f93 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f92, <8 x float> %f7) | |
%f94 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f93) | |
%f95_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0 | |
%f95 = shufflevector <8 x float> %f95_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f96 = fmul <8 x float> %f95, %f94 | |
%r97_1 = insertelement <8 x i32> undef, i32 2, i32 0 | |
%r97 = shufflevector <8 x i32> %r97_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%p98 = fcmp ogt <8 x float> %f96, %f7 | |
%p99 = and <8 x i1> %p8, %p98 | |
%f100 = select <8 x i1> %p99, <8 x float> %f1, <8 x float> zeroinitializer | |
%f101 = select <8 x i1> %p99, <8 x float> %f2, <8 x float> zeroinitializer | |
%f102 = select <8 x i1> %p99, <8 x float> %f3, <8 x float> zeroinitializer | |
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f88, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f89, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f94, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f96, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f75, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>* | |
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r97, <8 x i32> %out_5_2 | |
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f100, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>* | |
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x float> %f101, <8 x float> %out_8_2 | |
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32 | |
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>* | |
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32 | |
%out_9_3 = select <8 x i1> %mask, <8 x float> %f102, <8 x float> %out_9_2 | |
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32 | |
ret void; | |
} | |
; Vectorized (8-lane) callable. Reads four float vectors from the %params block,
; derives a two-way per-lane choice from them, and writes nine result vectors
; back into %params. Every store is a load/select/store sequence keyed on %mask,
; so lanes whose mask bit is clear keep the value already in the block.
;   %mask    - lanes for which results are written
;   %params  - byte-addressed block holding both the inputs and the output slots
;   %data    - not referenced in this body
;   %offsets - not referenced in this body
define void @func_e6fff8d6b222436f71518443ce068358(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::sample() | |
; Inputs: %f1 = params+256, %f2 = params+192, %f3 = params+224, %f4 = params+320.
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>* | |
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32 | |
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>* | |
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32 | |
%f3_i0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%f3_i1 = bitcast i8* %f3_i0 to <8 x float>* | |
%f3 = load <8 x float>, <8 x float>* %f3_i1, align 32 | |
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 320 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
; Broadcast constants (insertelement + shufflevector splat): %f5 = 0.0,
; %f6 = float bit pattern 0x7fffffff (used below as an abs() mask), %f7 = 1.0,
; %f8 ~= 1.50418.
%f5_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f6_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0 | |
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f7_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f8_1 = insertelement <8 x float> undef, float 0x3ff8112280000000, i32 0 | |
%f8 = shufflevector <8 x float> %f8_1, <8 x float> undef, <8 x i32> zeroinitializer | |
; %p9 = (%f1 >= 0). %f10 ~= 0.66481, i.e. approximately 1/%f8.
; %f11 / %f12 pick %f8 or %f10 in opposite order depending on the sign of %f1.
; NOTE(review): this looks like a relative index of refraction and its
; reciprocal, selected by which side the direction lies on -- confirm.
%p9 = fcmp oge <8 x float> %f1, %f5 | |
%f10_1 = insertelement <8 x float> undef, float 0x3fe5462520000000, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f11 = select <8 x i1> %p9, <8 x float> %f8, <8 x float> %f10 | |
%f12 = select <8 x i1> %p9, <8 x float> %f10, <8 x float> %f8 | |
; %f14 = 1 - %f1^2; %f15 = %f12^2; %f17 = 1 - %f14 * %f15.
%f13 = fneg <8 x float> %f1 | |
%f14 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f1, <8 x float> %f13, <8 x float> %f7) | |
%f15 = fmul <8 x float> %f12, %f12 | |
%f16 = fneg <8 x float> %f15 | |
%f17 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f16, <8 x float> %f7) | |
; %f18 = |%f1| (sign bit cleared via the 0x7fffffff mask).
%f18_0 = bitcast <8 x float> %f1 to <8 x i32> | |
%f18_1 = bitcast <8 x float> %f6 to <8 x i32> | |
%f18_2 = and <8 x i32> %f18_0, %f18_1 | |
%f18 = bitcast <8 x i32> %f18_2 to <8 x float> | |
; %f20 = sqrt(max(%f17, 0)); the clamp keeps the sqrt argument non-negative.
%f19 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f17, <8 x float> %f5) | |
%f20 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f19) | |
%p21 = fcmp oeq <8 x float> %f18, %f5 | |
; %f25 = (|f1| - f11*f20) / (|f1| + f11*f20)
; %f29 = (f20 - f11*|f1|) / (f20 + f11*|f1|)
; %f34 = 0.5 * (%f25^2 + %f29^2); %f35 = 1.0 where |f1| == 0, else %f34.
; NOTE(review): matches the usual unpolarized dielectric Fresnel reflectance
; with %f1 as the incident cosine and %f20 as the transmitted cosine --
; inferred from the arithmetic only, confirm against the plugin source.
%f22 = fneg <8 x float> %f20 | |
%f23 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f22, <8 x float> %f18) | |
%f24 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f20, <8 x float> %f18) | |
%f25 = fdiv <8 x float> %f23, %f24 | |
%f26 = fneg <8 x float> %f18 | |
%f27 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f26, <8 x float> %f20) | |
%f28 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f18, <8 x float> %f20) | |
%f29 = fdiv <8 x float> %f27, %f28 | |
%f30 = fmul <8 x float> %f25, %f25 | |
%f31 = fmul <8 x float> %f29, %f29 | |
%f32 = fadd <8 x float> %f30, %f31 | |
%f33_1 = insertelement <8 x float> undef, float 0x3fe0000000000000, i32 0 | |
%f33 = shufflevector <8 x float> %f33_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f34 = fmul <8 x float> %f33, %f32 | |
%f35 = select <8 x i1> %p21, <8 x float> %f7, <8 x float> %f34 | |
; %f36 = %f20 with its sign set opposite to the sign test on %f1.
%f36 = select <8 x i1> %p9, <8 x float> %f22, <8 x float> %f20 | |
; Per-lane two-way choice: %p38 = (%f4 <= %f35), %p40 = !%p38.
; %f39 = %f35 on the %p38 branch, 1 - %f35 on the other.
; %r43 = 32 on the %p38 branch, 64 on the other.
; NOTE(review): presumably %f4 is a uniform random sample choosing reflection
; vs. transmission, %f39 the resulting probability, and 32/64 component-type
; flags -- confirm.
%f37 = fsub <8 x float> %f7, %f35 | |
%p38 = fcmp ole <8 x float> %f4, %f35 | |
%f39 = select <8 x i1> %p38, <8 x float> %f35, <8 x float> %f37 | |
%p40 = xor <8 x i1> %p38, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> | |
%r41_1 = insertelement <8 x i32> undef, i32 32, i32 0 | |
%r41 = shufflevector <8 x i32> %r41_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r42_1 = insertelement <8 x i32> undef, i32 64, i32 0 | |
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
%r43 = select <8 x i1> %p38, <8 x i32> %r41, <8 x i32> %r42 | |
; Result triple (%f49, %f50, %f51):
;   %p38 branch:  (-f2, -f3, f1)
;   other branch: (-f12*f2, -f12*f3, f36)
; %f52 = 1.0 or %f11; %f53 = %f15 (= %f12^2) on the !%p38 branch, else 1.0.
%f44 = fneg <8 x float> %f2 | |
%f45 = fneg <8 x float> %f3 | |
%f46 = fneg <8 x float> %f12 | |
%f47 = fmul <8 x float> %f46, %f2 | |
%f48 = fmul <8 x float> %f46, %f3 | |
%f49 = select <8 x i1> %p38, <8 x float> %f44, <8 x float> %f47 | |
%f50 = select <8 x i1> %p38, <8 x float> %f45, <8 x float> %f48 | |
%f51 = select <8 x i1> %p38, <8 x float> %f1, <8 x float> %f36 | |
%f52 = select <8 x i1> %p38, <8 x float> %f7, <8 x float> %f11 | |
%f53 = select <8 x i1> %p40, <8 x float> %f15, <8 x float> %f7 | |
; Masked write-back. Slot offsets and sources:
;   416 <- %f49, 448 <- %f50, 480 <- %f51, 512 <- %f39, 544 <- %f52,
;   576 <- %r43 (i32), 608 / 640 / 672 <- %f53 (same value in all three).
; The numbering jumps from out_5 to out_7; no out_6 slot is written here.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f49, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f50, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f39, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>* | |
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r43, <8 x i32> %out_5_2 | |
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>* | |
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_8_2 | |
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32 | |
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>* | |
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32 | |
%out_9_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_9_2 | |
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32 | |
ret void; | |
} | |
; Vectorized (8-lane) callable. Gathers three per-lane floats from %data,
; reads three float vectors from %params, evaluates a closed-form expression of
; them, and writes nine result vectors back into %params. Every store is a
; load/select/store sequence keyed on %mask, so lanes whose mask bit is clear
; keep the value already in the block.
;   %mask    - lanes that are gathered for and written back
;   %params  - byte-addressed block holding both the inputs and the output slots
;   %data    - base pointer for the per-lane gathers
;   %offsets - per-lane byte offsets added to %data (i8 GEP) before each gather
define void @func_48d4a1fb2376a0011da8c81578897b84(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 { | |
entry: | |
; VCall: mitsuba::BSDF::sample() | |
; Masked gathers of one float each from data + {0, 4, 8} + offsets[lane].
; Lanes with a clear mask bit receive the zero passthrough value.
; NOTE(review): presumably three consecutive float fields of a per-instance
; record selected by %offsets -- confirm.
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0 | |
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets | |
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*> | |
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4 | |
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets | |
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*> | |
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8 | |
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets | |
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*> | |
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer) | |
; Inputs from the parameter block: %f4 = params+256, %f5 = params+192,
; %f6 = params+224.
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 256 | |
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>* | |
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32 | |
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 192 | |
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>* | |
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32 | |
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 224 | |
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>* | |
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32 | |
; Broadcast constants: %f7 = 0.0, %f9 = 2.0, %f10 = 1.0, %f11 = 0.5, %r12 = 32.
; %p8 = (%f4 > 0) gates the final value %f51.
%f7_1 = insertelement <8 x float> undef, float 0x0, i32 0 | |
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%p8 = fcmp ogt <8 x float> %f4, %f7 | |
%f9_1 = insertelement <8 x float> undef, float 0x4000000000000000, i32 0 | |
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f10_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0 | |
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f11_1 = insertelement <8 x float> undef, float 0x3fe0000000000000, i32 0 | |
%f11 = shufflevector <8 x float> %f11_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%r12_1 = insertelement <8 x i32> undef, i32 32, i32 0 | |
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer | |
; %f13 = -%f5 and %f14 = -%f6 are written out unchanged below.
%f13 = fneg <8 x float> %f5 | |
%f14 = fneg <8 x float> %f6 | |
; %f15 = f4^2; %f16 = 1 - f4^2; %f17 = %f16^2.
; %f21 = f2^2 - f3^2 - %f16; %f27 = 4 * f3^2 * f2^2.
; %f30 = sqrt(max(%f21^2 + %f27, 0)).
; %f34 = sqrt(max(0.5 * (%f30 + %f21), 0)).
; NOTE(review): the shape of this arithmetic matches the standard conductor
; Fresnel reflectance with %f4 as the cosine and (%f2, %f3) as the real and
; imaginary parts of the index of refraction -- inferred, confirm against the
; plugin source.
%f15 = fmul <8 x float> %f4, %f4 | |
%f16 = fsub <8 x float> %f10, %f15 | |
%f17 = fmul <8 x float> %f16, %f16 | |
%f18 = fmul <8 x float> %f2, %f2 | |
%f19 = fmul <8 x float> %f3, %f3 | |
%f20 = fsub <8 x float> %f18, %f19 | |
%f21 = fsub <8 x float> %f20, %f16 | |
%f22 = fmul <8 x float> %f21, %f21 | |
%f23_1 = insertelement <8 x float> undef, float 0x4010000000000000, i32 0 | |
%f23 = shufflevector <8 x float> %f23_1, <8 x float> undef, <8 x i32> zeroinitializer | |
%f24 = fmul <8 x float> %f23, %f3 | |
%f25 = fmul <8 x float> %f24, %f3 | |
%f26 = fmul <8 x float> %f25, %f2 | |
%f27 = fmul <8 x float> %f26, %f2 | |
%f28 = fadd <8 x float> %f22, %f27 | |
%f29 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f28, <8 x float> %f7) | |
%f30 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f29) | |
%f31 = fadd <8 x float> %f30, %f21 | |
%f32 = fmul <8 x float> %f11, %f31 | |
%f33 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f32, <8 x float> %f7) | |
%f34 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f33) | |
; %f37 = 2 * f4 * %f34.
; %f40 = (%f30 + %f15 - %f37) / (%f30 + %f15 + %f37).
%f35 = fadd <8 x float> %f30, %f15 | |
%f36 = fmul <8 x float> %f9, %f4 | |
%f37 = fmul <8 x float> %f36, %f34 | |
%f38 = fsub <8 x float> %f35, %f37 | |
%f39 = fadd <8 x float> %f35, %f37 | |
%f40 = fdiv <8 x float> %f38, %f39 | |
; %f42 = %f30 * %f15 + %f17; %f43 = %f37 * %f16.
; %f47 = %f40 * (%f42 - %f43) / (%f42 + %f43).
%f41 = fmul <8 x float> %f30, %f15 | |
%f42 = fadd <8 x float> %f41, %f17 | |
%f43 = fmul <8 x float> %f37, %f16 | |
%f44 = fsub <8 x float> %f42, %f43 | |
%f45 = fmul <8 x float> %f40, %f44 | |
%f46 = fadd <8 x float> %f42, %f43 | |
%f47 = fdiv <8 x float> %f45, %f46 | |
; %f49 = 0.5 * (%f40 + %f47); %f50 scales it by the gathered %f1;
; %f51 is %f50 where %f4 > 0 and zero elsewhere.
%f48 = fadd <8 x float> %f40, %f47 | |
%f49 = fmul <8 x float> %f11, %f48 | |
%f50 = fmul <8 x float> %f1, %f49 | |
%f51 = select <8 x i1> %p8, <8 x float> %f50, <8 x float> zeroinitializer | |
; Masked write-back. Slot offsets and sources:
;   416 <- %f13 (-f5), 448 <- %f14 (-f6), 480 <- %f4, 512 <- 1.0, 544 <- 1.0,
;   576 <- %r12 (i32 32), 608 / 640 / 672 <- %f51 (same value in all three).
; The numbering jumps from out_5 to out_7; no out_6 slot is written here.
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416 | |
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>* | |
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32 | |
%out_0_3 = select <8 x i1> %mask, <8 x float> %f13, <8 x float> %out_0_2 | |
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32 | |
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448 | |
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>* | |
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32 | |
%out_1_3 = select <8 x i1> %mask, <8 x float> %f14, <8 x float> %out_1_2 | |
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32 | |
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480 | |
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>* | |
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32 | |
%out_2_3 = select <8 x i1> %mask, <8 x float> %f4, <8 x float> %out_2_2 | |
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32 | |
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512 | |
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>* | |
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32 | |
%out_3_3 = select <8 x i1> %mask, <8 x float> %f10, <8 x float> %out_3_2 | |
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32 | |
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544 | |
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>* | |
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32 | |
%out_4_3 = select <8 x i1> %mask, <8 x float> %f10, <8 x float> %out_4_2 | |
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32 | |
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576 | |
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>* | |
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32 | |
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r12, <8 x i32> %out_5_2 | |
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32 | |
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608 | |
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>* | |
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32 | |
%out_7_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_7_2 | |
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32 | |
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640 | |
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>* | |
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32 | |
%out_8_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_8_2 | |
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32 | |
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672 | |
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>* | |
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32 | |
%out_9_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_9_2 | |
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32 | |
ret void; | |
} | |
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) | |
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) | |
declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>) | |
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>) | |
declare <8 x float> @llvm.masked.gather.v8f32(<8 x float*>, i32, <8 x i1>, <8 x float>) | |
declare <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>) | |
declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>) | |
; Module-internal pointer to a table of i8* entries; null until set_callables
; runs. The kernel's entry block loads it (`load i8**, i8*** @callables`).
@callables = internal local_unnamed_addr global i8** null, align 8 | |
; Stores %ptr into @callables. No validation is performed; a plain store with
; no atomic ordering.
; NOTE(review): presumably invoked by the host once after compilation and
; before any kernel launch -- confirm against the Dr.Jit LLVM backend.
define void @set_callables(i8** %ptr) local_unnamed_addr #0 { | |
store i8** %ptr, i8*** @callables | |
ret void | |
} | |
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) | |
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) | |
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) | |
declare <8 x float> @llvm.floor.v8f32(<8 x float>) | |
; Atomic scatter-add of an 8-lane float vector with per-address combining.
;   %ptrs      - destination address for each lane
;   %value     - value each lane contributes
;   %active_in - lanes that take part
; Walks lanes 0..7. For each lane that is still active, it sums the values of
; all still-active lanes that target the same address and issues one atomic
; fadd for that address, then retires those lanes. Returns as soon as no
; active lanes remain or all eight indices have been visited.
define internal void @ek.scatter_add.v8f32(<8 x float*> %ptrs, <8 x float> %value, <8 x i1> %active_in) #0 { | |
L0: | |
br label %L1 | |
; L1: loop header. %index is the current lane, %active the set of lanes not
; yet handled. Lanes that are not active skip straight to L3.
L1: | |
%index = phi i32 [ 0, %L0 ], [ %index_next, %L3 ] | |
%active = phi <8 x i1> [ %active_in, %L0 ], [ %active_next_2, %L3 ] | |
%active_i = extractelement <8 x i1> %active, i32 %index | |
br i1 %active_i, label %L2, label %L3 | |
; L2: broadcast this lane's pointer and compare it against every lane to find
; the still-active lanes that share the address (%active_cur).
L2: | |
%ptr_0 = extractelement <8 x float *> %ptrs, i32 %index | |
%ptr_1 = insertelement <8 x float *> undef, float* %ptr_0, i32 0 | |
%ptr_2 = shufflevector <8 x float *> %ptr_1, <8 x float *> undef, <8 x i32> zeroinitializer | |
%ptr_eq = icmp eq <8 x float *> %ptrs, %ptr_2 | |
%active_cur = and <8 x i1> %ptr_eq, %active | |
; Zero out non-matching lanes, then reduce horizontally starting from 0.0.
; `reassoc` lets the reduction be reassociated, so summation order is not fixed.
%value_cur = select <8 x i1> %active_cur, <8 x float> %value, <8 x float> zeroinitializer | |
%sum = call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %value_cur) | |
; One atomic add per distinct address, with monotonic ordering.
atomicrmw fadd float* %ptr_0, float %sum monotonic | |
; Retire the lanes just handled; leave the loop early when none remain.
%active_next = xor <8 x i1> %active, %active_cur | |
%active_red = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %active_next) | |
br i1 %active_red, label %L3, label %L4 | |
; L3: loop latch. Carries the unchanged mask when arriving from L1 (lane was
; inactive) or the reduced mask when arriving from L2, then advances the index.
L3: | |
%active_next_2 = phi <8 x i1> [ %active, %L1 ], [ %active_next, %L2 ] | |
%index_next = add nuw nsw i32 %index, 1 | |
%cond_2 = icmp eq i32 %index_next, 8 | |
br i1 %cond_2, label %L4, label %L1 | |
L4: | |
ret void | |
} | |
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) | |
!0 = !{!0} | |
!1 = !{!1, !0} | |
!2 = !{!1} | |
!3 = !{!"llvm.loop.unroll.disable", !"llvm.loop.vectorize.enable", i1 0} | |
attributes #0 = { norecurse nounwind "frame-pointer"="none" "no-builtins" "no-stack-arg-probe" "target-cpu"="skylake" "target-features"="-vzeroupper,-avx512pf,-tsxldtrk,+cx16,+sahf,-tbm,-avx512ifma,-sha,+crc32,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-avx512bf16,-amx-tile,-uintr,-gfni,+popcnt,-ptwrite,+aes,-avx512bitalg,-movdiri,-widekl,+xsaves,-avx512er,-avxvnni,-avx512fp16,-avx512vnni,-amx-bf16,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,+xsavec,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-kl,-movdir64b,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,-serialize,-hreset,+invpcid,-avx512cd,+avx,-vaes,-amx-int8,+cx8,+fma,-rtm,+bmi,-enqcmd,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+fxsr,-wbnoinvd,+sse,+lzcnt,+pclmul,-rdpru,+f16c,+ssse3,+sgx,-prefetchwt1,+cmov,-avx512vbmi,-shstk,+movbe,-avx512vp2intersect,+xsaveopt,-avx512dq,+sse2,+adx,+sse3" } | |
drjit_32c09595272bc74a586d2fe8f7e4726e:469:31: error: use of undefined value '%f253_final' | |
%f253 = phi <8 x float> [ %f253_final, %l_236_tail ], [ %f220, %l_236_start ] | |
^ | |
Aborted (core dumped) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment