Skip to content

Instantly share code, notes, and snippets.

@arpit15
Created September 16, 2022 14:40
Show Gist options
  • Save arpit15/117ba1b0a1ba350d9d417928c67095ab to your computer and use it in GitHub Desktop.
Save arpit15/117ba1b0a1ba350d9d417928c67095ab to your computer and use it in GitHub Desktop.
mitsuba3 python path integrator dump
python customplugins/path_integrator_refined.py 2>&1 crashdump.txt
Critical Dr.Jit compiler failure: jit_llvm_compile(): parsing failed. Please see the LLVM IR and error message below:
define void @drjit_32c09595272bc74a586d2fe8f7e4726e(i64 %start, i64 %end, i8** noalias %params) #0 {
entry:
%callables = load i8**, i8*** @callables
%buffer = alloca i8, i32 736, align 32
br label %body
body:
%index = phi i64 [ %index_next, %suffix ], [ %start, %entry ]
%f1_p1 = getelementptr inbounds i8*, i8** %params, i32 3
%f1_p2 = load i8*, i8** %f1_p1, align 8, !alias.scope !2
%f1_p3 = bitcast i8* %f1_p2 to float*
%f1_0 = load float, float* %f1_p3, align 4, !alias.scope !2
%f1_1 = insertelement <8 x float> undef, float %f1_0, i32 0
%f1 = shufflevector <8 x float> %f1_1, <8 x float> undef, <8 x i32> zeroinitializer
%f2_p1 = getelementptr inbounds i8*, i8** %params, i32 4
%f2_p2 = load i8*, i8** %f2_p1, align 8, !alias.scope !2
%f2_p3 = bitcast i8* %f2_p2 to float*
%f2_0 = load float, float* %f2_p3, align 4, !alias.scope !2
%f2_1 = insertelement <8 x float> undef, float %f2_0, i32 0
%f2 = shufflevector <8 x float> %f2_1, <8 x float> undef, <8 x i32> zeroinitializer
%f3_p1 = getelementptr inbounds i8*, i8** %params, i32 5
%f3_p2 = load i8*, i8** %f3_p1, align 8, !alias.scope !2
%f3_p3 = bitcast i8* %f3_p2 to float*
%f3_0 = load float, float* %f3_p3, align 4, !alias.scope !2
%f3_1 = insertelement <8 x float> undef, float %f3_0, i32 0
%f3 = shufflevector <8 x float> %f3_1, <8 x float> undef, <8 x i32> zeroinitializer
%f4_p1 = getelementptr inbounds i8*, i8** %params, i32 6
%f4_p2 = load i8*, i8** %f4_p1, align 8, !alias.scope !2
%f4_p3 = bitcast i8* %f4_p2 to float*
%f4_0 = load float, float* %f4_p3, align 4, !alias.scope !2
%f4_1 = insertelement <8 x float> undef, float %f4_0, i32 0
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer
%f5_p1 = getelementptr inbounds i8*, i8** %params, i32 7
%f5_p2 = load i8*, i8** %f5_p1, align 8, !alias.scope !2
%f5_p3 = bitcast i8* %f5_p2 to float*
%f5_0 = load float, float* %f5_p3, align 4, !alias.scope !2
%f5_1 = insertelement <8 x float> undef, float %f5_0, i32 0
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer
%f6_p1 = getelementptr inbounds i8*, i8** %params, i32 8
%f6_p2 = load i8*, i8** %f6_p1, align 8, !alias.scope !2
%f6_p3 = bitcast i8* %f6_p2 to float*
%f6_0 = load float, float* %f6_p3, align 4, !alias.scope !2
%f6_1 = insertelement <8 x float> undef, float %f6_0, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%f7_p1 = getelementptr inbounds i8*, i8** %params, i32 9
%f7_p2 = load i8*, i8** %f7_p1, align 8, !alias.scope !2
%f7_p3 = bitcast i8* %f7_p2 to float*
%f7_0 = load float, float* %f7_p3, align 4, !alias.scope !2
%f7_1 = insertelement <8 x float> undef, float %f7_0, i32 0
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer
%f8_p1 = getelementptr inbounds i8*, i8** %params, i32 10
%f8_p2 = load i8*, i8** %f8_p1, align 8, !alias.scope !2
%f8_p3 = bitcast i8* %f8_p2 to float*
%f8_0 = load float, float* %f8_p3, align 4, !alias.scope !2
%f8_1 = insertelement <8 x float> undef, float %f8_0, i32 0
%f8 = shufflevector <8 x float> %f8_1, <8 x float> undef, <8 x i32> zeroinitializer
%f9_p1 = getelementptr inbounds i8*, i8** %params, i32 11
%f9_p2 = load i8*, i8** %f9_p1, align 8, !alias.scope !2
%f9_p3 = bitcast i8* %f9_p2 to float*
%f9_0 = load float, float* %f9_p3, align 4, !alias.scope !2
%f9_1 = insertelement <8 x float> undef, float %f9_0, i32 0
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer
%f10_p1 = getelementptr inbounds i8*, i8** %params, i32 12
%f10_p2 = load i8*, i8** %f10_p1, align 8, !alias.scope !2
%f10_p3 = bitcast i8* %f10_p2 to float*
%f10_0 = load float, float* %f10_p3, align 4, !alias.scope !2
%f10_1 = insertelement <8 x float> undef, float %f10_0, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11_p1 = getelementptr inbounds i8*, i8** %params, i32 13
%f11_p2 = load i8*, i8** %f11_p1, align 8, !alias.scope !2
%f11_p3 = bitcast i8* %f11_p2 to float*
%f11_0 = load float, float* %f11_p3, align 4, !alias.scope !2
%f11_1 = insertelement <8 x float> undef, float %f11_0, i32 0
%f11 = shufflevector <8 x float> %f11_1, <8 x float> undef, <8 x i32> zeroinitializer
%f12_p1 = getelementptr inbounds i8*, i8** %params, i32 14
%f12_p2 = load i8*, i8** %f12_p1, align 8, !alias.scope !2
%f12_p3 = bitcast i8* %f12_p2 to float*
%f12_0 = load float, float* %f12_p3, align 4, !alias.scope !2
%f12_1 = insertelement <8 x float> undef, float %f12_0, i32 0
%f12 = shufflevector <8 x float> %f12_1, <8 x float> undef, <8 x i32> zeroinitializer
%f13_p1 = getelementptr inbounds i8*, i8** %params, i32 15
%f13_p2 = load i8*, i8** %f13_p1, align 8, !alias.scope !2
%f13_p3 = bitcast i8* %f13_p2 to float*
%f13_0 = load float, float* %f13_p3, align 4, !alias.scope !2
%f13_1 = insertelement <8 x float> undef, float %f13_0, i32 0
%f13 = shufflevector <8 x float> %f13_1, <8 x float> undef, <8 x i32> zeroinitializer
%f14_p1 = getelementptr inbounds i8*, i8** %params, i32 16
%f14_p2 = load i8*, i8** %f14_p1, align 8, !alias.scope !2
%f14_p3 = bitcast i8* %f14_p2 to float*
%f14_0 = load float, float* %f14_p3, align 4, !alias.scope !2
%f14_1 = insertelement <8 x float> undef, float %f14_0, i32 0
%f14 = shufflevector <8 x float> %f14_1, <8 x float> undef, <8 x i32> zeroinitializer
%f15_p1 = getelementptr inbounds i8*, i8** %params, i32 17
%f15_p2 = load i8*, i8** %f15_p1, align 8, !alias.scope !2
%f15_p3 = bitcast i8* %f15_p2 to float*
%f15_0 = load float, float* %f15_p3, align 4, !alias.scope !2
%f15_1 = insertelement <8 x float> undef, float %f15_0, i32 0
%f15 = shufflevector <8 x float> %f15_1, <8 x float> undef, <8 x i32> zeroinitializer
%f16_p1 = getelementptr inbounds i8*, i8** %params, i32 18
%f16_p2 = load i8*, i8** %f16_p1, align 8, !alias.scope !2
%f16_p3 = bitcast i8* %f16_p2 to float*
%f16_0 = load float, float* %f16_p3, align 4, !alias.scope !2
%f16_1 = insertelement <8 x float> undef, float %f16_0, i32 0
%f16 = shufflevector <8 x float> %f16_1, <8 x float> undef, <8 x i32> zeroinitializer
%f17_p1 = getelementptr inbounds i8*, i8** %params, i32 19
%f17_p2 = load i8*, i8** %f17_p1, align 8, !alias.scope !2
%f17_p3 = bitcast i8* %f17_p2 to float*
%f17_0 = load float, float* %f17_p3, align 4, !alias.scope !2
%f17_1 = insertelement <8 x float> undef, float %f17_0, i32 0
%f17 = shufflevector <8 x float> %f17_1, <8 x float> undef, <8 x i32> zeroinitializer
%f18_p1 = getelementptr inbounds i8*, i8** %params, i32 20
%f18_p2 = load i8*, i8** %f18_p1, align 8, !alias.scope !2
%f18_p3 = bitcast i8* %f18_p2 to float*
%f18_0 = load float, float* %f18_p3, align 4, !alias.scope !2
%f18_1 = insertelement <8 x float> undef, float %f18_0, i32 0
%f18 = shufflevector <8 x float> %f18_1, <8 x float> undef, <8 x i32> zeroinitializer
%f19_p1 = getelementptr inbounds i8*, i8** %params, i32 21
%f19_p2 = load i8*, i8** %f19_p1, align 8, !alias.scope !2
%f19_p3 = bitcast i8* %f19_p2 to float*
%f19_0 = load float, float* %f19_p3, align 4, !alias.scope !2
%f19_1 = insertelement <8 x float> undef, float %f19_0, i32 0
%f19 = shufflevector <8 x float> %f19_1, <8 x float> undef, <8 x i32> zeroinitializer
%f20_p1 = getelementptr inbounds i8*, i8** %params, i32 22
%f20_p2 = load i8*, i8** %f20_p1, align 8, !alias.scope !2
%f20_p3 = bitcast i8* %f20_p2 to float*
%f20_0 = load float, float* %f20_p3, align 4, !alias.scope !2
%f20_1 = insertelement <8 x float> undef, float %f20_0, i32 0
%f20 = shufflevector <8 x float> %f20_1, <8 x float> undef, <8 x i32> zeroinitializer
%f21_p1 = getelementptr inbounds i8*, i8** %params, i32 23
%f21_p2 = load i8*, i8** %f21_p1, align 8, !alias.scope !2
%f21_p3 = bitcast i8* %f21_p2 to float*
%f21_0 = load float, float* %f21_p3, align 4, !alias.scope !2
%f21_1 = insertelement <8 x float> undef, float %f21_0, i32 0
%f21 = shufflevector <8 x float> %f21_1, <8 x float> undef, <8 x i32> zeroinitializer
%f22_p1 = getelementptr inbounds i8*, i8** %params, i32 24
%f22_p2 = load i8*, i8** %f22_p1, align 8, !alias.scope !2
%f22_p3 = bitcast i8* %f22_p2 to float*
%f22_0 = load float, float* %f22_p3, align 4, !alias.scope !2
%f22_1 = insertelement <8 x float> undef, float %f22_0, i32 0
%f22 = shufflevector <8 x float> %f22_1, <8 x float> undef, <8 x i32> zeroinitializer
%f23_p1 = getelementptr inbounds i8*, i8** %params, i32 25
%f23_p2 = load i8*, i8** %f23_p1, align 8, !alias.scope !2
%f23_p3 = bitcast i8* %f23_p2 to float*
%f23_0 = load float, float* %f23_p3, align 4, !alias.scope !2
%f23_1 = insertelement <8 x float> undef, float %f23_0, i32 0
%f23 = shufflevector <8 x float> %f23_1, <8 x float> undef, <8 x i32> zeroinitializer
%f24_p1 = getelementptr inbounds i8*, i8** %params, i32 26
%f24_p2 = load i8*, i8** %f24_p1, align 8, !alias.scope !2
%f24_p3 = bitcast i8* %f24_p2 to float*
%f24_0 = load float, float* %f24_p3, align 4, !alias.scope !2
%f24_1 = insertelement <8 x float> undef, float %f24_0, i32 0
%f24 = shufflevector <8 x float> %f24_1, <8 x float> undef, <8 x i32> zeroinitializer
%f25_p1 = getelementptr inbounds i8*, i8** %params, i32 27
%f25_p2 = load i8*, i8** %f25_p1, align 8, !alias.scope !2
%f25_p3 = bitcast i8* %f25_p2 to float*
%f25_0 = load float, float* %f25_p3, align 4, !alias.scope !2
%f25_1 = insertelement <8 x float> undef, float %f25_0, i32 0
%f25 = shufflevector <8 x float> %f25_1, <8 x float> undef, <8 x i32> zeroinitializer
%f26_p1 = getelementptr inbounds i8*, i8** %params, i32 28
%f26_p2 = load i8*, i8** %f26_p1, align 8, !alias.scope !2
%f26_p3 = bitcast i8* %f26_p2 to float*
%f26_0 = load float, float* %f26_p3, align 4, !alias.scope !2
%f26_1 = insertelement <8 x float> undef, float %f26_0, i32 0
%f26 = shufflevector <8 x float> %f26_1, <8 x float> undef, <8 x i32> zeroinitializer
%f27_p1 = getelementptr inbounds i8*, i8** %params, i32 29
%f27_p2 = load i8*, i8** %f27_p1, align 8, !alias.scope !2
%f27_p3 = bitcast i8* %f27_p2 to float*
%f27_0 = load float, float* %f27_p3, align 4, !alias.scope !2
%f27_1 = insertelement <8 x float> undef, float %f27_0, i32 0
%f27 = shufflevector <8 x float> %f27_1, <8 x float> undef, <8 x i32> zeroinitializer
%f28_p1 = getelementptr inbounds i8*, i8** %params, i32 30
%f28_p2 = load i8*, i8** %f28_p1, align 8, !alias.scope !2
%f28_p3 = bitcast i8* %f28_p2 to float*
%f28_0 = load float, float* %f28_p3, align 4, !alias.scope !2
%f28_1 = insertelement <8 x float> undef, float %f28_0, i32 0
%f28 = shufflevector <8 x float> %f28_1, <8 x float> undef, <8 x i32> zeroinitializer
%f29_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f29 = shufflevector <8 x float> %f29_1, <8 x float> undef, <8 x i32> zeroinitializer
%f30_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer
%r31_0 = trunc i64 %index to i32
%r31_1 = insertelement <8 x i32> undef, i32 %r31_0, i32 0
%r31_2 = shufflevector <8 x i32> %r31_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r31 = add <8 x i32> %r31_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%r32_p1 = getelementptr inbounds i8*, i8** %params, i32 31
%r32_p2 = load i8*, i8** %r32_p1, align 8, !alias.scope !2
%r32_p3 = bitcast i8* %r32_p2 to i32*
%r32_0 = load i32, i32* %r32_p3, align 4, !alias.scope !2
%r32_1 = insertelement <8 x i32> undef, i32 %r32_0, i32 0
%r32 = shufflevector <8 x i32> %r32_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r33_1 = insertelement <8 x i32> undef, i32 2654435769, i32 0
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r34_1 = insertelement <8 x i32> undef, i32 4, i32 0
%r34 = shufflevector <8 x i32> %r34_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r35 = shl <8 x i32> %r31, %r34
%r36_1 = insertelement <8 x i32> undef, i32 2738958700, i32 0
%r36 = shufflevector <8 x i32> %r36_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r37 = add <8 x i32> %r35, %r36
%r38 = add <8 x i32> %r31, %r33
%r39 = xor <8 x i32> %r37, %r38
%r40_1 = insertelement <8 x i32> undef, i32 5, i32 0
%r40 = shufflevector <8 x i32> %r40_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r41 = lshr <8 x i32> %r31, %r40
%r42_1 = insertelement <8 x i32> undef, i32 3355524772, i32 0
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r43 = add <8 x i32> %r41, %r42
%r44 = xor <8 x i32> %r39, %r43
%r45 = add <8 x i32> %r32, %r44
%r46 = shl <8 x i32> %r45, %r34
%r47_1 = insertelement <8 x i32> undef, i32 2911926141, i32 0
%r47 = shufflevector <8 x i32> %r47_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r48 = add <8 x i32> %r46, %r47
%r49 = add <8 x i32> %r45, %r33
%r50 = xor <8 x i32> %r48, %r49
%r51 = lshr <8 x i32> %r45, %r40
%r52_1 = insertelement <8 x i32> undef, i32 2123724318, i32 0
%r52 = shufflevector <8 x i32> %r52_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r53 = add <8 x i32> %r51, %r52
%r54 = xor <8 x i32> %r50, %r53
%r55 = add <8 x i32> %r31, %r54
%r56_1 = insertelement <8 x i32> undef, i32 1013904242, i32 0
%r56 = shufflevector <8 x i32> %r56_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r57 = shl <8 x i32> %r55, %r34
%r58 = add <8 x i32> %r57, %r36
%r59 = add <8 x i32> %r55, %r56
%r60 = xor <8 x i32> %r58, %r59
%r61 = lshr <8 x i32> %r55, %r40
%r62 = add <8 x i32> %r61, %r42
%r63 = xor <8 x i32> %r60, %r62
%r64 = add <8 x i32> %r45, %r63
%r65 = shl <8 x i32> %r64, %r34
%r66 = add <8 x i32> %r65, %r47
%r67 = add <8 x i32> %r64, %r56
%r68 = xor <8 x i32> %r66, %r67
%r69 = lshr <8 x i32> %r64, %r40
%r70 = add <8 x i32> %r69, %r52
%r71 = xor <8 x i32> %r68, %r70
%r72 = add <8 x i32> %r55, %r71
%r73_1 = insertelement <8 x i32> undef, i32 3668340011, i32 0
%r73 = shufflevector <8 x i32> %r73_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r74 = shl <8 x i32> %r72, %r34
%r75 = add <8 x i32> %r74, %r36
%r76 = add <8 x i32> %r72, %r73
%r77 = xor <8 x i32> %r75, %r76
%r78 = lshr <8 x i32> %r72, %r40
%r79 = add <8 x i32> %r78, %r42
%r80 = xor <8 x i32> %r77, %r79
%r81 = add <8 x i32> %r64, %r80
%r82 = shl <8 x i32> %r81, %r34
%r83 = add <8 x i32> %r82, %r47
%r84 = add <8 x i32> %r81, %r73
%r85 = xor <8 x i32> %r83, %r84
%r86 = lshr <8 x i32> %r81, %r40
%r87 = add <8 x i32> %r86, %r52
%r88 = xor <8 x i32> %r85, %r87
%r89 = add <8 x i32> %r72, %r88
%r90_1 = insertelement <8 x i32> undef, i32 2027808484, i32 0
%r90 = shufflevector <8 x i32> %r90_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r91 = shl <8 x i32> %r89, %r34
%r92 = add <8 x i32> %r91, %r36
%r93 = add <8 x i32> %r89, %r90
%r94 = xor <8 x i32> %r92, %r93
%r95 = lshr <8 x i32> %r89, %r40
%r96 = add <8 x i32> %r95, %r42
%r97 = xor <8 x i32> %r94, %r96
%r98 = add <8 x i32> %r81, %r97
%r99 = shl <8 x i32> %r98, %r34
%r100 = add <8 x i32> %r99, %r47
%r101 = add <8 x i32> %r98, %r90
%r102 = xor <8 x i32> %r100, %r101
%r103 = lshr <8 x i32> %r98, %r40
%r104 = add <8 x i32> %r103, %r52
%r105 = xor <8 x i32> %r102, %r104
%r106 = add <8 x i32> %r89, %r105
%rd107 = zext <8 x i32> %r98 to <8 x i64>
%rd108 = zext <8 x i32> %r106 to <8 x i64>
%rd109_1 = insertelement <8 x i64> undef, i64 0, i32 0
%rd109 = shufflevector <8 x i64> %rd109_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd110_1 = insertelement <8 x i64> undef, i64 1, i32 0
%rd110 = shufflevector <8 x i64> %rd110_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd111 = shl <8 x i64> %rd108, %rd110
%rd112 = or <8 x i64> %rd111, %rd110
%rd113_1 = insertelement <8 x i64> undef, i64 6364136223846793005, i32 0
%rd113 = shufflevector <8 x i64> %rd113_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd114_0 = mul <8 x i64> %rd109, %rd113
%rd114 = add <8 x i64> %rd114_0, %rd112
%rd115 = add <8 x i64> %rd114, %rd107
%rd116_0 = mul <8 x i64> %rd115, %rd113
%rd116 = add <8 x i64> %rd116_0, %rd112
%r117_p1 = getelementptr inbounds i8*, i8** %params, i32 32
%r117_p2 = load i8*, i8** %r117_p1, align 8, !alias.scope !2
%r117_p3 = bitcast i8* %r117_p2 to i32*
%r117_0 = load i32, i32* %r117_p3, align 4, !alias.scope !2
%r117_1 = insertelement <8 x i32> undef, i32 %r117_0, i32 0
%r117 = shufflevector <8 x i32> %r117_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r118 = lshr <8 x i32> %r31, %r117
%r119_1 = insertelement <8 x i32> undef, i32 8, i32 0
%r119 = shufflevector <8 x i32> %r119_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r120 = lshr <8 x i32> %r118, %r119
%r121_1 = insertelement <8 x i32> undef, i32 256, i32 0
%r121 = shufflevector <8 x i32> %r121_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r122 = sub <8 x i32> zeroinitializer, %r120
%r123_0 = mul <8 x i32> %r121, %r122
%r123 = add <8 x i32> %r123_0, %r118
%f124 = uitofp <8 x i32> %r123 to <8 x float>
%f125 = uitofp <8 x i32> %r120 to <8 x float>
%rd126_0 = mul <8 x i64> %rd116, %rd113
%rd126 = add <8 x i64> %rd126_0, %rd112
%rd127_1 = insertelement <8 x i64> undef, i64 18, i32 0
%rd127 = shufflevector <8 x i64> %rd127_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd128 = lshr <8 x i64> %rd116, %rd127
%rd129 = xor <8 x i64> %rd128, %rd116
%rd130_1 = insertelement <8 x i64> undef, i64 27, i32 0
%rd130 = shufflevector <8 x i64> %rd130_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd131 = lshr <8 x i64> %rd129, %rd130
%r132 = trunc <8 x i64> %rd131 to <8 x i32>
%rd133_1 = insertelement <8 x i64> undef, i64 59, i32 0
%rd133 = shufflevector <8 x i64> %rd133_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd134 = lshr <8 x i64> %rd116, %rd133
%r135 = trunc <8 x i64> %rd134 to <8 x i32>
%r136 = lshr <8 x i32> %r132, %r135
%r137 = bitcast <8 x i32> %r135 to <8 x i32>
%r138 = sub <8 x i32> zeroinitializer, %r137
%r139_1 = insertelement <8 x i32> undef, i32 31, i32 0
%r139 = shufflevector <8 x i32> %r139_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r140 = and <8 x i32> %r138, %r139
%r141 = bitcast <8 x i32> %r140 to <8 x i32>
%r142 = shl <8 x i32> %r132, %r141
%r143 = or <8 x i32> %r136, %r142
%r144_1 = insertelement <8 x i32> undef, i32 9, i32 0
%r144 = shufflevector <8 x i32> %r144_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r145 = lshr <8 x i32> %r143, %r144
%r146_1 = insertelement <8 x i32> undef, i32 1065353216, i32 0
%r146 = shufflevector <8 x i32> %r146_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r147 = or <8 x i32> %r145, %r146
%f148 = bitcast <8 x i32> %r147 to <8 x float>
%f149 = fsub <8 x float> %f148, %f30
%rd150_0 = mul <8 x i64> %rd126, %rd113
%rd150 = add <8 x i64> %rd150_0, %rd112
%rd151 = lshr <8 x i64> %rd126, %rd127
%rd152 = xor <8 x i64> %rd151, %rd126
%rd153 = lshr <8 x i64> %rd152, %rd130
%r154 = trunc <8 x i64> %rd153 to <8 x i32>
%rd155 = lshr <8 x i64> %rd126, %rd133
%r156 = trunc <8 x i64> %rd155 to <8 x i32>
%r157 = lshr <8 x i32> %r154, %r156
%r158 = bitcast <8 x i32> %r156 to <8 x i32>
%r159 = sub <8 x i32> zeroinitializer, %r158
%r160 = and <8 x i32> %r159, %r139
%r161 = bitcast <8 x i32> %r160 to <8 x i32>
%r162 = shl <8 x i32> %r154, %r161
%r163 = or <8 x i32> %r157, %r162
%r164 = lshr <8 x i32> %r163, %r144
%r165 = or <8 x i32> %r164, %r146
%f166 = bitcast <8 x i32> %r165 to <8 x float>
%f167 = fsub <8 x float> %f166, %f30
%f168 = fadd <8 x float> %f124, %f149
%f169 = fadd <8 x float> %f125, %f167
%f170_1 = insertelement <8 x float> undef, float 0x3f70000000000000, i32 0
%f170 = shufflevector <8 x float> %f170_1, <8 x float> undef, <8 x i32> zeroinitializer
%f171_1 = insertelement <8 x float> undef, float 0x8000000000000000, i32 0
%f171 = shufflevector <8 x float> %f171_1, <8 x float> undef, <8 x i32> zeroinitializer
%f172 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f168, <8 x float> %f170, <8 x float> %f171)
%f173 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f169, <8 x float> %f170, <8 x float> %f171)
%f174 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f172, <8 x float> %f25)
%f175 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f172, <8 x float> %f26)
%f176 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f172, <8 x float> %f27)
%f177 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f172, <8 x float> %f28)
%f178 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f173, <8 x float> %f174)
%f179 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f173, <8 x float> %f175)
%f180 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f173, <8 x float> %f176)
%f181 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f20, <8 x float> %f173, <8 x float> %f177)
%f182 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f21, <8 x float> %f29, <8 x float> %f178)
%f183 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f22, <8 x float> %f29, <8 x float> %f179)
%f184 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f23, <8 x float> %f29, <8 x float> %f180)
%f185 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f24, <8 x float> %f29, <8 x float> %f181)
%f186 = fdiv <8 x float> %f30, %f185
%f187 = fmul <8 x float> %f182, %f186
%f188 = fmul <8 x float> %f183, %f186
%f189 = fmul <8 x float> %f184, %f186
%f190 = fmul <8 x float> %f187, %f187
%f191 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f188, <8 x float> %f188, <8 x float> %f190)
%f192 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f189, <8 x float> %f189, <8 x float> %f191)
%f193 = fdiv <8 x float> %f30, %f192
%f194 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f193)
%f195 = fmul <8 x float> %f187, %f194
%f196 = fmul <8 x float> %f188, %f194
%f197 = fmul <8 x float> %f189, %f194
%f198 = fmul <8 x float> %f1, %f195
%f199 = fmul <8 x float> %f2, %f195
%f200 = fmul <8 x float> %f3, %f195
%f201 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f4, <8 x float> %f196, <8 x float> %f198)
%f202 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f5, <8 x float> %f196, <8 x float> %f199)
%f203 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f6, <8 x float> %f196, <8 x float> %f200)
%f204 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f7, <8 x float> %f197, <8 x float> %f201)
%f205 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f197, <8 x float> %f202)
%f206 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f197, <8 x float> %f203)
%f207 = fdiv <8 x float> %f30, %f197
%f208_1 = insertelement <8 x float> undef, float 0x3f50624de0000000, i32 0
%f208 = shufflevector <8 x float> %f208_1, <8 x float> undef, <8 x i32> zeroinitializer
%f209 = fmul <8 x float> %f208, %f207
%f210_1 = insertelement <8 x float> undef, float 0x4059000000000000, i32 0
%f210 = shufflevector <8 x float> %f210_1, <8 x float> undef, <8 x i32> zeroinitializer
%f211 = fmul <8 x float> %f210, %f207
%f212 = fmul <8 x float> %f204, %f209
%f213 = fmul <8 x float> %f205, %f209
%f214 = fmul <8 x float> %f206, %f209
%f215 = fadd <8 x float> %f10, %f212
%f216 = fadd <8 x float> %f11, %f213
%f217 = fadd <8 x float> %f12, %f214
%f218 = fsub <8 x float> %f211, %f209
%r219_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r219 = shufflevector <8 x i32> %r219_1, <8 x i32> undef, <8 x i32> zeroinitializer
%f220_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0
%f220 = shufflevector <8 x float> %f220_1, <8 x float> undef, <8 x i32> zeroinitializer
%p221_1 = insertelement <8 x i1> undef, i1 1, i32 0
%p221 = shufflevector <8 x i1> %p221_1, <8 x i1> undef, <8 x i32> zeroinitializer
%r222_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r222 = shufflevector <8 x i32> %r222_1, <8 x i32> undef, <8 x i32> zeroinitializer
%f223_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f223 = shufflevector <8 x float> %f223_1, <8 x float> undef, <8 x i32> zeroinitializer
%p224_1 = insertelement <8 x i1> undef, i1 1, i32 0
%p224 = shufflevector <8 x i1> %p224_1, <8 x i1> undef, <8 x i32> zeroinitializer
%f225_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f225 = shufflevector <8 x float> %f225_1, <8 x float> undef, <8 x i32> zeroinitializer
%f226_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f226 = shufflevector <8 x float> %f226_1, <8 x float> undef, <8 x i32> zeroinitializer
%f227_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f227 = shufflevector <8 x float> %f227_1, <8 x float> undef, <8 x i32> zeroinitializer
%f228_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f228 = shufflevector <8 x float> %f228_1, <8 x float> undef, <8 x i32> zeroinitializer
%f229_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f229 = shufflevector <8 x float> %f229_1, <8 x float> undef, <8 x i32> zeroinitializer
%f230_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f230 = shufflevector <8 x float> %f230_1, <8 x float> undef, <8 x i32> zeroinitializer
%f231_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f231 = shufflevector <8 x float> %f231_1, <8 x float> undef, <8 x i32> zeroinitializer
%f232_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f232 = shufflevector <8 x float> %f232_1, <8 x float> undef, <8 x i32> zeroinitializer
%f233_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f233 = shufflevector <8 x float> %f233_1, <8 x float> undef, <8 x i32> zeroinitializer
%f234_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f234 = shufflevector <8 x float> %f234_1, <8 x float> undef, <8 x i32> zeroinitializer
%f235_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f235 = shufflevector <8 x float> %f235_1, <8 x float> undef, <8 x i32> zeroinitializer
br label %l_236_start
l_236_start:
br label %l_236_cond
l_236_cond: ; Loop (Path Tracer)
%rd237 = phi <8 x i64> [ %rd237_final, %l_236_tail ], [ %rd150, %l_236_start ]
%f238 = phi <8 x float> [ %f238_final, %l_236_tail ], [ %f215, %l_236_start ]
%f239 = phi <8 x float> [ %f239_final, %l_236_tail ], [ %f216, %l_236_start ]
%f240 = phi <8 x float> [ %f240_final, %l_236_tail ], [ %f217, %l_236_start ]
%f241 = phi <8 x float> [ %f241_final, %l_236_tail ], [ %f204, %l_236_start ]
%f242 = phi <8 x float> [ %f242_final, %l_236_tail ], [ %f205, %l_236_start ]
%f243 = phi <8 x float> [ %f243_final, %l_236_tail ], [ %f206, %l_236_start ]
%f244 = phi <8 x float> [ %f244_final, %l_236_tail ], [ %f218, %l_236_start ]
%f245 = phi <8 x float> [ %f245_final, %l_236_tail ], [ %f233, %l_236_start ]
%f246 = phi <8 x float> [ %f246_final, %l_236_tail ], [ %f234, %l_236_start ]
%f247 = phi <8 x float> [ %f247_final, %l_236_tail ], [ %f235, %l_236_start ]
%f248 = phi <8 x float> [ %f248_final, %l_236_tail ], [ %f230, %l_236_start ]
%f249 = phi <8 x float> [ %f249_final, %l_236_tail ], [ %f231, %l_236_start ]
%f250 = phi <8 x float> [ %f250_final, %l_236_tail ], [ %f232, %l_236_start ]
%f251 = phi <8 x float> [ %f251_final, %l_236_tail ], [ %f223, %l_236_start ]
%r252 = phi <8 x i32> [ %r252_final, %l_236_tail ], [ %r222, %l_236_start ]
%f253 = phi <8 x float> [ %f253_final, %l_236_tail ], [ %f220, %l_236_start ]
%f254 = phi <8 x float> [ %f254_final, %l_236_tail ], [ %f226, %l_236_start ]
%f255 = phi <8 x float> [ %f255_final, %l_236_tail ], [ %f227, %l_236_start ]
%f256 = phi <8 x float> [ %f256_final, %l_236_tail ], [ %f228, %l_236_start ]
%f257 = phi <8 x float> [ %f257_final, %l_236_tail ], [ %f225, %l_236_start ]
%p258 = phi <8 x i1> [ %p258_final, %l_236_tail ], [ %p224, %l_236_start ]
%p259 = phi <8 x i1> [ %p259_final, %l_236_tail ], [ %p221, %l_236_start ]
%p236 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p259)
br i1 %p236, label %l_236_body, label %l_236_done
l_236_body:
%rd261 = phi <8 x i64> [ %rd237, %l_236_cond ]
%f262 = phi <8 x float> [ %f238, %l_236_cond ]
%f263 = phi <8 x float> [ %f239, %l_236_cond ]
%f264 = phi <8 x float> [ %f240, %l_236_cond ]
%f265 = phi <8 x float> [ %f241, %l_236_cond ]
%f266 = phi <8 x float> [ %f242, %l_236_cond ]
%f267 = phi <8 x float> [ %f243, %l_236_cond ]
%f268 = phi <8 x float> [ %f244, %l_236_cond ]
%f269 = phi <8 x float> [ %f245, %l_236_cond ]
%f270 = phi <8 x float> [ %f246, %l_236_cond ]
%f271 = phi <8 x float> [ %f247, %l_236_cond ]
%f272 = phi <8 x float> [ %f248, %l_236_cond ]
%f273 = phi <8 x float> [ %f249, %l_236_cond ]
%f274 = phi <8 x float> [ %f250, %l_236_cond ]
%f275 = phi <8 x float> [ %f251, %l_236_cond ]
%r276 = phi <8 x i32> [ %r252, %l_236_cond ]
%f277 = phi <8 x float> [ %f254, %l_236_cond ]
%f278 = phi <8 x float> [ %f255, %l_236_cond ]
%f279 = phi <8 x float> [ %f256, %l_236_cond ]
%f280 = phi <8 x float> [ %f257, %l_236_cond ]
%p281 = phi <8 x i1> [ %p258, %l_236_cond ]
%p282 = phi <8 x i1> [ %p259, %l_236_cond ]
%r283_0 = trunc i64 %index to i32
%r283_1 = insertelement <8 x i32> undef, i32 %r283_0, i32 0
%r283_2 = shufflevector <8 x i32> %r283_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r283 = add <8 x i32> %r283_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%p284_0 = trunc i64 %end to i32
%p284_1 = insertelement <8 x i32> undef, i32 %p284_0, i32 0
%p284_2 = shufflevector <8 x i32> %p284_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p284 = icmp ult <8 x i32> %r283, %p284_2
%p285 = and <8 x i1> %p284, %p259
%p286_1 = insertelement <8 x i1> undef, i1 0, i32 0
%p286 = shufflevector <8 x i1> %p286_1, <8 x i1> undef, <8 x i32> zeroinitializer
%rd287_p1 = getelementptr inbounds i8*, i8** %params, i32 33
%rd287 = load i8*, i8** %rd287_p1, align 8, !alias.scope !2
%rd288_p1 = getelementptr inbounds i8*, i8** %params, i32 34
%rd288 = load i8*, i8** %rd288_p1, align 8, !alias.scope !2
%r289_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r289 = shufflevector <8 x i32> %r289_1, <8 x i32> undef, <8 x i32> zeroinitializer
%f290_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f290 = shufflevector <8 x float> %f290_1, <8 x float> undef, <8 x i32> zeroinitializer
%r291_1 = insertelement <8 x i32> undef, i32 4294967295, i32 0
%r291 = shufflevector <8 x i32> %r291_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r292_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r292 = shufflevector <8 x i32> %r292_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r293 = select <8 x i1> %p285, <8 x i32> %r291, <8 x i32> zeroinitializer
%f294_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0
%f294 = shufflevector <8 x float> %f294_1, <8 x float> undef, <8 x i32> zeroinitializer
%r295_1 = insertelement <8 x i32> undef, i32 4294967295, i32 0
%r295 = shufflevector <8 x i32> %r295_1, <8 x i32> undef, <8 x i32> zeroinitializer
%rd296_p1 = getelementptr inbounds i8*, i8** %params, i32 35
%rd296 = load i8*, i8** %rd296_p1, align 8, !alias.scope !2
%f297_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f297 = shufflevector <8 x float> %f297_1, <8 x float> undef, <8 x i32> zeroinitializer
%f298_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0
%f298 = shufflevector <8 x float> %f298_1, <8 x float> undef, <8 x i32> zeroinitializer
%f299 = fneg <8 x float> %f265
%f300 = fneg <8 x float> %f266
%f301 = fneg <8 x float> %f267
%p302 = xor <8 x i1> %p281, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%f303 = fmul <8 x float> %f280, %f280
%p304 = fcmp ogt <8 x float> %f303, %f290
%r305_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r305 = shufflevector <8 x i32> %r305_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r306 = add <8 x i32> %r276, %r305
%r307_1 = insertelement <8 x i32> undef, i32 6, i32 0
%r307 = shufflevector <8 x i32> %r307_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p308 = icmp ult <8 x i32> %r306, %r307
%r309_1 = insertelement <8 x i32> undef, i32 30, i32 0
%r309 = shufflevector <8 x i32> %r309_1, <8 x i32> undef, <8 x i32> zeroinitializer
%rd310_1 = insertelement <8 x i64> undef, i64 6364136223846793005, i32 0
%rd310 = shufflevector <8 x i64> %rd310_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd311_0 = mul <8 x i64> %rd261, %rd310
%rd311 = add <8 x i64> %rd311_0, %rd112
%rd312_1 = insertelement <8 x i64> undef, i64 18, i32 0
%rd312 = shufflevector <8 x i64> %rd312_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd313 = lshr <8 x i64> %rd261, %rd312
%rd314 = xor <8 x i64> %rd313, %rd261
%rd315_1 = insertelement <8 x i64> undef, i64 27, i32 0
%rd315 = shufflevector <8 x i64> %rd315_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd316 = lshr <8 x i64> %rd314, %rd315
%r317 = trunc <8 x i64> %rd316 to <8 x i32>
%rd318_1 = insertelement <8 x i64> undef, i64 59, i32 0
%rd318 = shufflevector <8 x i64> %rd318_1, <8 x i64> undef, <8 x i32> zeroinitializer
%rd319 = lshr <8 x i64> %rd261, %rd318
%r320 = trunc <8 x i64> %rd319 to <8 x i32>
%r321 = lshr <8 x i32> %r317, %r320
%r322 = bitcast <8 x i32> %r320 to <8 x i32>
%r323 = sub <8 x i32> zeroinitializer, %r322
%r324_1 = insertelement <8 x i32> undef, i32 31, i32 0
%r324 = shufflevector <8 x i32> %r324_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r325 = and <8 x i32> %r323, %r324
%r326 = bitcast <8 x i32> %r325 to <8 x i32>
%r327 = shl <8 x i32> %r317, %r326
%r328 = or <8 x i32> %r321, %r327
%r329_1 = insertelement <8 x i32> undef, i32 9, i32 0
%r329 = shufflevector <8 x i32> %r329_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r330 = lshr <8 x i32> %r328, %r329
%r331_1 = insertelement <8 x i32> undef, i32 1065353216, i32 0
%r331 = shufflevector <8 x i32> %r331_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r332 = or <8 x i32> %r330, %r331
%f333 = bitcast <8 x i32> %r332 to <8 x float>
%f334 = fsub <8 x float> %f333, %f297
%rd335_p1 = getelementptr inbounds i8*, i8** %params, i32 36
%rd335 = load i8*, i8** %rd335_p1, align 8, !alias.scope !2
%f336_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f336 = shufflevector <8 x float> %f336_1, <8 x float> undef, <8 x i32> zeroinitializer
%f337_1 = insertelement <8 x float> undef, float 0x3f17700000000000, i32 0
%f337 = shufflevector <8 x float> %f337_1, <8 x float> undef, <8 x i32> zeroinitializer
%f338_1 = insertelement <8 x float> undef, float 0x3feff8ad00000000, i32 0
%f338 = shufflevector <8 x float> %f338_1, <8 x float> undef, <8 x i32> zeroinitializer
%rd339_p1 = getelementptr inbounds i8*, i8** %params, i32 37
%rd339 = load i8*, i8** %rd339_p1, align 8, !alias.scope !2
%f340_1 = insertelement <8 x float> undef, float 0x3fee666660000000, i32 0
%f340 = shufflevector <8 x float> %f340_1, <8 x float> undef, <8 x i32> zeroinitializer
%r341_1 = insertelement <8 x i32> undef, i32 5, i32 0
%r341 = shufflevector <8 x i32> %r341_1, <8 x i32> undef, <8 x i32> zeroinitializer
; -------- Ray trace -------
%u342_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u342_in_0_1 = bitcast i8* %u342_in_0_0 to <8 x i32> *
store <8 x i32> %r293, <8 x i32>* %u342_in_0_1, align 32
%u342_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u342_in_1_1 = bitcast i8* %u342_in_1_0 to <8 x float> *
store <8 x float> %f262, <8 x float>* %u342_in_1_1, align 32
%u342_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u342_in_2_1 = bitcast i8* %u342_in_2_0 to <8 x float> *
store <8 x float> %f263, <8 x float>* %u342_in_2_1, align 32
%u342_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u342_in_3_1 = bitcast i8* %u342_in_3_0 to <8 x float> *
store <8 x float> %f264, <8 x float>* %u342_in_3_1, align 32
%u342_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u342_in_4_1 = bitcast i8* %u342_in_4_0 to <8 x float> *
store <8 x float> %f290, <8 x float>* %u342_in_4_1, align 32
%u342_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u342_in_5_1 = bitcast i8* %u342_in_5_0 to <8 x float> *
store <8 x float> %f265, <8 x float>* %u342_in_5_1, align 32
%u342_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u342_in_6_1 = bitcast i8* %u342_in_6_0 to <8 x float> *
store <8 x float> %f266, <8 x float>* %u342_in_6_1, align 32
%u342_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224
%u342_in_7_1 = bitcast i8* %u342_in_7_0 to <8 x float> *
store <8 x float> %f267, <8 x float>* %u342_in_7_1, align 32
%u342_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256
%u342_in_8_1 = bitcast i8* %u342_in_8_0 to <8 x float> *
store <8 x float> %f229, <8 x float>* %u342_in_8_1, align 32
%u342_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u342_in_9_1 = bitcast i8* %u342_in_9_0 to <8 x float> *
store <8 x float> %f268, <8 x float>* %u342_in_9_1, align 32
%u342_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320
%u342_in_10_1 = bitcast i8* %u342_in_10_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u342_in_10_1, align 32
%u342_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352
%u342_in_11_1 = bitcast i8* %u342_in_11_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u342_in_11_1, align 32
%u342_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384
%u342_in_12_1 = bitcast i8* %u342_in_12_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u342_in_12_1, align 32
%u342_in_geomid_0 = getelementptr inbounds i8, i8* %buffer, i32 608
%u342_in_geomid_1 = bitcast i8* %u342_in_geomid_0 to <8 x i32> *
store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %u342_in_geomid_1, align 32
%u342_in_ctx_0 = getelementptr inbounds i8, i8* %buffer, i32 672
%u342_in_ctx_1 = bitcast i8* %u342_in_ctx_0 to <6 x i32> *
store <6 x i32> <i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0>, <6 x i32>* %u342_in_ctx_1, align 4
%u342_func = bitcast i8* %rd287 to void (i8*, i8*, i8*, i8*)*
call void %u342_func(i8* %u342_in_0_0, i8* %rd288, i8* %u342_in_ctx_0, i8* %u342_in_1_0)
%u342_out_0_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u342_out_0_1 = bitcast i8* %u342_out_0_0 to <8 x float> *
%u342_out_0 = load <8 x float>, <8 x float>* %u342_out_0_1, align 32
%u342_out_1_0 = getelementptr inbounds i8, i8* %buffer, i32 512
%u342_out_1_1 = bitcast i8* %u342_out_1_0 to <8 x float> *
%u342_out_1 = load <8 x float>, <8 x float>* %u342_out_1_1, align 32
%u342_out_2_0 = getelementptr inbounds i8, i8* %buffer, i32 544
%u342_out_2_1 = bitcast i8* %u342_out_2_0 to <8 x float> *
%u342_out_2 = load <8 x float>, <8 x float>* %u342_out_2_1, align 32
%u342_out_3_0 = getelementptr inbounds i8, i8* %buffer, i32 576
%u342_out_3_1 = bitcast i8* %u342_out_3_0 to <8 x i32> *
%u342_out_3 = load <8 x i32>, <8 x i32>* %u342_out_3_1, align 32
%u342_out_4_0 = getelementptr inbounds i8, i8* %buffer, i32 608
%u342_out_4_1 = bitcast i8* %u342_out_4_0 to <8 x i32> *
%u342_out_4 = load <8 x i32>, <8 x i32>* %u342_out_4_1, align 32
%u342_out_5_0 = getelementptr inbounds i8, i8* %buffer, i32 640
%u342_out_5_1 = bitcast i8* %u342_out_5_0 to <8 x i32> *
%u342_out_5 = load <8 x i32>, <8 x i32>* %u342_out_5_1, align 32
; -------------------
%f343 = bitcast <8 x float> %u342_out_0 to <8 x float>
%f344 = bitcast <8 x float> %u342_out_1 to <8 x float>
%f345 = bitcast <8 x float> %u342_out_2 to <8 x float>
%r346 = bitcast <8 x i32> %u342_out_3 to <8 x i32>
%r347 = bitcast <8 x i32> %u342_out_4 to <8 x i32>
%r348 = bitcast <8 x i32> %u342_out_5 to <8 x i32>
%p349 = fcmp one <8 x float> %f343, %f268
%f350 = select <8 x i1> %p349, <8 x float> %f343, <8 x float> %f294
%p351 = icmp ne <8 x i32> %r348, %r295
%p352 = and <8 x i1> %p349, %p351
%r353 = select <8 x i1> %p352, <8 x i32> %r348, <8 x i32> %r347
%p354 = and <8 x i1> %p349, %p285
%r355_0 = bitcast i8* %rd296 to i32*
%r355_1 = getelementptr i32, i32* %r355_0, <8 x i32> %r353
%r355 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r355_1, i32 4, <8 x i1> %p354, <8 x i32> zeroinitializer)
%r356 = select <8 x i1> %p352, <8 x i32> %r355, <8 x i32> zeroinitializer
%p357 = xor <8 x i1> %p352, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r358 = select <8 x i1> %p357, <8 x i32> %r355, <8 x i32> zeroinitializer
%p359 = fcmp one <8 x float> %f350, %f294
%p360 = icmp eq <8 x i32> %r356, %r289
%r361 = select <8 x i1> %p360, <8 x i32> %r358, <8 x i32> %r356
%p362 = icmp ne <8 x i32> %r361, %r289
%p363 = and <8 x i1> %p359, %p362
%p364 = and <8 x i1> %p363, %p285
%f365 = bitcast <8 x float> %f350 to <8 x float>
%rd366_p1 = getelementptr inbounds i8*, i8** %params, i32 38
%rd366 = load i8*, i8** %rd366_p1, align 8, !alias.scope !2
%rd367_p1 = getelementptr inbounds i8*, i8** %params, i32 39
%rd367 = load i8*, i8** %rd367_p1, align 8, !alias.scope !2
br label %l368_start
l368_start:
; VCall: mitsuba::Shape::compute_surface_interaction()
; - target 1 = @func_bac3cacf8aebcc87b13f4c43e6cdd822;
; - target 2 = @func_570d1d9103e40ae9598d8f351c2e5c2a;
; - target 3 = @func_e17fd895e12cc8bec8c4e55075a069b3;
; - target 4 = @func_dcfca7b5b523c613a93412a4c52b1f7a;
%u368_self_ptr_0 = bitcast i8* %rd366 to i64*
%u368_self_ptr = getelementptr i64, i64* %u368_self_ptr_0, <8 x i32> %r361
%u368_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u368_self_ptr, i32 8, <8 x i1> %p364, <8 x i64> zeroinitializer)
%u368_self_initial = trunc <8 x i64> %u368_self_combined to <8 x i32>
%u368_offset_1 = lshr <8 x i64> %u368_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u368_offset = trunc <8 x i64> %u368_offset_1 to <8 x i32>
%u368_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u368_in_0_1 = bitcast i8* %u368_in_0_0 to <8 x float> *
store <8 x float> %f262, <8 x float>* %u368_in_0_1, align 32
%u368_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u368_in_1_1 = bitcast i8* %u368_in_1_0 to <8 x float> *
store <8 x float> %f263, <8 x float>* %u368_in_1_1, align 32
%u368_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u368_in_2_1 = bitcast i8* %u368_in_2_0 to <8 x float> *
store <8 x float> %f264, <8 x float>* %u368_in_2_1, align 32
%u368_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u368_in_3_1 = bitcast i8* %u368_in_3_0 to <8 x float> *
store <8 x float> %f265, <8 x float>* %u368_in_3_1, align 32
%u368_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u368_in_4_1 = bitcast i8* %u368_in_4_0 to <8 x float> *
store <8 x float> %f266, <8 x float>* %u368_in_4_1, align 32
%u368_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u368_in_5_1 = bitcast i8* %u368_in_5_0 to <8 x float> *
store <8 x float> %f267, <8 x float>* %u368_in_5_1, align 32
%u368_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u368_in_6_1 = bitcast i8* %u368_in_6_0 to <8 x float> *
store <8 x float> %f350, <8 x float>* %u368_in_6_1, align 32
%u368_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224
%u368_in_7_1 = bitcast i8* %u368_in_7_0 to <8 x float> *
store <8 x float> %f344, <8 x float>* %u368_in_7_1, align 32
%u368_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256
%u368_in_8_1 = bitcast i8* %u368_in_8_0 to <8 x float> *
store <8 x float> %f345, <8 x float>* %u368_in_8_1, align 32
%u368_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u368_in_9_1 = bitcast i8* %u368_in_9_0 to <8 x i32> *
store <8 x i32> %r346, <8 x i32>* %u368_in_9_1, align 32
%u368_out = getelementptr i8, i8* %buffer, i32 320
%u368_tmp_0_0 = getelementptr inbounds i8, i8* %u368_out, i64 0
%u368_tmp_0_1 = bitcast i8* %u368_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_0_1, align 32
%u368_tmp_2_0 = getelementptr inbounds i8, i8* %u368_out, i64 32
%u368_tmp_2_1 = bitcast i8* %u368_tmp_2_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_2_1, align 32
%u368_tmp_3_0 = getelementptr inbounds i8, i8* %u368_out, i64 64
%u368_tmp_3_1 = bitcast i8* %u368_tmp_3_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_3_1, align 32
%u368_tmp_4_0 = getelementptr inbounds i8, i8* %u368_out, i64 96
%u368_tmp_4_1 = bitcast i8* %u368_tmp_4_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_4_1, align 32
%u368_tmp_5_0 = getelementptr inbounds i8, i8* %u368_out, i64 128
%u368_tmp_5_1 = bitcast i8* %u368_tmp_5_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_5_1, align 32
%u368_tmp_6_0 = getelementptr inbounds i8, i8* %u368_out, i64 160
%u368_tmp_6_1 = bitcast i8* %u368_tmp_6_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_6_1, align 32
%u368_tmp_7_0 = getelementptr inbounds i8, i8* %u368_out, i64 192
%u368_tmp_7_1 = bitcast i8* %u368_tmp_7_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_7_1, align 32
%u368_tmp_8_0 = getelementptr inbounds i8, i8* %u368_out, i64 224
%u368_tmp_8_1 = bitcast i8* %u368_tmp_8_0 to <8 x i32> *
store <8 x i32> zeroinitializer, <8 x i32>* %u368_tmp_8_1, align 32
%u368_tmp_13_0 = getelementptr inbounds i8, i8* %u368_out, i64 256
%u368_tmp_13_1 = bitcast i8* %u368_tmp_13_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_13_1, align 32
%u368_tmp_14_0 = getelementptr inbounds i8, i8* %u368_out, i64 288
%u368_tmp_14_1 = bitcast i8* %u368_tmp_14_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_14_1, align 32
%u368_tmp_15_0 = getelementptr inbounds i8, i8* %u368_out, i64 320
%u368_tmp_15_1 = bitcast i8* %u368_tmp_15_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_15_1, align 32
%u368_tmp_16_0 = getelementptr inbounds i8, i8* %u368_out, i64 352
%u368_tmp_16_1 = bitcast i8* %u368_tmp_16_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_16_1, align 32
%u368_tmp_17_0 = getelementptr inbounds i8, i8* %u368_out, i64 384
%u368_tmp_17_1 = bitcast i8* %u368_tmp_17_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u368_tmp_17_1, align 32
br label %l368_check
l368_check:
%u368_self = phi <8 x i32> [ %u368_self_initial, %l368_start ], [ %u368_self_next, %l368_call ]
%u368_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u368_self)
%u368_valid = icmp ne i32 %u368_next, 0
br i1 %u368_valid, label %l368_call, label %l368_end
l368_call:
%u368_bcast_0 = insertelement <8 x i32> undef, i32 %u368_next, i32 0
%u368_bcast = shufflevector <8 x i32> %u368_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u368_active = icmp eq <8 x i32> %u368_self, %u368_bcast
%u368_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u368_next
%u368_func_1 = load i8*, i8** %u368_func_0
%u368_func = bitcast i8* %u368_func_1 to void (<8 x i1>, <8 x i32>, i8*, i8*, <8 x i32>)*
call void %u368_func(<8 x i1> %u368_active, <8 x i32> %r361, i8* %buffer, i8* %rd367, <8 x i32> %u368_offset)
%u368_self_next = select <8 x i1> %u368_active, <8 x i32> zeroinitializer, <8 x i32> %u368_self
br label %l368_check
l368_end:
%u368_out_0_0 = getelementptr inbounds i8, i8* %u368_out, i64 0
%u368_out_0_1 = bitcast i8* %u368_out_0_0 to <8 x float> *
%f381 = load <8 x float>, <8 x float>* %u368_out_0_1, align 32
%u368_out_2_0 = getelementptr inbounds i8, i8* %u368_out, i64 32
%u368_out_2_1 = bitcast i8* %u368_out_2_0 to <8 x float> *
%f370 = load <8 x float>, <8 x float>* %u368_out_2_1, align 32
%u368_out_3_0 = getelementptr inbounds i8, i8* %u368_out, i64 64
%u368_out_3_1 = bitcast i8* %u368_out_3_0 to <8 x float> *
%f371 = load <8 x float>, <8 x float>* %u368_out_3_1, align 32
%u368_out_4_0 = getelementptr inbounds i8, i8* %u368_out, i64 96
%u368_out_4_1 = bitcast i8* %u368_out_4_0 to <8 x float> *
%f372 = load <8 x float>, <8 x float>* %u368_out_4_1, align 32
%u368_out_5_0 = getelementptr inbounds i8, i8* %u368_out, i64 128
%u368_out_5_1 = bitcast i8* %u368_out_5_0 to <8 x float> *
%f373 = load <8 x float>, <8 x float>* %u368_out_5_1, align 32
%u368_out_6_0 = getelementptr inbounds i8, i8* %u368_out, i64 160
%u368_out_6_1 = bitcast i8* %u368_out_6_0 to <8 x float> *
%f374 = load <8 x float>, <8 x float>* %u368_out_6_1, align 32
%u368_out_7_0 = getelementptr inbounds i8, i8* %u368_out, i64 192
%u368_out_7_1 = bitcast i8* %u368_out_7_0 to <8 x float> *
%f375 = load <8 x float>, <8 x float>* %u368_out_7_1, align 32
%u368_out_8_0 = getelementptr inbounds i8, i8* %u368_out, i64 224
%u368_out_8_1 = bitcast i8* %u368_out_8_0 to <8 x i32> *
%r376 = load <8 x i32>, <8 x i32>* %u368_out_8_1, align 32
%u368_out_13_0 = getelementptr inbounds i8, i8* %u368_out, i64 256
%u368_out_13_1 = bitcast i8* %u368_out_13_0 to <8 x float> *
%f382 = load <8 x float>, <8 x float>* %u368_out_13_1, align 32
%u368_out_14_0 = getelementptr inbounds i8, i8* %u368_out, i64 288
%u368_out_14_1 = bitcast i8* %u368_out_14_0 to <8 x float> *
%f380 = load <8 x float>, <8 x float>* %u368_out_14_1, align 32
%u368_out_15_0 = getelementptr inbounds i8, i8* %u368_out, i64 320
%u368_out_15_1 = bitcast i8* %u368_out_15_0 to <8 x float> *
%f379 = load <8 x float>, <8 x float>* %u368_out_15_1, align 32
%u368_out_16_0 = getelementptr inbounds i8, i8* %u368_out, i64 352
%u368_out_16_1 = bitcast i8* %u368_out_16_0 to <8 x float> *
%f378 = load <8 x float>, <8 x float>* %u368_out_16_1, align 32
%u368_out_17_0 = getelementptr inbounds i8, i8* %u368_out, i64 384
%u368_out_17_1 = bitcast i8* %u368_out_17_0 to <8 x float> *
%f377 = load <8 x float>, <8 x float>* %u368_out_17_1, align 32
br label %l368_done
l368_done:
%f369 = select <8 x i1> %p364, <8 x float> %f365, <8 x float> zeroinitializer
%p383 = xor <8 x i1> %p359, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%f384 = select <8 x i1> %p383, <8 x float> %f294, <8 x float> %f369
%p385 = fcmp one <8 x float> %f384, %f294
%p386 = and <8 x i1> %p359, %p385
%p387 = xor <8 x i1> %p386, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r388 = select <8 x i1> %p387, <8 x i32> %r289, <8 x i32> %r376
%f389 = fmul <8 x float> %f377, %f380
%f390 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f381, <8 x float> %f389)
%f391 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f382, <8 x float> %f390)
%f392 = fneg <8 x float> %f391
%f393 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f392, <8 x float> %f380)
%f394 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f392, <8 x float> %f381)
%f395 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f392, <8 x float> %f382)
%f396 = fmul <8 x float> %f393, %f393
%f397 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f394, <8 x float> %f394, <8 x float> %f396)
%f398 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f395, <8 x float> %f395, <8 x float> %f397)
%f399 = fdiv <8 x float> %f297, %f398
%f400 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f399)
%f401 = fmul <8 x float> %f393, %f400
%f402 = fmul <8 x float> %f394, %f400
%f403 = fmul <8 x float> %f395, %f400
%p404 = fcmp oeq <8 x float> %f380, %f290
%p405 = fcmp oeq <8 x float> %f381, %f290
%p406 = fcmp oeq <8 x float> %f382, %f290
%p407 = and <8 x i1> %p404, %p405
%p408 = and <8 x i1> %p407, %p406
%p409 = fcmp oge <8 x float> %f379, %f290
%f410 = select <8 x i1> %p409, <8 x float> %f297, <8 x float> %f298
%f411 = fadd <8 x float> %f410, %f379
%f412 = fdiv <8 x float> %f297, %f411
%f413 = fneg <8 x float> %f412
%f414 = fmul <8 x float> %f377, %f378
%f415 = fmul <8 x float> %f414, %f413
%f416 = fmul <8 x float> %f377, %f377
%f417 = fmul <8 x float> %f416, %f413
%f418 = fneg <8 x float> %f417
%f419 = select <8 x i1> %p409, <8 x float> %f417, <8 x float> %f418
%f420 = fadd <8 x float> %f419, %f297
%f421 = fneg <8 x float> %f415
%f422 = select <8 x i1> %p409, <8 x float> %f415, <8 x float> %f421
%f423 = fneg <8 x float> %f377
%f424 = select <8 x i1> %p409, <8 x float> %f423, <8 x float> %f377
%f425 = select <8 x i1> %p408, <8 x float> %f420, <8 x float> %f401
%f426 = select <8 x i1> %p408, <8 x float> %f422, <8 x float> %f402
%f427 = select <8 x i1> %p408, <8 x float> %f424, <8 x float> %f403
%f428 = fmul <8 x float> %f379, %f426
%f429 = fmul <8 x float> %f377, %f427
%f430 = fmul <8 x float> %f378, %f425
%f431 = fneg <8 x float> %f428
%f432 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f427, <8 x float> %f431)
%f433 = fneg <8 x float> %f429
%f434 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f425, <8 x float> %f433)
%f435 = fneg <8 x float> %f430
%f436 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f426, <8 x float> %f435)
%f437 = fmul <8 x float> %f299, %f425
%f438 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f426, <8 x float> %f437)
%f439 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f427, <8 x float> %f438)
%f440 = fmul <8 x float> %f299, %f432
%f441 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f434, <8 x float> %f440)
%f442 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f436, <8 x float> %f441)
%f443 = fmul <8 x float> %f299, %f377
%f444 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f300, <8 x float> %f378, <8 x float> %f443)
%f445 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f301, <8 x float> %f379, <8 x float> %f444)
%f446 = select <8 x i1> %p386, <8 x float> %f439, <8 x float> %f299
%f447 = select <8 x i1> %p386, <8 x float> %f442, <8 x float> %f300
%f448 = select <8 x i1> %p386, <8 x float> %f445, <8 x float> %f301
%p449 = and <8 x i1> %p385, %p282
%p450 = icmp ne <8 x i32> %r388, %r289
%f451 = fsub <8 x float> %f370, %f277
%f452 = fsub <8 x float> %f371, %f278
%f453 = fsub <8 x float> %f372, %f279
%f454 = fmul <8 x float> %f451, %f451
%f455 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f452, <8 x float> %f452, <8 x float> %f454)
%f456 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f453, <8 x float> %f453, <8 x float> %f455)
%f457 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f456)
%f458 = fdiv <8 x float> %f297, %f457
%f459 = fmul <8 x float> %f451, %f458
%f460 = fmul <8 x float> %f452, %f458
%f461 = fmul <8 x float> %f453, %f458
%f462 = fneg <8 x float> %f446
%f463 = fneg <8 x float> %f447
%f464 = fneg <8 x float> %f448
%f465 = select <8 x i1> %p385, <8 x float> %f459, <8 x float> %f462
%f466 = select <8 x i1> %p385, <8 x float> %f460, <8 x float> %f463
%f467 = select <8 x i1> %p385, <8 x float> %f461, <8 x float> %f464
%p468 = and <8 x i1> %p450, %p285
%rd469_p1 = getelementptr inbounds i8*, i8** %params, i32 40
%rd469 = load i8*, i8** %rd469_p1, align 8, !alias.scope !2
%r470_0 = bitcast i8* %rd469 to i32*
%r470_1 = getelementptr i32, i32* %r470_0, <8 x i32> %r388
%r470 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r470_1, i32 4, <8 x i1> %p468, <8 x i32> zeroinitializer)
%p471 = icmp ne <8 x i32> %r470, %r289
%p472 = and <8 x i1> %p302, %p471
%p473 = and <8 x i1> %p472, %p285
%rd474_p1 = getelementptr inbounds i8*, i8** %params, i32 41
%rd474 = load i8*, i8** %rd474_p1, align 8, !alias.scope !2
%rd475_p1 = getelementptr inbounds i8*, i8** %params, i32 42
%rd475 = load i8*, i8** %rd475_p1, align 8, !alias.scope !2
br label %l476_start
l476_start:
; VCall: mitsuba::Emitter::pdf_direction()
; - target 1 = @func_bf888b7a279d5208759c322da4df8aa9;
%u476_self_ptr_0 = bitcast i8* %rd474 to i64*
%u476_self_ptr = getelementptr i64, i64* %u476_self_ptr_0, <8 x i32> %r470
%u476_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u476_self_ptr, i32 8, <8 x i1> %p473, <8 x i64> zeroinitializer)
%u476_self_initial = trunc <8 x i64> %u476_self_combined to <8 x i32>
%u476_offset_1 = lshr <8 x i64> %u476_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u476_offset = trunc <8 x i64> %u476_offset_1 to <8 x i32>
%u476_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u476_in_0_1 = bitcast i8* %u476_in_0_0 to <8 x float> *
store <8 x float> %f377, <8 x float>* %u476_in_0_1, align 32
%u476_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u476_in_1_1 = bitcast i8* %u476_in_1_0 to <8 x float> *
store <8 x float> %f378, <8 x float>* %u476_in_1_1, align 32
%u476_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u476_in_2_1 = bitcast i8* %u476_in_2_0 to <8 x float> *
store <8 x float> %f379, <8 x float>* %u476_in_2_1, align 32
%u476_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u476_in_3_1 = bitcast i8* %u476_in_3_0 to <8 x float> *
store <8 x float> %f465, <8 x float>* %u476_in_3_1, align 32
%u476_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u476_in_4_1 = bitcast i8* %u476_in_4_0 to <8 x float> *
store <8 x float> %f466, <8 x float>* %u476_in_4_1, align 32
%u476_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u476_in_5_1 = bitcast i8* %u476_in_5_0 to <8 x float> *
store <8 x float> %f467, <8 x float>* %u476_in_5_1, align 32
%u476_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u476_in_6_1 = bitcast i8* %u476_in_6_0 to <8 x float> *
store <8 x float> %f457, <8 x float>* %u476_in_6_1, align 32
%u476_out = getelementptr i8, i8* %buffer, i32 224
%u476_tmp_0_0 = getelementptr inbounds i8, i8* %u476_out, i64 0
%u476_tmp_0_1 = bitcast i8* %u476_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u476_tmp_0_1, align 32
br label %l476_check
l476_check:
%u476_self = phi <8 x i32> [ %u476_self_initial, %l476_start ], [ %u476_self_next, %l476_call ]
%u476_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u476_self)
%u476_valid = icmp ne i32 %u476_next, 0
br i1 %u476_valid, label %l476_call, label %l476_end
l476_call:
%u476_bcast_0 = insertelement <8 x i32> undef, i32 %u476_next, i32 0
%u476_bcast = shufflevector <8 x i32> %u476_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u476_active = icmp eq <8 x i32> %u476_self, %u476_bcast
%u476_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u476_next
%u476_func_1 = load i8*, i8** %u476_func_0
%u476_func = bitcast i8* %u476_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)*
call void %u476_func(<8 x i1> %u476_active, i8* %buffer, i8* %rd475, <8 x i32> %u476_offset)
%u476_self_next = select <8 x i1> %u476_active, <8 x i32> zeroinitializer, <8 x i32> %u476_self
br label %l476_check
l476_end:
%u476_out_0_0 = getelementptr inbounds i8, i8* %u476_out, i64 0
%u476_out_0_1 = bitcast i8* %u476_out_0_0 to <8 x float> *
%f477 = load <8 x float>, <8 x float>* %u476_out_0_1, align 32
br label %l476_done
l476_done:
%f478 = fmul <8 x float> %f477, %f477
%f479 = fadd <8 x float> %f303, %f478
%f480 = fdiv <8 x float> %f303, %f479
%f481 = select <8 x i1> %p304, <8 x float> %f480, <8 x float> zeroinitializer
%p482 = and <8 x i1> %p304, %p471
%p483 = and <8 x i1> %p482, %p285
%rd484_p1 = getelementptr inbounds i8*, i8** %params, i32 43
%rd484 = load i8*, i8** %rd484_p1, align 8, !alias.scope !2
%rd485_p1 = getelementptr inbounds i8*, i8** %params, i32 44
%rd485 = load i8*, i8** %rd485_p1, align 8, !alias.scope !2
br label %l486_start
l486_start:
; VCall: mitsuba::Emitter::eval()
; - target 1 = @func_972f6c492a7ec6284cf60b520b91aba3;
%u486_self_ptr_0 = bitcast i8* %rd484 to i64*
%u486_self_ptr = getelementptr i64, i64* %u486_self_ptr_0, <8 x i32> %r470
%u486_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u486_self_ptr, i32 8, <8 x i1> %p483, <8 x i64> zeroinitializer)
%u486_self_initial = trunc <8 x i64> %u486_self_combined to <8 x i32>
%u486_offset_1 = lshr <8 x i64> %u486_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u486_offset = trunc <8 x i64> %u486_offset_1 to <8 x i32>
%u486_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u486_in_0_1 = bitcast i8* %u486_in_0_0 to <8 x float> *
store <8 x float> %f377, <8 x float>* %u486_in_0_1, align 32
%u486_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u486_in_1_1 = bitcast i8* %u486_in_1_0 to <8 x float> *
store <8 x float> %f378, <8 x float>* %u486_in_1_1, align 32
%u486_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u486_in_2_1 = bitcast i8* %u486_in_2_0 to <8 x float> *
store <8 x float> %f379, <8 x float>* %u486_in_2_1, align 32
%u486_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u486_in_3_1 = bitcast i8* %u486_in_3_0 to <8 x float> *
store <8 x float> %f448, <8 x float>* %u486_in_3_1, align 32
%u486_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u486_in_4_1 = bitcast i8* %u486_in_4_0 to <8 x i32> *
store <8 x i32> %r346, <8 x i32>* %u486_in_4_1, align 32
%u486_out = getelementptr i8, i8* %buffer, i32 160
%u486_tmp_0_0 = getelementptr inbounds i8, i8* %u486_out, i64 0
%u486_tmp_0_1 = bitcast i8* %u486_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_0_1, align 32
%u486_tmp_1_0 = getelementptr inbounds i8, i8* %u486_out, i64 32
%u486_tmp_1_1 = bitcast i8* %u486_tmp_1_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_1_1, align 32
%u486_tmp_2_0 = getelementptr inbounds i8, i8* %u486_out, i64 64
%u486_tmp_2_1 = bitcast i8* %u486_tmp_2_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u486_tmp_2_1, align 32
br label %l486_check
l486_check:
%u486_self = phi <8 x i32> [ %u486_self_initial, %l486_start ], [ %u486_self_next, %l486_call ]
%u486_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u486_self)
%u486_valid = icmp ne i32 %u486_next, 0
br i1 %u486_valid, label %l486_call, label %l486_end
l486_call:
%u486_bcast_0 = insertelement <8 x i32> undef, i32 %u486_next, i32 0
%u486_bcast = shufflevector <8 x i32> %u486_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u486_active = icmp eq <8 x i32> %u486_self, %u486_bcast
%u486_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u486_next
%u486_func_1 = load i8*, i8** %u486_func_0
%u486_func = bitcast i8* %u486_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)*
call void %u486_func(<8 x i1> %u486_active, i8* %buffer, i8* %rd485, <8 x i32> %u486_offset)
%u486_self_next = select <8 x i1> %u486_active, <8 x i32> zeroinitializer, <8 x i32> %u486_self
br label %l486_check
l486_end:
%u486_out_0_0 = getelementptr inbounds i8, i8* %u486_out, i64 0
%u486_out_0_1 = bitcast i8* %u486_out_0_0 to <8 x float> *
%f487 = load <8 x float>, <8 x float>* %u486_out_0_1, align 32
%u486_out_1_0 = getelementptr inbounds i8, i8* %u486_out, i64 32
%u486_out_1_1 = bitcast i8* %u486_out_1_0 to <8 x float> *
%f488 = load <8 x float>, <8 x float>* %u486_out_1_1, align 32
%u486_out_2_0 = getelementptr inbounds i8, i8* %u486_out, i64 64
%u486_out_2_1 = bitcast i8* %u486_out_2_0 to <8 x float> *
%f489 = load <8 x float>, <8 x float>* %u486_out_2_1, align 32
br label %l486_done
l486_done:
%f490 = fmul <8 x float> %f487, %f481
%f491 = fmul <8 x float> %f488, %f481
%f492 = fmul <8 x float> %f489, %f481
%f493 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f269, <8 x float> %f490, <8 x float> %f272)
%f494 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f270, <8 x float> %f491, <8 x float> %f273)
%f495 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f271, <8 x float> %f492, <8 x float> %f274)
%p496 = and <8 x i1> %p308, %p385
%rd497_p1 = getelementptr inbounds i8*, i8** %params, i32 45
%rd497 = load i8*, i8** %rd497_p1, align 8, !alias.scope !2
%r498_0 = bitcast i8* %rd497 to i32*
%r498_1 = getelementptr i32, i32* %r498_0, <8 x i32> %r388
%r498 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r498_1, i32 4, <8 x i1> %p468, <8 x i32> zeroinitializer)
%p499 = icmp ne <8 x i32> %r498, %r289
%p500 = and <8 x i1> %p499, %p285
%rd501_p1 = getelementptr inbounds i8*, i8** %params, i32 46
%rd501 = load i8*, i8** %rd501_p1, align 8, !alias.scope !2
%r502_0 = bitcast i8* %rd501 to i32*
%r502_1 = getelementptr i32, i32* %r502_0, <8 x i32> %r498
%r502 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r502_1, i32 4, <8 x i1> %p500, <8 x i32> zeroinitializer)
%r503 = and <8 x i32> %r502, %r309
%p504 = icmp ne <8 x i32> %r503, %r289
%p505 = and <8 x i1> %p496, %p504
%rd506 = select <8 x i1> %p505, <8 x i64> %rd311, <8 x i64> %rd261
%rd507_0 = mul <8 x i64> %rd506, %rd310
%rd507 = add <8 x i64> %rd507_0, %rd112
%rd508 = select <8 x i1> %p505, <8 x i64> %rd507, <8 x i64> %rd506
%rd509 = lshr <8 x i64> %rd506, %rd312
%rd510 = xor <8 x i64> %rd509, %rd506
%rd511 = lshr <8 x i64> %rd510, %rd315
%r512 = trunc <8 x i64> %rd511 to <8 x i32>
%rd513 = lshr <8 x i64> %rd506, %rd318
%r514 = trunc <8 x i64> %rd513 to <8 x i32>
%r515 = lshr <8 x i32> %r512, %r514
%r516 = bitcast <8 x i32> %r514 to <8 x i32>
%r517 = sub <8 x i32> zeroinitializer, %r516
%r518 = and <8 x i32> %r517, %r324
%r519 = bitcast <8 x i32> %r518 to <8 x i32>
%r520 = shl <8 x i32> %r512, %r519
%r521 = or <8 x i32> %r515, %r520
%r522 = lshr <8 x i32> %r521, %r329
%r523 = or <8 x i32> %r522, %r331
%f524 = bitcast <8 x i32> %r523 to <8 x float>
%f525 = fsub <8 x float> %f524, %f297
%p526 = and <8 x i1> %p505, %p285
%r527_0 = bitcast i8* %rd335 to i32*
%r527_1 = getelementptr i32, i32* %r527_0, <8 x i32> %r289
%r527 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r527_1, i32 4, <8 x i1> %p526, <8 x i32> zeroinitializer)
%p528 = icmp ne <8 x i32> %r527, %r289
%p529 = and <8 x i1> %p505, %p528
%p530 = and <8 x i1> %p529, %p285
%rd531_p1 = getelementptr inbounds i8*, i8** %params, i32 47
%rd531 = load i8*, i8** %rd531_p1, align 8, !alias.scope !2
%rd532_p1 = getelementptr inbounds i8*, i8** %params, i32 48
%rd532 = load i8*, i8** %rd532_p1, align 8, !alias.scope !2
br label %l533_start
l533_start:
; VCall: mitsuba::Emitter::sample_direction()
; - target 1 = @func_19ee132d0bd6c9e577b7250c22c38555;
%u533_self_ptr_0 = bitcast i8* %rd531 to i64*
%u533_self_ptr = getelementptr i64, i64* %u533_self_ptr_0, <8 x i32> %r527
%u533_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u533_self_ptr, i32 8, <8 x i1> %p530, <8 x i64> zeroinitializer)
%u533_self_initial = trunc <8 x i64> %u533_self_combined to <8 x i32>
%u533_offset_1 = lshr <8 x i64> %u533_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u533_offset = trunc <8 x i64> %u533_offset_1 to <8 x i32>
%u533_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u533_in_0_1 = bitcast i8* %u533_in_0_0 to <8 x float> *
store <8 x float> %f370, <8 x float>* %u533_in_0_1, align 32
%u533_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u533_in_1_1 = bitcast i8* %u533_in_1_0 to <8 x float> *
store <8 x float> %f371, <8 x float>* %u533_in_1_1, align 32
%u533_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u533_in_2_1 = bitcast i8* %u533_in_2_0 to <8 x float> *
store <8 x float> %f372, <8 x float>* %u533_in_2_1, align 32
%u533_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u533_in_3_1 = bitcast i8* %u533_in_3_0 to <8 x float> *
store <8 x float> %f334, <8 x float>* %u533_in_3_1, align 32
%u533_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u533_in_4_1 = bitcast i8* %u533_in_4_0 to <8 x float> *
store <8 x float> %f525, <8 x float>* %u533_in_4_1, align 32
%u533_out = getelementptr i8, i8* %buffer, i32 160
%u533_tmp_0_0 = getelementptr inbounds i8, i8* %u533_out, i64 0
%u533_tmp_0_1 = bitcast i8* %u533_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_0_1, align 32
%u533_tmp_1_0 = getelementptr inbounds i8, i8* %u533_out, i64 32
%u533_tmp_1_1 = bitcast i8* %u533_tmp_1_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_1_1, align 32
%u533_tmp_2_0 = getelementptr inbounds i8, i8* %u533_out, i64 64
%u533_tmp_2_1 = bitcast i8* %u533_tmp_2_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_2_1, align 32
%u533_tmp_8_0 = getelementptr inbounds i8, i8* %u533_out, i64 96
%u533_tmp_8_1 = bitcast i8* %u533_tmp_8_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_8_1, align 32
%u533_tmp_9_0 = getelementptr inbounds i8, i8* %u533_out, i64 128
%u533_tmp_9_1 = bitcast i8* %u533_tmp_9_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_9_1, align 32
%u533_tmp_10_0 = getelementptr inbounds i8, i8* %u533_out, i64 160
%u533_tmp_10_1 = bitcast i8* %u533_tmp_10_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_10_1, align 32
%u533_tmp_11_0 = getelementptr inbounds i8, i8* %u533_out, i64 192
%u533_tmp_11_1 = bitcast i8* %u533_tmp_11_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_11_1, align 32
%u533_tmp_14_0 = getelementptr inbounds i8, i8* %u533_out, i64 224
%u533_tmp_14_1 = bitcast i8* %u533_tmp_14_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_14_1, align 32
%u533_tmp_15_0 = getelementptr inbounds i8, i8* %u533_out, i64 256
%u533_tmp_15_1 = bitcast i8* %u533_tmp_15_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_15_1, align 32
%u533_tmp_16_0 = getelementptr inbounds i8, i8* %u533_out, i64 288
%u533_tmp_16_1 = bitcast i8* %u533_tmp_16_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u533_tmp_16_1, align 32
br label %l533_check
l533_check:
%u533_self = phi <8 x i32> [ %u533_self_initial, %l533_start ], [ %u533_self_next, %l533_call ]
%u533_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u533_self)
%u533_valid = icmp ne i32 %u533_next, 0
br i1 %u533_valid, label %l533_call, label %l533_end
l533_call:
%u533_bcast_0 = insertelement <8 x i32> undef, i32 %u533_next, i32 0
%u533_bcast = shufflevector <8 x i32> %u533_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u533_active = icmp eq <8 x i32> %u533_self, %u533_bcast
%u533_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u533_next
%u533_func_1 = load i8*, i8** %u533_func_0
%u533_func = bitcast i8* %u533_func_1 to void (<8 x i1>, <8 x i32>, i8*, i8*, <8 x i32>)*
call void %u533_func(<8 x i1> %u533_active, <8 x i32> %r527, i8* %buffer, i8* %rd532, <8 x i32> %u533_offset)
%u533_self_next = select <8 x i1> %u533_active, <8 x i32> zeroinitializer, <8 x i32> %u533_self
br label %l533_check
l533_end:
%u533_out_0_0 = getelementptr inbounds i8, i8* %u533_out, i64 0
%u533_out_0_1 = bitcast i8* %u533_out_0_0 to <8 x float> *
%f534 = load <8 x float>, <8 x float>* %u533_out_0_1, align 32
%u533_out_1_0 = getelementptr inbounds i8, i8* %u533_out, i64 32
%u533_out_1_1 = bitcast i8* %u533_out_1_0 to <8 x float> *
%f535 = load <8 x float>, <8 x float>* %u533_out_1_1, align 32
%u533_out_2_0 = getelementptr inbounds i8, i8* %u533_out, i64 64
%u533_out_2_1 = bitcast i8* %u533_out_2_0 to <8 x float> *
%f536 = load <8 x float>, <8 x float>* %u533_out_2_1, align 32
%u533_out_8_0 = getelementptr inbounds i8, i8* %u533_out, i64 96
%u533_out_8_1 = bitcast i8* %u533_out_8_0 to <8 x float> *
%f537 = load <8 x float>, <8 x float>* %u533_out_8_1, align 32
%u533_out_9_0 = getelementptr inbounds i8, i8* %u533_out, i64 128
%u533_out_9_1 = bitcast i8* %u533_out_9_0 to <8 x float> *
%f538 = load <8 x float>, <8 x float>* %u533_out_9_1, align 32
%u533_out_10_0 = getelementptr inbounds i8, i8* %u533_out, i64 160
%u533_out_10_1 = bitcast i8* %u533_out_10_0 to <8 x float> *
%f539 = load <8 x float>, <8 x float>* %u533_out_10_1, align 32
%u533_out_11_0 = getelementptr inbounds i8, i8* %u533_out, i64 192
%u533_out_11_1 = bitcast i8* %u533_out_11_0 to <8 x float> *
%f540 = load <8 x float>, <8 x float>* %u533_out_11_1, align 32
%u533_out_14_0 = getelementptr inbounds i8, i8* %u533_out, i64 224
%u533_out_14_1 = bitcast i8* %u533_out_14_0 to <8 x float> *
%f541 = load <8 x float>, <8 x float>* %u533_out_14_1, align 32
%u533_out_15_0 = getelementptr inbounds i8, i8* %u533_out, i64 256
%u533_out_15_1 = bitcast i8* %u533_out_15_0 to <8 x float> *
%f542 = load <8 x float>, <8 x float>* %u533_out_15_1, align 32
%u533_out_16_0 = getelementptr inbounds i8, i8* %u533_out, i64 288
%u533_out_16_1 = bitcast i8* %u533_out_16_0 to <8 x float> *
%f543 = load <8 x float>, <8 x float>* %u533_out_16_1, align 32
br label %l533_done
l533_done:
%p544 = fcmp one <8 x float> %f537, %f290
%p545 = and <8 x i1> %p505, %p544
%f546 = fsub <8 x float> %f534, %f370
%f547 = fsub <8 x float> %f535, %f371
%f548 = fsub <8 x float> %f536, %f372
%f549_0 = bitcast <8 x float> %f370 to <8 x i32>
%f549_1 = bitcast <8 x float> %f336 to <8 x i32>
%f549_2 = and <8 x i32> %f549_0, %f549_1
%f549 = bitcast <8 x i32> %f549_2 to <8 x float>
%f550_0 = bitcast <8 x float> %f371 to <8 x i32>
%f550_1 = bitcast <8 x float> %f336 to <8 x i32>
%f550_2 = and <8 x i32> %f550_0, %f550_1
%f550 = bitcast <8 x i32> %f550_2 to <8 x float>
%f551_0 = bitcast <8 x float> %f372 to <8 x i32>
%f551_1 = bitcast <8 x float> %f336 to <8 x i32>
%f551_2 = and <8 x i32> %f551_0, %f551_1
%f551 = bitcast <8 x i32> %f551_2 to <8 x float>
%f552 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f549, <8 x float> %f550)
%f553 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f552, <8 x float> %f551)
%f554 = fadd <8 x float> %f297, %f553
%f555 = fmul <8 x float> %f554, %f337
%f556 = fmul <8 x float> %f373, %f546
%f557 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f374, <8 x float> %f547, <8 x float> %f556)
%f558 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f375, <8 x float> %f548, <8 x float> %f557)
%p559 = fcmp oge <8 x float> %f558, %f290
%f560 = fneg <8 x float> %f555
%f561 = select <8 x i1> %p559, <8 x float> %f555, <8 x float> %f560
%f562 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f373, <8 x float> %f370)
%f563 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f374, <8 x float> %f371)
%f564 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f561, <8 x float> %f375, <8 x float> %f372)
%f565 = fsub <8 x float> %f534, %f562
%f566 = fsub <8 x float> %f535, %f563
%f567 = fsub <8 x float> %f536, %f564
%f568 = fmul <8 x float> %f565, %f565
%f569 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f566, <8 x float> %f566, <8 x float> %f568)
%f570 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f567, <8 x float> %f567, <8 x float> %f569)
%f571 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f570)
%f572 = fdiv <8 x float> %f297, %f571
%f573 = fmul <8 x float> %f565, %f572
%f574 = fmul <8 x float> %f566, %f572
%f575 = fmul <8 x float> %f567, %f572
%f576 = fmul <8 x float> %f571, %f338
%p577 = and <8 x i1> %p545, %p285
%r578 = select <8 x i1> %p577, <8 x i32> %r291, <8 x i32> zeroinitializer
; -------- Ray test -------
%u579_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u579_in_0_1 = bitcast i8* %u579_in_0_0 to <8 x i32> *
store <8 x i32> %r578, <8 x i32>* %u579_in_0_1, align 32
%u579_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u579_in_1_1 = bitcast i8* %u579_in_1_0 to <8 x float> *
store <8 x float> %f562, <8 x float>* %u579_in_1_1, align 32
%u579_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u579_in_2_1 = bitcast i8* %u579_in_2_0 to <8 x float> *
store <8 x float> %f563, <8 x float>* %u579_in_2_1, align 32
%u579_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u579_in_3_1 = bitcast i8* %u579_in_3_0 to <8 x float> *
store <8 x float> %f564, <8 x float>* %u579_in_3_1, align 32
%u579_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u579_in_4_1 = bitcast i8* %u579_in_4_0 to <8 x float> *
store <8 x float> %f290, <8 x float>* %u579_in_4_1, align 32
%u579_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u579_in_5_1 = bitcast i8* %u579_in_5_0 to <8 x float> *
store <8 x float> %f573, <8 x float>* %u579_in_5_1, align 32
%u579_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u579_in_6_1 = bitcast i8* %u579_in_6_0 to <8 x float> *
store <8 x float> %f574, <8 x float>* %u579_in_6_1, align 32
%u579_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224
%u579_in_7_1 = bitcast i8* %u579_in_7_0 to <8 x float> *
store <8 x float> %f575, <8 x float>* %u579_in_7_1, align 32
%u579_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256
%u579_in_8_1 = bitcast i8* %u579_in_8_0 to <8 x float> *
store <8 x float> %f229, <8 x float>* %u579_in_8_1, align 32
%u579_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u579_in_9_1 = bitcast i8* %u579_in_9_0 to <8 x float> *
store <8 x float> %f576, <8 x float>* %u579_in_9_1, align 32
%u579_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320
%u579_in_10_1 = bitcast i8* %u579_in_10_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u579_in_10_1, align 32
%u579_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352
%u579_in_11_1 = bitcast i8* %u579_in_11_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u579_in_11_1, align 32
%u579_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384
%u579_in_12_1 = bitcast i8* %u579_in_12_0 to <8 x i32> *
store <8 x i32> %r289, <8 x i32>* %u579_in_12_1, align 32
%u579_in_ctx_0 = getelementptr inbounds i8, i8* %buffer, i32 416
%u579_in_ctx_1 = bitcast i8* %u579_in_ctx_0 to <6 x i32> *
store <6 x i32> <i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0>, <6 x i32>* %u579_in_ctx_1, align 4
%u579_func = bitcast i8* %rd339 to void (i8*, i8*, i8*, i8*)*
call void %u579_func(i8* %u579_in_0_0, i8* %rd288, i8* %u579_in_ctx_0, i8* %u579_in_1_0)
%u579_out_0_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u579_out_0_1 = bitcast i8* %u579_out_0_0 to <8 x float> *
%u579_out_0 = load <8 x float>, <8 x float>* %u579_out_0_1, align 32
; -------------------
%f580 = bitcast <8 x float> %u579_out_0 to <8 x float>
%p581 = fcmp one <8 x float> %f580, %f576
%p582 = and <8 x i1> %p545, %p581
%f583 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f541
%f584 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f542
%f585 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f543
%f586 = select <8 x i1> %p582, <8 x float> %f290, <8 x float> %f537
%p587 = fcmp one <8 x float> %f586, %f290
%p588 = and <8 x i1> %p505, %p587
%f589 = fmul <8 x float> %f538, %f377
%f590 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f539, <8 x float> %f378, <8 x float> %f589)
%f591 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f540, <8 x float> %f379, <8 x float> %f590)
%p592 = and <8 x i1> %p588, %p499
%p593 = and <8 x i1> %p592, %p285
%rd594_p1 = getelementptr inbounds i8*, i8** %params, i32 49
%rd594 = load i8*, i8** %rd594_p1, align 8, !alias.scope !2
%rd595_p1 = getelementptr inbounds i8*, i8** %params, i32 50
%rd595 = load i8*, i8** %rd595_p1, align 8, !alias.scope !2
br label %l596_start
l596_start:
; VCall: mitsuba::BSDF::eval_pdf()
; - target 1 = @func_e9d5179176fa4aaf3f8d2cb1ff4d2c84;
; - target 2 = @func_9d53124ccfd48ba20820a9ee3e480cd9;
; - target 3 = @func_4448b4670e928f754bf33368168c3f24;
%u596_self_ptr_0 = bitcast i8* %rd594 to i64*
%u596_self_ptr = getelementptr i64, i64* %u596_self_ptr_0, <8 x i32> %r498
%u596_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u596_self_ptr, i32 8, <8 x i1> %p593, <8 x i64> zeroinitializer)
%u596_self_initial = trunc <8 x i64> %u596_self_combined to <8 x i32>
%u596_offset_1 = lshr <8 x i64> %u596_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u596_offset = trunc <8 x i64> %u596_offset_1 to <8 x i32>
%u596_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u596_in_0_1 = bitcast i8* %u596_in_0_0 to <8 x float> *
store <8 x float> %f370, <8 x float>* %u596_in_0_1, align 32
%u596_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u596_in_1_1 = bitcast i8* %u596_in_1_0 to <8 x float> *
store <8 x float> %f371, <8 x float>* %u596_in_1_1, align 32
%u596_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u596_in_2_1 = bitcast i8* %u596_in_2_0 to <8 x float> *
store <8 x float> %f372, <8 x float>* %u596_in_2_1, align 32
%u596_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u596_in_3_1 = bitcast i8* %u596_in_3_0 to <8 x float> *
store <8 x float> %f377, <8 x float>* %u596_in_3_1, align 32
%u596_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u596_in_4_1 = bitcast i8* %u596_in_4_0 to <8 x float> *
store <8 x float> %f378, <8 x float>* %u596_in_4_1, align 32
%u596_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u596_in_5_1 = bitcast i8* %u596_in_5_0 to <8 x float> *
store <8 x float> %f379, <8 x float>* %u596_in_5_1, align 32
%u596_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u596_in_6_1 = bitcast i8* %u596_in_6_0 to <8 x float> *
store <8 x float> %f448, <8 x float>* %u596_in_6_1, align 32
%u596_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224
%u596_in_7_1 = bitcast i8* %u596_in_7_0 to <8 x i32> *
store <8 x i32> %r346, <8 x i32>* %u596_in_7_1, align 32
%u596_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256
%u596_in_8_1 = bitcast i8* %u596_in_8_0 to <8 x float> *
store <8 x float> %f591, <8 x float>* %u596_in_8_1, align 32
%u596_out = getelementptr i8, i8* %buffer, i32 288
%u596_tmp_0_0 = getelementptr inbounds i8, i8* %u596_out, i64 0
%u596_tmp_0_1 = bitcast i8* %u596_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_0_1, align 32
%u596_tmp_1_0 = getelementptr inbounds i8, i8* %u596_out, i64 32
%u596_tmp_1_1 = bitcast i8* %u596_tmp_1_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_1_1, align 32
%u596_tmp_2_0 = getelementptr inbounds i8, i8* %u596_out, i64 64
%u596_tmp_2_1 = bitcast i8* %u596_tmp_2_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_2_1, align 32
%u596_tmp_3_0 = getelementptr inbounds i8, i8* %u596_out, i64 96
%u596_tmp_3_1 = bitcast i8* %u596_tmp_3_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u596_tmp_3_1, align 32
br label %l596_check
l596_check:
%u596_self = phi <8 x i32> [ %u596_self_initial, %l596_start ], [ %u596_self_next, %l596_call ]
%u596_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u596_self)
%u596_valid = icmp ne i32 %u596_next, 0
br i1 %u596_valid, label %l596_call, label %l596_end
l596_call:
%u596_bcast_0 = insertelement <8 x i32> undef, i32 %u596_next, i32 0
%u596_bcast = shufflevector <8 x i32> %u596_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u596_active = icmp eq <8 x i32> %u596_self, %u596_bcast
%u596_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u596_next
%u596_func_1 = load i8*, i8** %u596_func_0
%u596_func = bitcast i8* %u596_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)*
call void %u596_func(<8 x i1> %u596_active, i8* %buffer, i8* %rd595, <8 x i32> %u596_offset)
%u596_self_next = select <8 x i1> %u596_active, <8 x i32> zeroinitializer, <8 x i32> %u596_self
br label %l596_check
l596_end:
%u596_out_0_0 = getelementptr inbounds i8, i8* %u596_out, i64 0
%u596_out_0_1 = bitcast i8* %u596_out_0_0 to <8 x float> *
%f597 = load <8 x float>, <8 x float>* %u596_out_0_1, align 32
%u596_out_1_0 = getelementptr inbounds i8, i8* %u596_out, i64 32
%u596_out_1_1 = bitcast i8* %u596_out_1_0 to <8 x float> *
%f598 = load <8 x float>, <8 x float>* %u596_out_1_1, align 32
%u596_out_2_0 = getelementptr inbounds i8, i8* %u596_out, i64 64
%u596_out_2_1 = bitcast i8* %u596_out_2_0 to <8 x float> *
%f599 = load <8 x float>, <8 x float>* %u596_out_2_1, align 32
%u596_out_3_0 = getelementptr inbounds i8, i8* %u596_out, i64 96
%u596_out_3_1 = bitcast i8* %u596_out_3_0 to <8 x float> *
%f600 = load <8 x float>, <8 x float>* %u596_out_3_1, align 32
br label %l596_done
l596_done:
%f601 = fmul <8 x float> %f586, %f586
%f602 = fmul <8 x float> %f600, %f600
%p603 = fcmp ogt <8 x float> %f601, %f290
%f604 = fadd <8 x float> %f601, %f602
%f605 = fdiv <8 x float> %f601, %f604
%f606 = select <8 x i1> %p603, <8 x float> %f605, <8 x float> zeroinitializer
%f607 = fmul <8 x float> %f583, %f597
%f608 = fmul <8 x float> %f584, %f598
%f609 = fmul <8 x float> %f585, %f599
%f610 = fmul <8 x float> %f607, %f606
%f611 = fmul <8 x float> %f608, %f606
%f612 = fmul <8 x float> %f609, %f606
%f613 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f269, <8 x float> %f610, <8 x float> %f493)
%f614 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f270, <8 x float> %f611, <8 x float> %f494)
%f615 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f271, <8 x float> %f612, <8 x float> %f495)
%f616 = select <8 x i1> %p588, <8 x float> %f613, <8 x float> %f493
%f617 = select <8 x i1> %p588, <8 x float> %f614, <8 x float> %f494
%f618 = select <8 x i1> %p588, <8 x float> %f615, <8 x float> %f495
%rd619_0 = mul <8 x i64> %rd508, %rd310
%rd619 = add <8 x i64> %rd619_0, %rd112
%rd620 = select <8 x i1> %p449, <8 x i64> %rd619, <8 x i64> %rd508
%rd621 = lshr <8 x i64> %rd508, %rd312
%rd622 = xor <8 x i64> %rd621, %rd508
%rd623 = lshr <8 x i64> %rd622, %rd315
%r624 = trunc <8 x i64> %rd623 to <8 x i32>
%rd625 = lshr <8 x i64> %rd508, %rd318
%r626 = trunc <8 x i64> %rd625 to <8 x i32>
%r627 = lshr <8 x i32> %r624, %r626
%r628 = bitcast <8 x i32> %r626 to <8 x i32>
%r629 = sub <8 x i32> zeroinitializer, %r628
%r630 = and <8 x i32> %r629, %r324
%r631 = bitcast <8 x i32> %r630 to <8 x i32>
%r632 = shl <8 x i32> %r624, %r631
%r633 = or <8 x i32> %r627, %r632
%r634 = lshr <8 x i32> %r633, %r329
%r635 = or <8 x i32> %r634, %r331
%f636 = bitcast <8 x i32> %r635 to <8 x float>
%f637 = fsub <8 x float> %f636, %f297
%rd638_0 = mul <8 x i64> %rd620, %rd310
%rd638 = add <8 x i64> %rd638_0, %rd112
%rd639 = select <8 x i1> %p449, <8 x i64> %rd638, <8 x i64> %rd620
%rd640 = lshr <8 x i64> %rd620, %rd312
%rd641 = xor <8 x i64> %rd640, %rd620
%rd642 = lshr <8 x i64> %rd641, %rd315
%r643 = trunc <8 x i64> %rd642 to <8 x i32>
%rd644 = lshr <8 x i64> %rd620, %rd318
%r645 = trunc <8 x i64> %rd644 to <8 x i32>
%r646 = lshr <8 x i32> %r643, %r645
%r647 = bitcast <8 x i32> %r645 to <8 x i32>
%r648 = sub <8 x i32> zeroinitializer, %r647
%r649 = and <8 x i32> %r648, %r324
%r650 = bitcast <8 x i32> %r649 to <8 x i32>
%r651 = shl <8 x i32> %r643, %r650
%r652 = or <8 x i32> %r646, %r651
%r653 = lshr <8 x i32> %r652, %r329
%r654 = or <8 x i32> %r653, %r331
%f655 = bitcast <8 x i32> %r654 to <8 x float>
%f656 = fsub <8 x float> %f655, %f297
%rd657_0 = mul <8 x i64> %rd639, %rd310
%rd657 = add <8 x i64> %rd657_0, %rd112
%rd658 = select <8 x i1> %p449, <8 x i64> %rd657, <8 x i64> %rd639
%rd659 = lshr <8 x i64> %rd639, %rd312
%rd660 = xor <8 x i64> %rd659, %rd639
%rd661 = lshr <8 x i64> %rd660, %rd315
%r662 = trunc <8 x i64> %rd661 to <8 x i32>
%rd663 = lshr <8 x i64> %rd639, %rd318
%r664 = trunc <8 x i64> %rd663 to <8 x i32>
%r665 = lshr <8 x i32> %r662, %r664
%r666 = bitcast <8 x i32> %r664 to <8 x i32>
%r667 = sub <8 x i32> zeroinitializer, %r666
%r668 = and <8 x i32> %r667, %r324
%r669 = bitcast <8 x i32> %r668 to <8 x i32>
%r670 = shl <8 x i32> %r662, %r669
%r671 = or <8 x i32> %r665, %r670
%r672 = lshr <8 x i32> %r671, %r329
%r673 = or <8 x i32> %r672, %r331
%f674 = bitcast <8 x i32> %r673 to <8 x float>
%f675 = fsub <8 x float> %f674, %f297
%p676 = and <8 x i1> %p449, %p499
%p677 = and <8 x i1> %p676, %p285
%rd678_p1 = getelementptr inbounds i8*, i8** %params, i32 51
%rd678 = load i8*, i8** %rd678_p1, align 8, !alias.scope !2
%rd679_p1 = getelementptr inbounds i8*, i8** %params, i32 52
%rd679 = load i8*, i8** %rd679_p1, align 8, !alias.scope !2
br label %l680_start
l680_start:
; VCall: mitsuba::BSDF::sample()
; - target 1 = @func_60a1a348aca7bdb47e851f6cedce38a2;
; - target 2 = @func_e6fff8d6b222436f71518443ce068358;
; - target 3 = @func_48d4a1fb2376a0011da8c81578897b84;
%u680_self_ptr_0 = bitcast i8* %rd678 to i64*
%u680_self_ptr = getelementptr i64, i64* %u680_self_ptr_0, <8 x i32> %r498
%u680_self_combined = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %u680_self_ptr, i32 8, <8 x i1> %p677, <8 x i64> zeroinitializer)
%u680_self_initial = trunc <8 x i64> %u680_self_combined to <8 x i32>
%u680_offset_1 = lshr <8 x i64> %u680_self_combined, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%u680_offset = trunc <8 x i64> %u680_offset_1 to <8 x i32>
%u680_in_0_0 = getelementptr inbounds i8, i8* %buffer, i32 0
%u680_in_0_1 = bitcast i8* %u680_in_0_0 to <8 x float> *
store <8 x float> %f370, <8 x float>* %u680_in_0_1, align 32
%u680_in_1_0 = getelementptr inbounds i8, i8* %buffer, i32 32
%u680_in_1_1 = bitcast i8* %u680_in_1_0 to <8 x float> *
store <8 x float> %f371, <8 x float>* %u680_in_1_1, align 32
%u680_in_2_0 = getelementptr inbounds i8, i8* %buffer, i32 64
%u680_in_2_1 = bitcast i8* %u680_in_2_0 to <8 x float> *
store <8 x float> %f372, <8 x float>* %u680_in_2_1, align 32
%u680_in_3_0 = getelementptr inbounds i8, i8* %buffer, i32 96
%u680_in_3_1 = bitcast i8* %u680_in_3_0 to <8 x float> *
store <8 x float> %f377, <8 x float>* %u680_in_3_1, align 32
%u680_in_4_0 = getelementptr inbounds i8, i8* %buffer, i32 128
%u680_in_4_1 = bitcast i8* %u680_in_4_0 to <8 x float> *
store <8 x float> %f378, <8 x float>* %u680_in_4_1, align 32
%u680_in_5_0 = getelementptr inbounds i8, i8* %buffer, i32 160
%u680_in_5_1 = bitcast i8* %u680_in_5_0 to <8 x float> *
store <8 x float> %f379, <8 x float>* %u680_in_5_1, align 32
%u680_in_6_0 = getelementptr inbounds i8, i8* %buffer, i32 192
%u680_in_6_1 = bitcast i8* %u680_in_6_0 to <8 x float> *
store <8 x float> %f446, <8 x float>* %u680_in_6_1, align 32
%u680_in_7_0 = getelementptr inbounds i8, i8* %buffer, i32 224
%u680_in_7_1 = bitcast i8* %u680_in_7_0 to <8 x float> *
store <8 x float> %f447, <8 x float>* %u680_in_7_1, align 32
%u680_in_8_0 = getelementptr inbounds i8, i8* %buffer, i32 256
%u680_in_8_1 = bitcast i8* %u680_in_8_0 to <8 x float> *
store <8 x float> %f448, <8 x float>* %u680_in_8_1, align 32
%u680_in_9_0 = getelementptr inbounds i8, i8* %buffer, i32 288
%u680_in_9_1 = bitcast i8* %u680_in_9_0 to <8 x i32> *
store <8 x i32> %r346, <8 x i32>* %u680_in_9_1, align 32
%u680_in_10_0 = getelementptr inbounds i8, i8* %buffer, i32 320
%u680_in_10_1 = bitcast i8* %u680_in_10_0 to <8 x float> *
store <8 x float> %f637, <8 x float>* %u680_in_10_1, align 32
%u680_in_11_0 = getelementptr inbounds i8, i8* %buffer, i32 352
%u680_in_11_1 = bitcast i8* %u680_in_11_0 to <8 x float> *
store <8 x float> %f656, <8 x float>* %u680_in_11_1, align 32
%u680_in_12_0 = getelementptr inbounds i8, i8* %buffer, i32 384
%u680_in_12_1 = bitcast i8* %u680_in_12_0 to <8 x float> *
store <8 x float> %f675, <8 x float>* %u680_in_12_1, align 32
%u680_out = getelementptr i8, i8* %buffer, i32 416
%u680_tmp_0_0 = getelementptr inbounds i8, i8* %u680_out, i64 0
%u680_tmp_0_1 = bitcast i8* %u680_tmp_0_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_0_1, align 32
%u680_tmp_1_0 = getelementptr inbounds i8, i8* %u680_out, i64 32
%u680_tmp_1_1 = bitcast i8* %u680_tmp_1_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_1_1, align 32
%u680_tmp_2_0 = getelementptr inbounds i8, i8* %u680_out, i64 64
%u680_tmp_2_1 = bitcast i8* %u680_tmp_2_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_2_1, align 32
%u680_tmp_3_0 = getelementptr inbounds i8, i8* %u680_out, i64 96
%u680_tmp_3_1 = bitcast i8* %u680_tmp_3_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_3_1, align 32
%u680_tmp_4_0 = getelementptr inbounds i8, i8* %u680_out, i64 128
%u680_tmp_4_1 = bitcast i8* %u680_tmp_4_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_4_1, align 32
%u680_tmp_5_0 = getelementptr inbounds i8, i8* %u680_out, i64 160
%u680_tmp_5_1 = bitcast i8* %u680_tmp_5_0 to <8 x i32> *
store <8 x i32> zeroinitializer, <8 x i32>* %u680_tmp_5_1, align 32
%u680_tmp_7_0 = getelementptr inbounds i8, i8* %u680_out, i64 192
%u680_tmp_7_1 = bitcast i8* %u680_tmp_7_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_7_1, align 32
%u680_tmp_8_0 = getelementptr inbounds i8, i8* %u680_out, i64 224
%u680_tmp_8_1 = bitcast i8* %u680_tmp_8_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_8_1, align 32
%u680_tmp_9_0 = getelementptr inbounds i8, i8* %u680_out, i64 256
%u680_tmp_9_1 = bitcast i8* %u680_tmp_9_0 to <8 x float> *
store <8 x float> zeroinitializer, <8 x float>* %u680_tmp_9_1, align 32
br label %l680_check
l680_check:
%u680_self = phi <8 x i32> [ %u680_self_initial, %l680_start ], [ %u680_self_next, %l680_call ]
%u680_next = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %u680_self)
%u680_valid = icmp ne i32 %u680_next, 0
br i1 %u680_valid, label %l680_call, label %l680_end
l680_call:
%u680_bcast_0 = insertelement <8 x i32> undef, i32 %u680_next, i32 0
%u680_bcast = shufflevector <8 x i32> %u680_bcast_0, <8 x i32> undef, <8 x i32> zeroinitializer
%u680_active = icmp eq <8 x i32> %u680_self, %u680_bcast
%u680_func_0 = getelementptr inbounds i8*, i8** %callables, i32 %u680_next
%u680_func_1 = load i8*, i8** %u680_func_0
%u680_func = bitcast i8* %u680_func_1 to void (<8 x i1>, i8*, i8*, <8 x i32>)*
call void %u680_func(<8 x i1> %u680_active, i8* %buffer, i8* %rd679, <8 x i32> %u680_offset)
%u680_self_next = select <8 x i1> %u680_active, <8 x i32> zeroinitializer, <8 x i32> %u680_self
br label %l680_check
l680_end:
%u680_out_0_0 = getelementptr inbounds i8, i8* %u680_out, i64 0
%u680_out_0_1 = bitcast i8* %u680_out_0_0 to <8 x float> *
%f681 = load <8 x float>, <8 x float>* %u680_out_0_1, align 32
%u680_out_1_0 = getelementptr inbounds i8, i8* %u680_out, i64 32
%u680_out_1_1 = bitcast i8* %u680_out_1_0 to <8 x float> *
%f682 = load <8 x float>, <8 x float>* %u680_out_1_1, align 32
%u680_out_2_0 = getelementptr inbounds i8, i8* %u680_out, i64 64
%u680_out_2_1 = bitcast i8* %u680_out_2_0 to <8 x float> *
%f683 = load <8 x float>, <8 x float>* %u680_out_2_1, align 32
%u680_out_3_0 = getelementptr inbounds i8, i8* %u680_out, i64 96
%u680_out_3_1 = bitcast i8* %u680_out_3_0 to <8 x float> *
%f684 = load <8 x float>, <8 x float>* %u680_out_3_1, align 32
%u680_out_4_0 = getelementptr inbounds i8, i8* %u680_out, i64 128
%u680_out_4_1 = bitcast i8* %u680_out_4_0 to <8 x float> *
%f685 = load <8 x float>, <8 x float>* %u680_out_4_1, align 32
%u680_out_5_0 = getelementptr inbounds i8, i8* %u680_out, i64 160
%u680_out_5_1 = bitcast i8* %u680_out_5_0 to <8 x i32> *
%r686 = load <8 x i32>, <8 x i32>* %u680_out_5_1, align 32
%u680_out_7_0 = getelementptr inbounds i8, i8* %u680_out, i64 192
%u680_out_7_1 = bitcast i8* %u680_out_7_0 to <8 x float> *
%f687 = load <8 x float>, <8 x float>* %u680_out_7_1, align 32
%u680_out_8_0 = getelementptr inbounds i8, i8* %u680_out, i64 224
%u680_out_8_1 = bitcast i8* %u680_out_8_0 to <8 x float> *
%f688 = load <8 x float>, <8 x float>* %u680_out_8_1, align 32
%u680_out_9_0 = getelementptr inbounds i8, i8* %u680_out, i64 256
%u680_out_9_1 = bitcast i8* %u680_out_9_0 to <8 x float> *
%f689 = load <8 x float>, <8 x float>* %u680_out_9_1, align 32
br label %l680_done
l680_done:
%f690 = fmul <8 x float> %f425, %f681
%f691 = fmul <8 x float> %f426, %f681
%f692 = fmul <8 x float> %f427, %f681
%f693 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f432, <8 x float> %f682, <8 x float> %f690)
%f694 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f434, <8 x float> %f682, <8 x float> %f691)
%f695 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f436, <8 x float> %f682, <8 x float> %f692)
%f696 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f377, <8 x float> %f683, <8 x float> %f693)
%f697 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f378, <8 x float> %f683, <8 x float> %f694)
%f698 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f379, <8 x float> %f683, <8 x float> %f695)
%f699 = fmul <8 x float> %f373, %f696
%f700 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f374, <8 x float> %f697, <8 x float> %f699)
%f701 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f375, <8 x float> %f698, <8 x float> %f700)
%p702 = fcmp oge <8 x float> %f701, %f290
%f703 = select <8 x i1> %p702, <8 x float> %f555, <8 x float> %f560
%f704 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f373, <8 x float> %f370)
%f705 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f374, <8 x float> %f371)
%f706 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f703, <8 x float> %f375, <8 x float> %f372)
%f707_1 = insertelement <8 x float> undef, float 0x47efffffe0000000, i32 0
%f707 = shufflevector <8 x float> %f707_1, <8 x float> undef, <8 x i32> zeroinitializer
%f708 = fmul <8 x float> %f269, %f687
%f709 = fmul <8 x float> %f270, %f688
%f710 = fmul <8 x float> %f271, %f689
%f711 = fmul <8 x float> %f275, %f685
%r712_1 = insertelement <8 x i32> undef, i32 97, i32 0
%r712 = shufflevector <8 x i32> %r712_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r713 = and <8 x i32> %r686, %r712
%p714 = icmp ne <8 x i32> %r713, %r289
%r715 = select <8 x i1> %p385, <8 x i32> %r306, <8 x i32> %r276
%f716 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f708, <8 x float> %f709)
%f717 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f716, <8 x float> %f710)
%f718 = fmul <8 x float> %f711, %f711
%f719 = fmul <8 x float> %f717, %f718
%f720 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %f719, <8 x float> %f340)
%p721 = icmp uge <8 x i32> %r715, %r341
%rd722_0 = mul <8 x i64> %rd658, %rd310
%rd722 = add <8 x i64> %rd722_0, %rd112
%rd723 = lshr <8 x i64> %rd658, %rd312
%rd724 = xor <8 x i64> %rd723, %rd658
%rd725 = lshr <8 x i64> %rd724, %rd315
%r726 = trunc <8 x i64> %rd725 to <8 x i32>
%rd727 = lshr <8 x i64> %rd658, %rd318
%r728 = trunc <8 x i64> %rd727 to <8 x i32>
%r729 = lshr <8 x i32> %r726, %r728
%r730 = bitcast <8 x i32> %r728 to <8 x i32>
%r731 = sub <8 x i32> zeroinitializer, %r730
%r732 = and <8 x i32> %r731, %r324
%r733 = bitcast <8 x i32> %r732 to <8 x i32>
%r734 = shl <8 x i32> %r726, %r733
%r735 = or <8 x i32> %r729, %r734
%r736 = lshr <8 x i32> %r735, %r329
%r737 = or <8 x i32> %r736, %r331
%f738 = bitcast <8 x i32> %r737 to <8 x float>
%f739 = fsub <8 x float> %f738, %f297
%p740 = fcmp olt <8 x float> %f739, %f720
%f741 = fdiv <8 x float> %f297, %f720
%f742 = fmul <8 x float> %f708, %f741
%f743 = fmul <8 x float> %f709, %f741
%f744 = fmul <8 x float> %f710, %f741
%f745 = select <8 x i1> %p721, <8 x float> %f742, <8 x float> %f708
%f746 = select <8 x i1> %p721, <8 x float> %f743, <8 x float> %f709
%f747 = select <8 x i1> %p721, <8 x float> %f744, <8 x float> %f710
%p748 = xor <8 x i1> %p721, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%p749 = or <8 x i1> %p748, %p740
%p750 = and <8 x i1> %p496, %p749
%p751 = fcmp one <8 x float> %f717, %f290
%p752 = and <8 x i1> %p750, %p751
br label %l_236_tail
l_236_tail:
%rd237_final = select <8 x i1> %p259, <8 x i64> %rd722, <8 x i64> %rd237
%f238_final = select <8 x i1> %p259, <8 x float> %f704, <8 x float> %f238
%f239_final = select <8 x i1> %p259, <8 x float> %f705, <8 x float> %f239
%f240_final = select <8 x i1> %p259, <8 x float> %f706, <8 x float> %f240
%f241_final = select <8 x i1> %p259, <8 x float> %f696, <8 x float> %f241
%f242_final = select <8 x i1> %p259, <8 x float> %f697, <8 x float> %f242
%f243_final = select <8 x i1> %p259, <8 x float> %f698, <8 x float> %f243
%f244_final = select <8 x i1> %p259, <8 x float> %f707, <8 x float> %f244
%f245_final = select <8 x i1> %p259, <8 x float> %f745, <8 x float> %f245
%f246_final = select <8 x i1> %p259, <8 x float> %f746, <8 x float> %f246
%f247_final = select <8 x i1> %p259, <8 x float> %f747, <8 x float> %f247
%f248_final = select <8 x i1> %p259, <8 x float> %f616, <8 x float> %f248
%f249_final = select <8 x i1> %p259, <8 x float> %f617, <8 x float> %f249
%f250_final = select <8 x i1> %p259, <8 x float> %f618, <8 x float> %f250
%f251_final = select <8 x i1> %p259, <8 x float> %f711, <8 x float> %f251
%r252_final = select <8 x i1> %p259, <8 x i32> %r715, <8 x i32> %r252
%f254_final = select <8 x i1> %p259, <8 x float> %f370, <8 x float> %f254
%f255_final = select <8 x i1> %p259, <8 x float> %f371, <8 x float> %f255
%f256_final = select <8 x i1> %p259, <8 x float> %f372, <8 x float> %f256
%f257_final = select <8 x i1> %p259, <8 x float> %f684, <8 x float> %f257
%p258_final = select <8 x i1> %p259, <8 x i1> %p714, <8 x i1> %p258
%p259_final = select <8 x i1> %p259, <8 x i1> %p752, <8 x i1> %p259
br label %l_236_cond;
l_236_done:
%f754 = bitcast <8 x float> %f248 to <8 x float>
%f755 = bitcast <8 x float> %f249 to <8 x float>
%f756 = bitcast <8 x float> %f250 to <8 x float>
%f757 = bitcast <8 x float> %f253 to <8 x float>
%p758 = fcmp one <8 x float> %f757, %f220
%f759 = select <8 x i1> %p758, <8 x float> %f757, <8 x float> zeroinitializer
%f760_1 = insertelement <8 x float> undef, float 0xbfe0000000000000, i32 0
%f760 = shufflevector <8 x float> %f760_1, <8 x float> undef, <8 x i32> zeroinitializer
%f761 = fadd <8 x float> %f168, %f760
%f762 = fadd <8 x float> %f169, %f760
%f763 = fsub <8 x float> %f761, %f30
%f764 = fsub <8 x float> %f762, %f30
%f765 = fadd <8 x float> %f761, %f30
%f766 = fadd <8 x float> %f762, %f30
%f767 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %f763)
%r768 = fptosi <8 x float> %f767 to <8 x i32>
%f769 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %f764)
%r770 = fptosi <8 x float> %f769 to <8 x i32>
%r771_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r771 = shufflevector <8 x i32> %r771_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r772_0 = icmp sgt <8 x i32> %r768, %r771
%r772 = select <8 x i1> %r772_0, <8 x i32> %r768, <8 x i32> %r771
%r773_0 = icmp sgt <8 x i32> %r770, %r771
%r773 = select <8 x i1> %r773_0, <8 x i32> %r770, <8 x i32> %r771
%r774 = bitcast <8 x i32> %r772 to <8 x i32>
%r775 = bitcast <8 x i32> %r773 to <8 x i32>
%f776 = call <8 x float> @llvm.floor.v8f32(<8 x float> %f765)
%r777 = fptosi <8 x float> %f776 to <8 x i32>
%f778 = call <8 x float> @llvm.floor.v8f32(<8 x float> %f766)
%r779 = fptosi <8 x float> %f778 to <8 x i32>
%r780_1 = insertelement <8 x i32> undef, i32 255, i32 0
%r780 = shufflevector <8 x i32> %r780_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r781_0 = icmp slt <8 x i32> %r777, %r780
%r781 = select <8 x i1> %r781_0, <8 x i32> %r777, <8 x i32> %r780
%r782_0 = icmp slt <8 x i32> %r779, %r780
%r782 = select <8 x i1> %r782_0, <8 x i32> %r779, <8 x i32> %r780
%r783 = bitcast <8 x i32> %r781 to <8 x i32>
%r784 = bitcast <8 x i32> %r782 to <8 x i32>
%r785_0 = mul <8 x i32> %r775, %r121
%r785 = add <8 x i32> %r785_0, %r774
%r786 = mul <8 x i32> %r785, %r40
%p787 = icmp ule <8 x i32> %r774, %r783
%p788 = icmp ule <8 x i32> %r775, %r784
%p789 = and <8 x i1> %p787, %p788
%f790 = uitofp <8 x i32> %r774 to <8 x float>
%f791 = uitofp <8 x i32> %r775 to <8 x float>
%f792 = fsub <8 x float> %f790, %f761
%f793 = fsub <8 x float> %f791, %f762
br label %l_794_start
l_794_start:
br label %l_794_cond
l_794_cond: ; Loop (ImageBlock::put() [1])
%r795 = phi <8 x i32> [ %r795_final, %l_794_tail ], [ %r219, %l_794_start ]
%r796 = phi <8 x i32> [ %r796_final, %l_794_tail ], [ %r786, %l_794_start ]
%r797_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r797 = shufflevector <8 x i32> %r797_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p798 = icmp ult <8 x i32> %r795, %r797
%p794 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p798)
br i1 %p794, label %l_794_body, label %l_794_done
l_794_body:
%r800 = phi <8 x i32> [ %r795, %l_794_cond ]
%r801 = phi <8 x i32> [ %r796, %l_794_cond ]
%r802_0 = trunc i64 %index to i32
%r802_1 = insertelement <8 x i32> undef, i32 %r802_0, i32 0
%r802_2 = shufflevector <8 x i32> %r802_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r802 = add <8 x i32> %r802_2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%p803_0 = trunc i64 %end to i32
%p803_1 = insertelement <8 x i32> undef, i32 %p803_0, i32 0
%p803_2 = shufflevector <8 x i32> %p803_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p803 = icmp ult <8 x i32> %r802, %p803_2
%p804 = and <8 x i1> %p803, %p798
%f805 = uitofp <8 x i32> %r800 to <8 x float>
%f806 = fadd <8 x float> %f793, %f805
%f807_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f807 = shufflevector <8 x float> %f807_1, <8 x float> undef, <8 x i32> zeroinitializer
%f808_0 = bitcast <8 x float> %f806 to <8 x i32>
%f808_1 = bitcast <8 x float> %f807 to <8 x i32>
%f808_2 = and <8 x i32> %f808_0, %f808_1
%f808 = bitcast <8 x i32> %f808_2 to <8 x float>
%f809_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f809 = shufflevector <8 x float> %f809_1, <8 x float> undef, <8 x i32> zeroinitializer
%f810 = fsub <8 x float> %f809, %f808
%f811_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f811 = shufflevector <8 x float> %f811_1, <8 x float> undef, <8 x i32> zeroinitializer
%f812 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f811, <8 x float> %f810)
%r813 = add <8 x i32> %r775, %r800
%p814 = icmp ule <8 x i32> %r813, %r784
%p815 = and <8 x i1> %p789, %p814
%r816_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r816 = shufflevector <8 x i32> %r816_1, <8 x i32> undef, <8 x i32> zeroinitializer
br label %l_817_start
l_817_start:
br label %l_817_cond
l_817_cond: ; Loop (ImageBlock::put() [2])
%r818 = phi <8 x i32> [ %r818_final, %l_817_tail ], [ %r816, %l_817_start ]
%r819 = phi <8 x i32> [ %r819_final, %l_817_tail ], [ %r801, %l_817_start ]
%r820_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r820 = shufflevector <8 x i32> %r820_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p821 = icmp ult <8 x i32> %r818, %r820
%p817 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %p821)
br i1 %p817, label %l_817_body, label %l_817_done
l_817_body:
%r823 = phi <8 x i32> [ %r818, %l_817_cond ]
%r824 = phi <8 x i32> [ %r819, %l_817_cond ]
%p825 = and <8 x i1> %p804, %p821
%f826 = uitofp <8 x i32> %r823 to <8 x float>
%f827 = fadd <8 x float> %f792, %f826
%f828_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f828 = shufflevector <8 x float> %f828_1, <8 x float> undef, <8 x i32> zeroinitializer
%f829_0 = bitcast <8 x float> %f827 to <8 x i32>
%f829_1 = bitcast <8 x float> %f828 to <8 x i32>
%f829_2 = and <8 x i32> %f829_0, %f829_1
%f829 = bitcast <8 x i32> %f829_2 to <8 x float>
%f830_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f830 = shufflevector <8 x float> %f830_1, <8 x float> undef, <8 x i32> zeroinitializer
%f831 = fsub <8 x float> %f830, %f829
%f832_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f832 = shufflevector <8 x float> %f832_1, <8 x float> undef, <8 x i32> zeroinitializer
%f833 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f832, <8 x float> %f831)
%f834 = fmul <8 x float> %f833, %f812
%r835 = add <8 x i32> %r774, %r823
%p836 = icmp ule <8 x i32> %r835, %r783
%p837 = and <8 x i1> %p815, %p836
%f838 = fmul <8 x float> %f754, %f834
%rd839_p1 = getelementptr inbounds i8*, i8** %params, i32 53
%rd839 = load i8*, i8** %rd839_p1, align 8, !alias.scope !2
%p840 = and <8 x i1> %p837, %p825
%u841_0 = bitcast i8* %rd839 to float*
%u841_1 = getelementptr float, float* %u841_0, <8 x i32> %r824
call void @ek.scatter_add.v8f32(<8 x float*> %u841_1, <8 x float> %f838, <8 x i1> %p840)
%r842_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r842 = shufflevector <8 x i32> %r842_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r843 = add <8 x i32> %r824, %r842
%f844 = fmul <8 x float> %f755, %f834
%u845_0 = bitcast i8* %rd839 to float*
%u845_1 = getelementptr float, float* %u845_0, <8 x i32> %r843
call void @ek.scatter_add.v8f32(<8 x float*> %u845_1, <8 x float> %f844, <8 x i1> %p840)
%r846 = add <8 x i32> %r843, %r842
%f847 = fmul <8 x float> %f756, %f834
%u848_0 = bitcast i8* %rd839 to float*
%u848_1 = getelementptr float, float* %u848_0, <8 x i32> %r846
call void @ek.scatter_add.v8f32(<8 x float*> %u848_1, <8 x float> %f847, <8 x i1> %p840)
%r849 = add <8 x i32> %r846, %r842
%u850_0 = bitcast i8* %rd839 to float*
%u850_1 = getelementptr float, float* %u850_0, <8 x i32> %r849
call void @ek.scatter_add.v8f32(<8 x float*> %u850_1, <8 x float> %f834, <8 x i1> %p840)
%r851 = add <8 x i32> %r849, %r842
%f852 = fmul <8 x float> %f759, %f834
%u853_0 = bitcast i8* %rd839 to float*
%u853_1 = getelementptr float, float* %u853_0, <8 x i32> %r851
call void @ek.scatter_add.v8f32(<8 x float*> %u853_1, <8 x float> %f852, <8 x i1> %p840)
%r854 = add <8 x i32> %r851, %r842
%r855 = add <8 x i32> %r823, %r842
br label %l_817_tail
l_817_tail:
%r818_final = select <8 x i1> %p821, <8 x i32> %r855, <8 x i32> %r818
%r819_final = select <8 x i1> %p821, <8 x i32> %r854, <8 x i32> %r819
br label %l_817_cond;
l_817_done:
%r858 = bitcast <8 x i32> %r819 to <8 x i32>
%r859_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r859 = shufflevector <8 x i32> %r859_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r860 = add <8 x i32> %r800, %r859
%r861_1 = insertelement <8 x i32> undef, i32 1270, i32 0
%r861 = shufflevector <8 x i32> %r861_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r862 = add <8 x i32> %r858, %r861
br label %l_794_tail
l_794_tail:
%r795_final = select <8 x i1> %p798, <8 x i32> %r860, <8 x i32> %r795
%r796_final = select <8 x i1> %p798, <8 x i32> %r862, <8 x i32> %r796
br label %l_794_cond;
l_794_done:
br label %suffix
suffix:
%index_next = add i64 %index, 8
%cond = icmp uge i64 %index_next, %end
br i1 %cond, label %done, label %body, !llvm.loop !2
done:
ret void
}
define void @func_bac3cacf8aebcc87b13f4c43e6cdd822(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Shape::compute_surface_interaction()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 12
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*>
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 72
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*>
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f6_p1 = getelementptr inbounds i8, i8* %data, i32 84
%f6_p2 = getelementptr inbounds i8, i8* %f6_p1, <8 x i32> %offsets
%f6_p3 = bitcast <8 x i8*> %f6_p2 to <8 x float*>
%f6 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f6_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f7_p1 = getelementptr inbounds i8, i8* %data, i32 96
%f7_p2 = getelementptr inbounds i8, i8* %f7_p1, <8 x i32> %offsets
%f7_p3 = bitcast <8 x i8*> %f7_p2 to <8 x float*>
%f7 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f7_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f8_p1 = getelementptr inbounds i8, i8* %data, i32 68
%f8_p2 = getelementptr inbounds i8, i8* %f8_p1, <8 x i32> %offsets
%f8_p3 = bitcast <8 x i8*> %f8_p2 to <8 x float*>
%f8 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f8_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f9_p1 = getelementptr inbounds i8, i8* %data, i32 80
%f9_p2 = getelementptr inbounds i8, i8* %f9_p1, <8 x i32> %offsets
%f9_p3 = bitcast <8 x i8*> %f9_p2 to <8 x float*>
%f9 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f9_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f10_p1 = getelementptr inbounds i8, i8* %data, i32 92
%f10_p2 = getelementptr inbounds i8, i8* %f10_p1, <8 x i32> %offsets
%f10_p3 = bitcast <8 x i8*> %f10_p2 to <8 x float*>
%f10 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f10_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f11_p1 = getelementptr inbounds i8, i8* %data, i32 64
%f11_p2 = getelementptr inbounds i8, i8* %f11_p1, <8 x i32> %offsets
%f11_p3 = bitcast <8 x i8*> %f11_p2 to <8 x float*>
%f11 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f11_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f12_p1 = getelementptr inbounds i8, i8* %data, i32 76
%f12_p2 = getelementptr inbounds i8, i8* %f12_p1, <8 x i32> %offsets
%f12_p3 = bitcast <8 x i8*> %f12_p2 to <8 x float*>
%f12 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f12_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f13_p1 = getelementptr inbounds i8, i8* %data, i32 88
%f13_p2 = getelementptr inbounds i8, i8* %f13_p1, <8 x i32> %offsets
%f13_p3 = bitcast <8 x i8*> %f13_p2 to <8 x float*>
%f13 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f13_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f14_p1 = getelementptr inbounds i8, i8* %data, i32 40
%f14_p2 = getelementptr inbounds i8, i8* %f14_p1, <8 x i32> %offsets
%f14_p3 = bitcast <8 x i8*> %f14_p2 to <8 x float*>
%f14 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f14_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f15_p1 = getelementptr inbounds i8, i8* %data, i32 24
%f15_p2 = getelementptr inbounds i8, i8* %f15_p1, <8 x i32> %offsets
%f15_p3 = bitcast <8 x i8*> %f15_p2 to <8 x float*>
%f15 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f15_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f16_p1 = getelementptr inbounds i8, i8* %data, i32 36
%f16_p2 = getelementptr inbounds i8, i8* %f16_p1, <8 x i32> %offsets
%f16_p3 = bitcast <8 x i8*> %f16_p2 to <8 x float*>
%f16 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f16_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f17_p1 = getelementptr inbounds i8, i8* %data, i32 20
%f17_p2 = getelementptr inbounds i8, i8* %f17_p1, <8 x i32> %offsets
%f17_p3 = bitcast <8 x i8*> %f17_p2 to <8 x float*>
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f18_p1 = getelementptr inbounds i8, i8* %data, i32 32
%f18_p2 = getelementptr inbounds i8, i8* %f18_p1, <8 x i32> %offsets
%f18_p3 = bitcast <8 x i8*> %f18_p2 to <8 x float*>
%f18 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f18_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f19_p1 = getelementptr inbounds i8, i8* %data, i32 16
%f19_p2 = getelementptr inbounds i8, i8* %f19_p1, <8 x i32> %offsets
%f19_p3 = bitcast <8 x i8*> %f19_p2 to <8 x float*>
%f19 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f19_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f20_p1 = getelementptr inbounds i8, i8* %data, i32 44
%f20_p2 = getelementptr inbounds i8, i8* %f20_p1, <8 x i32> %offsets
%f20_p3 = bitcast <8 x i8*> %f20_p2 to <8 x float*>
%f20 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f20_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f21_p1 = getelementptr inbounds i8, i8* %data, i32 28
%f21_p2 = getelementptr inbounds i8, i8* %f21_p1, <8 x i32> %offsets
%f21_p3 = bitcast <8 x i8*> %f21_p2 to <8 x float*>
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f22_i0 = getelementptr inbounds i8, i8* %params, i64 0
%f22_i1 = bitcast i8* %f22_i0 to <8 x float>*
%f22 = load <8 x float>, <8 x float>* %f22_i1, align 32
%f23_i0 = getelementptr inbounds i8, i8* %params, i64 32
%f23_i1 = bitcast i8* %f23_i0 to <8 x float>*
%f23 = load <8 x float>, <8 x float>* %f23_i1, align 32
%f24_i0 = getelementptr inbounds i8, i8* %params, i64 64
%f24_i1 = bitcast i8* %f24_i0 to <8 x float>*
%f24 = load <8 x float>, <8 x float>* %f24_i1, align 32
%f25_i0 = getelementptr inbounds i8, i8* %params, i64 96
%f25_i1 = bitcast i8* %f25_i0 to <8 x float>*
%f25 = load <8 x float>, <8 x float>* %f25_i1, align 32
%f26_i0 = getelementptr inbounds i8, i8* %params, i64 128
%f26_i1 = bitcast i8* %f26_i0 to <8 x float>*
%f26 = load <8 x float>, <8 x float>* %f26_i1, align 32
%f27_i0 = getelementptr inbounds i8, i8* %params, i64 160
%f27_i1 = bitcast i8* %f27_i0 to <8 x float>*
%f27 = load <8 x float>, <8 x float>* %f27_i1, align 32
%f28_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f28_i1 = bitcast i8* %f28_i0 to <8 x float>*
%f28 = load <8 x float>, <8 x float>* %f28_i1, align 32
%r29 = bitcast <8 x i32> %self to <8 x i32>
%f30_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer
%f31 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f28, <8 x float> %f22)
%f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f28, <8 x float> %f23)
%f33 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f28, <8 x float> %f24)
%f34 = fsub <8 x float> %f31, %f2
%f35 = fsub <8 x float> %f32, %f3
%f36 = fsub <8 x float> %f33, %f4
%f37 = fmul <8 x float> %f34, %f34
%f38 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f35, <8 x float> %f35, <8 x float> %f37)
%f39 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f36, <8 x float> %f36, <8 x float> %f38)
%f40_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f40 = shufflevector <8 x float> %f40_1, <8 x float> undef, <8 x i32> zeroinitializer
%f41 = fdiv <8 x float> %f40, %f39
%f42 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f41)
%f43 = fmul <8 x float> %f34, %f42
%f44 = fmul <8 x float> %f35, %f42
%f45 = fmul <8 x float> %f36, %f42
%f46 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f43, <8 x float> %f1, <8 x float> %f2)
%f47 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f44, <8 x float> %f1, <8 x float> %f3)
%f48 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f45, <8 x float> %f1, <8 x float> %f4)
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f46, <8 x float> %f20)
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f46, <8 x float> %f21)
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f47, <8 x float> %f49)
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f47, <8 x float> %f50)
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f48, <8 x float> %f51)
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f48, <8 x float> %f52)
%f55_1 = insertelement <8 x float> undef, float 0x401921fb60000000, i32 0
%f55 = shufflevector <8 x float> %f55_1, <8 x float> undef, <8 x i32> zeroinitializer
%f56 = fneg <8 x float> %f54
%f57 = fmul <8 x float> %f5, %f56
%f58 = fmul <8 x float> %f6, %f56
%f59 = fmul <8 x float> %f7, %f56
%f60 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f53, <8 x float> %f57)
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f53, <8 x float> %f58)
%f62 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f10, <8 x float> %f53, <8 x float> %f59)
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f30, <8 x float> %f60)
%f64 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f12, <8 x float> %f30, <8 x float> %f61)
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f30, <8 x float> %f62)
%f66 = fmul <8 x float> %f63, %f55
%f67 = fmul <8 x float> %f64, %f55
%f68 = fmul <8 x float> %f65, %f55
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f67, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f47, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f48, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>*
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x float> %f43, <8 x float> %out_5_2
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>*
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32
%out_6_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_6_2
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>*
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r29, <8 x i32> %out_8_2
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>*
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32
%out_13_3 = select <8 x i1> %mask, <8 x float> %f68, <8 x float> %out_13_2
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>*
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32
%out_14_3 = select <8 x i1> %mask, <8 x float> %f66, <8 x float> %out_14_2
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>*
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32
%out_15_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_15_2
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>*
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32
%out_16_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_16_2
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>*
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32
%out_17_3 = select <8 x i1> %mask, <8 x float> %f43, <8 x float> %out_17_2
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32
ret void;
}
define void @func_570d1d9103e40ae9598d8f351c2e5c2a(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Shape::compute_surface_interaction()
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 224
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>*
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>*
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32
%r3_i0 = getelementptr inbounds i8, i8* %params, i64 288
%r3_i1 = bitcast i8* %r3_i0 to <8 x i32>*
%r3 = load <8 x i32>, <8 x i32>* %r3_i1, align 32
%f4_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer
%f5_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer
%f6_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%r7 = bitcast <8 x i32> %self to <8 x i32>
%p8 = bitcast <8 x i1> %mask to <8 x i1>
%r9_1 = insertelement <8 x i32> undef, i32 3, i32 0
%r9 = shufflevector <8 x i32> %r9_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r10 = mul <8 x i32> %r3, %r9
%r11_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r11 = shufflevector <8 x i32> %r11_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r12 = add <8 x i32> %r10, %r11
%r13_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r13 = shufflevector <8 x i32> %r13_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r14 = add <8 x i32> %r10, %r13
%rd15_p1 = getelementptr inbounds i8, i8* %data, i32 8
%rd15_p2 = getelementptr inbounds i8, i8* %rd15_p1, <8 x i32> %offsets
%rd15_p3 = bitcast <8 x i8*> %rd15_p2 to <8 x i64*>
%rd15_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd15_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd15 = inttoptr <8 x i64> %rd15_p4 to <8 x i8*>
%r16_0 = bitcast <8 x i8*> %rd15 to <8 x i32*>
%r16_1 = getelementptr i32, <8 x i32*> %r16_0, <8 x i32> %r10
%r16 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r16_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer)
%r17_0 = bitcast <8 x i8*> %rd15 to <8 x i32*>
%r17_1 = getelementptr i32, <8 x i32*> %r17_0, <8 x i32> %r12
%r17 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r17_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer)
%r18_0 = bitcast <8 x i8*> %rd15 to <8 x i32*>
%r18_1 = getelementptr i32, <8 x i32*> %r18_0, <8 x i32> %r14
%r18 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r18_1, i32 4, <8 x i1> %p8, <8 x i32> zeroinitializer)
%r19 = mul <8 x i32> %r16, %r9
%r20 = add <8 x i32> %r19, %r11
%r21 = add <8 x i32> %r19, %r13
%rd22_p1 = getelementptr inbounds i8, i8* %data, i32 0
%rd22_p2 = getelementptr inbounds i8, i8* %rd22_p1, <8 x i32> %offsets
%rd22_p3 = bitcast <8 x i8*> %rd22_p2 to <8 x i64*>
%rd22_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd22_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd22 = inttoptr <8 x i64> %rd22_p4 to <8 x i8*>
%f23_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f23_1 = getelementptr float, <8 x float*> %f23_0, <8 x i32> %r19
%f23 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f23_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f24_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f24_1 = getelementptr float, <8 x float*> %f24_0, <8 x i32> %r20
%f24 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f24_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f25_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f25_1 = getelementptr float, <8 x float*> %f25_0, <8 x i32> %r21
%f25 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f25_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%r26 = mul <8 x i32> %r17, %r9
%r27 = add <8 x i32> %r26, %r11
%r28 = add <8 x i32> %r26, %r13
%f29_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f29_1 = getelementptr float, <8 x float*> %f29_0, <8 x i32> %r26
%f29 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f29_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f30_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f30_1 = getelementptr float, <8 x float*> %f30_0, <8 x i32> %r27
%f30 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f30_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f31_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f31_1 = getelementptr float, <8 x float*> %f31_0, <8 x i32> %r28
%f31 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f31_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%r32 = mul <8 x i32> %r18, %r9
%r33 = add <8 x i32> %r32, %r11
%r34 = add <8 x i32> %r32, %r13
%f35_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f35_1 = getelementptr float, <8 x float*> %f35_0, <8 x i32> %r32
%f35 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f35_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f36_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f36_1 = getelementptr float, <8 x float*> %f36_0, <8 x i32> %r33
%f36 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f36_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f37_0 = bitcast <8 x i8*> %rd22 to <8 x float*>
%f37_1 = getelementptr float, <8 x float*> %f37_0, <8 x i32> %r34
%f37 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f37_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f38 = fsub <8 x float> %f5, %f1
%f39 = fsub <8 x float> %f38, %f2
%f40 = fsub <8 x float> %f29, %f23
%f41 = fsub <8 x float> %f30, %f24
%f42 = fsub <8 x float> %f31, %f25
%f43 = fsub <8 x float> %f35, %f23
%f44 = fsub <8 x float> %f36, %f24
%f45 = fsub <8 x float> %f37, %f25
%f46 = fmul <8 x float> %f35, %f2
%f47 = fmul <8 x float> %f36, %f2
%f48 = fmul <8 x float> %f37, %f2
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f29, <8 x float> %f1, <8 x float> %f46)
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f30, <8 x float> %f1, <8 x float> %f47)
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f31, <8 x float> %f1, <8 x float> %f48)
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f23, <8 x float> %f39, <8 x float> %f49)
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f24, <8 x float> %f39, <8 x float> %f50)
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f39, <8 x float> %f51)
%f55 = fmul <8 x float> %f42, %f44
%f56 = fmul <8 x float> %f40, %f45
%f57 = fmul <8 x float> %f41, %f43
%f58 = fneg <8 x float> %f55
%f59 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f41, <8 x float> %f45, <8 x float> %f58)
%f60 = fneg <8 x float> %f56
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f42, <8 x float> %f43, <8 x float> %f60)
%f62 = fneg <8 x float> %f57
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f40, <8 x float> %f44, <8 x float> %f62)
%f64 = fmul <8 x float> %f59, %f59
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f61, <8 x float> %f64)
%f66 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f63, <8 x float> %f65)
%f67 = fdiv <8 x float> %f5, %f66
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67)
%f69 = fmul <8 x float> %f59, %f68
%f70 = fmul <8 x float> %f61, %f68
%f71 = fmul <8 x float> %f63, %f68
%p72 = fcmp oge <8 x float> %f71, %f4
%f73 = select <8 x i1> %p72, <8 x float> %f5, <8 x float> %f6
%f74 = fadd <8 x float> %f73, %f71
%f75 = fdiv <8 x float> %f5, %f74
%f76 = fneg <8 x float> %f75
%f77 = fmul <8 x float> %f69, %f70
%f78 = fmul <8 x float> %f77, %f76
%f79 = fmul <8 x float> %f69, %f69
%f80 = fmul <8 x float> %f79, %f76
%f81 = fneg <8 x float> %f80
%f82 = select <8 x i1> %p72, <8 x float> %f80, <8 x float> %f81
%f83 = fadd <8 x float> %f82, %f5
%f84 = fneg <8 x float> %f78
%f85 = select <8 x i1> %p72, <8 x float> %f78, <8 x float> %f84
%f86 = fneg <8 x float> %f69
%f87 = select <8 x i1> %p72, <8 x float> %f86, <8 x float> %f69
%rd88_p1 = getelementptr inbounds i8, i8* %data, i32 16
%rd88_p2 = getelementptr inbounds i8, i8* %rd88_p1, <8 x i32> %offsets
%rd88_p3 = bitcast <8 x i8*> %rd88_p2 to <8 x i64*>
%rd88_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd88_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd88 = inttoptr <8 x i64> %rd88_p4 to <8 x i8*>
%f89_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f89_1 = getelementptr float, <8 x float*> %f89_0, <8 x i32> %r19
%f89 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f89_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f90_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f90_1 = getelementptr float, <8 x float*> %f90_0, <8 x i32> %r20
%f90 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f90_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f91_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f91_1 = getelementptr float, <8 x float*> %f91_0, <8 x i32> %r21
%f91 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f91_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f92_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f92_1 = getelementptr float, <8 x float*> %f92_0, <8 x i32> %r26
%f92 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f92_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f93_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f93_1 = getelementptr float, <8 x float*> %f93_0, <8 x i32> %r27
%f93 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f93_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f94_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f94_1 = getelementptr float, <8 x float*> %f94_0, <8 x i32> %r28
%f94 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f94_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f95_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f95_1 = getelementptr float, <8 x float*> %f95_0, <8 x i32> %r32
%f95 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f95_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f96_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f96_1 = getelementptr float, <8 x float*> %f96_0, <8 x i32> %r33
%f96 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f96_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f97_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f97_1 = getelementptr float, <8 x float*> %f97_0, <8 x i32> %r34
%f97 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f97_1, i32 4, <8 x i1> %p8, <8 x float> zeroinitializer)
%f98 = fmul <8 x float> %f89, %f39
%f99 = fmul <8 x float> %f90, %f39
%f100 = fmul <8 x float> %f91, %f39
%f101 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f92, <8 x float> %f1, <8 x float> %f98)
%f102 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f93, <8 x float> %f1, <8 x float> %f99)
%f103 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f94, <8 x float> %f1, <8 x float> %f100)
%f104 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f95, <8 x float> %f2, <8 x float> %f101)
%f105 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f2, <8 x float> %f102)
%f106 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f2, <8 x float> %f103)
%f107 = fmul <8 x float> %f104, %f104
%f108 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f105, <8 x float> %f105, <8 x float> %f107)
%f109 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f106, <8 x float> %f106, <8 x float> %f108)
%f110 = fdiv <8 x float> %f5, %f109
%f111 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f110)
%f112 = fmul <8 x float> %f104, %f111
%f113 = fmul <8 x float> %f105, %f111
%f114 = fmul <8 x float> %f106, %f111
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f85, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f54, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>*
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x float> %f69, <8 x float> %out_5_2
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>*
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32
%out_6_3 = select <8 x i1> %mask, <8 x float> %f70, <8 x float> %out_6_2
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f71, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>*
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r7, <8 x i32> %out_8_2
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>*
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32
%out_13_3 = select <8 x i1> %mask, <8 x float> %f87, <8 x float> %out_13_2
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>*
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32
%out_14_3 = select <8 x i1> %mask, <8 x float> %f83, <8 x float> %out_14_2
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>*
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32
%out_15_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_15_2
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>*
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32
%out_16_3 = select <8 x i1> %mask, <8 x float> %f113, <8 x float> %out_16_2
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>*
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32
%out_17_3 = select <8 x i1> %mask, <8 x float> %f112, <8 x float> %out_17_2
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32
ret void;
}
define void @func_e17fd895e12cc8bec8c4e55075a069b3(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Shape::compute_surface_interaction()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 12
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*>
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 72
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*>
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f6_p1 = getelementptr inbounds i8, i8* %data, i32 84
%f6_p2 = getelementptr inbounds i8, i8* %f6_p1, <8 x i32> %offsets
%f6_p3 = bitcast <8 x i8*> %f6_p2 to <8 x float*>
%f6 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f6_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f7_p1 = getelementptr inbounds i8, i8* %data, i32 96
%f7_p2 = getelementptr inbounds i8, i8* %f7_p1, <8 x i32> %offsets
%f7_p3 = bitcast <8 x i8*> %f7_p2 to <8 x float*>
%f7 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f7_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f8_p1 = getelementptr inbounds i8, i8* %data, i32 68
%f8_p2 = getelementptr inbounds i8, i8* %f8_p1, <8 x i32> %offsets
%f8_p3 = bitcast <8 x i8*> %f8_p2 to <8 x float*>
%f8 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f8_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f9_p1 = getelementptr inbounds i8, i8* %data, i32 80
%f9_p2 = getelementptr inbounds i8, i8* %f9_p1, <8 x i32> %offsets
%f9_p3 = bitcast <8 x i8*> %f9_p2 to <8 x float*>
%f9 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f9_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f10_p1 = getelementptr inbounds i8, i8* %data, i32 92
%f10_p2 = getelementptr inbounds i8, i8* %f10_p1, <8 x i32> %offsets
%f10_p3 = bitcast <8 x i8*> %f10_p2 to <8 x float*>
%f10 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f10_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f11_p1 = getelementptr inbounds i8, i8* %data, i32 64
%f11_p2 = getelementptr inbounds i8, i8* %f11_p1, <8 x i32> %offsets
%f11_p3 = bitcast <8 x i8*> %f11_p2 to <8 x float*>
%f11 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f11_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f12_p1 = getelementptr inbounds i8, i8* %data, i32 76
%f12_p2 = getelementptr inbounds i8, i8* %f12_p1, <8 x i32> %offsets
%f12_p3 = bitcast <8 x i8*> %f12_p2 to <8 x float*>
%f12 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f12_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f13_p1 = getelementptr inbounds i8, i8* %data, i32 88
%f13_p2 = getelementptr inbounds i8, i8* %f13_p1, <8 x i32> %offsets
%f13_p3 = bitcast <8 x i8*> %f13_p2 to <8 x float*>
%f13 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f13_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f14_p1 = getelementptr inbounds i8, i8* %data, i32 40
%f14_p2 = getelementptr inbounds i8, i8* %f14_p1, <8 x i32> %offsets
%f14_p3 = bitcast <8 x i8*> %f14_p2 to <8 x float*>
%f14 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f14_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f15_p1 = getelementptr inbounds i8, i8* %data, i32 24
%f15_p2 = getelementptr inbounds i8, i8* %f15_p1, <8 x i32> %offsets
%f15_p3 = bitcast <8 x i8*> %f15_p2 to <8 x float*>
%f15 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f15_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f16_p1 = getelementptr inbounds i8, i8* %data, i32 36
%f16_p2 = getelementptr inbounds i8, i8* %f16_p1, <8 x i32> %offsets
%f16_p3 = bitcast <8 x i8*> %f16_p2 to <8 x float*>
%f16 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f16_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f17_p1 = getelementptr inbounds i8, i8* %data, i32 20
%f17_p2 = getelementptr inbounds i8, i8* %f17_p1, <8 x i32> %offsets
%f17_p3 = bitcast <8 x i8*> %f17_p2 to <8 x float*>
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f18_p1 = getelementptr inbounds i8, i8* %data, i32 32
%f18_p2 = getelementptr inbounds i8, i8* %f18_p1, <8 x i32> %offsets
%f18_p3 = bitcast <8 x i8*> %f18_p2 to <8 x float*>
%f18 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f18_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f19_p1 = getelementptr inbounds i8, i8* %data, i32 16
%f19_p2 = getelementptr inbounds i8, i8* %f19_p1, <8 x i32> %offsets
%f19_p3 = bitcast <8 x i8*> %f19_p2 to <8 x float*>
%f19 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f19_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f20_p1 = getelementptr inbounds i8, i8* %data, i32 44
%f20_p2 = getelementptr inbounds i8, i8* %f20_p1, <8 x i32> %offsets
%f20_p3 = bitcast <8 x i8*> %f20_p2 to <8 x float*>
%f20 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f20_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f21_p1 = getelementptr inbounds i8, i8* %data, i32 28
%f21_p2 = getelementptr inbounds i8, i8* %f21_p1, <8 x i32> %offsets
%f21_p3 = bitcast <8 x i8*> %f21_p2 to <8 x float*>
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f22_i0 = getelementptr inbounds i8, i8* %params, i64 0
%f22_i1 = bitcast i8* %f22_i0 to <8 x float>*
%f22 = load <8 x float>, <8 x float>* %f22_i1, align 32
%f23_i0 = getelementptr inbounds i8, i8* %params, i64 32
%f23_i1 = bitcast i8* %f23_i0 to <8 x float>*
%f23 = load <8 x float>, <8 x float>* %f23_i1, align 32
%f24_i0 = getelementptr inbounds i8, i8* %params, i64 64
%f24_i1 = bitcast i8* %f24_i0 to <8 x float>*
%f24 = load <8 x float>, <8 x float>* %f24_i1, align 32
%f25_i0 = getelementptr inbounds i8, i8* %params, i64 96
%f25_i1 = bitcast i8* %f25_i0 to <8 x float>*
%f25 = load <8 x float>, <8 x float>* %f25_i1, align 32
%f26_i0 = getelementptr inbounds i8, i8* %params, i64 128
%f26_i1 = bitcast i8* %f26_i0 to <8 x float>*
%f26 = load <8 x float>, <8 x float>* %f26_i1, align 32
%f27_i0 = getelementptr inbounds i8, i8* %params, i64 160
%f27_i1 = bitcast i8* %f27_i0 to <8 x float>*
%f27 = load <8 x float>, <8 x float>* %f27_i1, align 32
%f28_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f28_i1 = bitcast i8* %f28_i0 to <8 x float>*
%f28 = load <8 x float>, <8 x float>* %f28_i1, align 32
%f29_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f29 = shufflevector <8 x float> %f29_1, <8 x float> undef, <8 x i32> zeroinitializer
%f30 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f28, <8 x float> %f22)
%f31 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f28, <8 x float> %f23)
%f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f28, <8 x float> %f24)
%f33_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f33 = shufflevector <8 x float> %f33_1, <8 x float> undef, <8 x i32> zeroinitializer
%f34_1 = insertelement <8 x float> undef, float 0x401921fb60000000, i32 0
%f34 = shufflevector <8 x float> %f34_1, <8 x float> undef, <8 x i32> zeroinitializer
%r35 = bitcast <8 x i32> %self to <8 x i32>
%f36 = fsub <8 x float> %f30, %f2
%f37 = fsub <8 x float> %f31, %f3
%f38 = fsub <8 x float> %f32, %f4
%f39 = fmul <8 x float> %f36, %f36
%f40 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f37, <8 x float> %f37, <8 x float> %f39)
%f41 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f38, <8 x float> %f38, <8 x float> %f40)
%f42 = fdiv <8 x float> %f33, %f41
%f43 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f42)
%f44 = fmul <8 x float> %f36, %f43
%f45 = fmul <8 x float> %f37, %f43
%f46 = fmul <8 x float> %f38, %f43
%f47 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f44, <8 x float> %f1, <8 x float> %f2)
%f48 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f45, <8 x float> %f1, <8 x float> %f3)
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f46, <8 x float> %f1, <8 x float> %f4)
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f47, <8 x float> %f20)
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f15, <8 x float> %f47, <8 x float> %f21)
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f16, <8 x float> %f48, <8 x float> %f50)
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f17, <8 x float> %f48, <8 x float> %f51)
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f18, <8 x float> %f49, <8 x float> %f52)
%f55 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f19, <8 x float> %f49, <8 x float> %f53)
%f56 = fneg <8 x float> %f55
%f57 = fmul <8 x float> %f5, %f56
%f58 = fmul <8 x float> %f6, %f56
%f59 = fmul <8 x float> %f7, %f56
%f60 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f8, <8 x float> %f54, <8 x float> %f57)
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f54, <8 x float> %f58)
%f62 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f10, <8 x float> %f54, <8 x float> %f59)
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f29, <8 x float> %f60)
%f64 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f12, <8 x float> %f29, <8 x float> %f61)
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f13, <8 x float> %f29, <8 x float> %f62)
%f66 = fmul <8 x float> %f63, %f34
%f67 = fmul <8 x float> %f64, %f34
%f68 = fmul <8 x float> %f65, %f34
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f67, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f47, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f48, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f49, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>*
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_5_2
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>*
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32
%out_6_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_6_2
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>*
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r35, <8 x i32> %out_8_2
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>*
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32
%out_13_3 = select <8 x i1> %mask, <8 x float> %f68, <8 x float> %out_13_2
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>*
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32
%out_14_3 = select <8 x i1> %mask, <8 x float> %f66, <8 x float> %out_14_2
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>*
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32
%out_15_3 = select <8 x i1> %mask, <8 x float> %f46, <8 x float> %out_15_2
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>*
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32
%out_16_3 = select <8 x i1> %mask, <8 x float> %f45, <8 x float> %out_16_2
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>*
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32
%out_17_3 = select <8 x i1> %mask, <8 x float> %f44, <8 x float> %out_17_2
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32
ret void;
}
define void @func_dcfca7b5b523c613a93412a4c52b1f7a(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Shape::compute_surface_interaction()
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 224
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>*
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>*
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32
%r3_i0 = getelementptr inbounds i8, i8* %params, i64 288
%r3_i1 = bitcast i8* %r3_i0 to <8 x i32>*
%r3 = load <8 x i32>, <8 x i32>* %r3_i1, align 32
%f4_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f4 = shufflevector <8 x float> %f4_1, <8 x float> undef, <8 x i32> zeroinitializer
%f5_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer
%f6_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%p7 = bitcast <8 x i1> %mask to <8 x i1>
%r8_1 = insertelement <8 x i32> undef, i32 3, i32 0
%r8 = shufflevector <8 x i32> %r8_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r9 = mul <8 x i32> %r3, %r8
%r10_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r10 = shufflevector <8 x i32> %r10_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r11 = add <8 x i32> %r9, %r10
%r12_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r13 = add <8 x i32> %r9, %r12
%f14 = fsub <8 x float> %f5, %f1
%f15 = fsub <8 x float> %f14, %f2
%r16 = bitcast <8 x i32> %self to <8 x i32>
%rd17_p1 = getelementptr inbounds i8, i8* %data, i32 8
%rd17_p2 = getelementptr inbounds i8, i8* %rd17_p1, <8 x i32> %offsets
%rd17_p3 = bitcast <8 x i8*> %rd17_p2 to <8 x i64*>
%rd17_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd17_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd17 = inttoptr <8 x i64> %rd17_p4 to <8 x i8*>
%r18_0 = bitcast <8 x i8*> %rd17 to <8 x i32*>
%r18_1 = getelementptr i32, <8 x i32*> %r18_0, <8 x i32> %r9
%r18 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r18_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer)
%r19_0 = bitcast <8 x i8*> %rd17 to <8 x i32*>
%r19_1 = getelementptr i32, <8 x i32*> %r19_0, <8 x i32> %r11
%r19 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r19_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer)
%r20_0 = bitcast <8 x i8*> %rd17 to <8 x i32*>
%r20_1 = getelementptr i32, <8 x i32*> %r20_0, <8 x i32> %r13
%r20 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r20_1, i32 4, <8 x i1> %p7, <8 x i32> zeroinitializer)
%r21 = mul <8 x i32> %r18, %r8
%r22 = add <8 x i32> %r21, %r10
%r23 = add <8 x i32> %r21, %r12
%rd24_p1 = getelementptr inbounds i8, i8* %data, i32 0
%rd24_p2 = getelementptr inbounds i8, i8* %rd24_p1, <8 x i32> %offsets
%rd24_p3 = bitcast <8 x i8*> %rd24_p2 to <8 x i64*>
%rd24_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd24_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd24 = inttoptr <8 x i64> %rd24_p4 to <8 x i8*>
%f25_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f25_1 = getelementptr float, <8 x float*> %f25_0, <8 x i32> %r21
%f25 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f25_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f26_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f26_1 = getelementptr float, <8 x float*> %f26_0, <8 x i32> %r22
%f26 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f26_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f27_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f27_1 = getelementptr float, <8 x float*> %f27_0, <8 x i32> %r23
%f27 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f27_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%r28 = mul <8 x i32> %r19, %r8
%r29 = add <8 x i32> %r28, %r10
%r30 = add <8 x i32> %r28, %r12
%f31_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f31_1 = getelementptr float, <8 x float*> %f31_0, <8 x i32> %r28
%f31 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f31_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f32_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f32_1 = getelementptr float, <8 x float*> %f32_0, <8 x i32> %r29
%f32 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f32_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f33_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f33_1 = getelementptr float, <8 x float*> %f33_0, <8 x i32> %r30
%f33 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f33_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%r34 = mul <8 x i32> %r20, %r8
%r35 = add <8 x i32> %r34, %r10
%r36 = add <8 x i32> %r34, %r12
%f37_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f37_1 = getelementptr float, <8 x float*> %f37_0, <8 x i32> %r34
%f37 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f37_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f38_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f38_1 = getelementptr float, <8 x float*> %f38_0, <8 x i32> %r35
%f38 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f38_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f39_0 = bitcast <8 x i8*> %rd24 to <8 x float*>
%f39_1 = getelementptr float, <8 x float*> %f39_0, <8 x i32> %r36
%f39 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f39_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f40 = fsub <8 x float> %f31, %f25
%f41 = fsub <8 x float> %f32, %f26
%f42 = fsub <8 x float> %f33, %f27
%f43 = fsub <8 x float> %f37, %f25
%f44 = fsub <8 x float> %f38, %f26
%f45 = fsub <8 x float> %f39, %f27
%f46 = fmul <8 x float> %f37, %f2
%f47 = fmul <8 x float> %f38, %f2
%f48 = fmul <8 x float> %f39, %f2
%f49 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f31, <8 x float> %f1, <8 x float> %f46)
%f50 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f32, <8 x float> %f1, <8 x float> %f47)
%f51 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f33, <8 x float> %f1, <8 x float> %f48)
%f52 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f25, <8 x float> %f15, <8 x float> %f49)
%f53 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f26, <8 x float> %f15, <8 x float> %f50)
%f54 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f27, <8 x float> %f15, <8 x float> %f51)
%f55 = fmul <8 x float> %f42, %f44
%f56 = fmul <8 x float> %f40, %f45
%f57 = fmul <8 x float> %f41, %f43
%f58 = fneg <8 x float> %f55
%f59 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f41, <8 x float> %f45, <8 x float> %f58)
%f60 = fneg <8 x float> %f56
%f61 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f42, <8 x float> %f43, <8 x float> %f60)
%f62 = fneg <8 x float> %f57
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f40, <8 x float> %f44, <8 x float> %f62)
%f64 = fmul <8 x float> %f59, %f59
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f61, <8 x float> %f64)
%f66 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f63, <8 x float> %f65)
%f67 = fdiv <8 x float> %f5, %f66
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67)
%f69 = fmul <8 x float> %f59, %f68
%f70 = fmul <8 x float> %f61, %f68
%f71 = fmul <8 x float> %f63, %f68
%p72 = fcmp oge <8 x float> %f71, %f4
%f73 = select <8 x i1> %p72, <8 x float> %f5, <8 x float> %f6
%f74 = fadd <8 x float> %f73, %f71
%f75 = fdiv <8 x float> %f5, %f74
%f76 = fneg <8 x float> %f75
%f77 = fmul <8 x float> %f69, %f70
%f78 = fmul <8 x float> %f77, %f76
%f79 = fmul <8 x float> %f69, %f69
%f80 = fmul <8 x float> %f79, %f76
%f81 = fneg <8 x float> %f80
%f82 = select <8 x i1> %p72, <8 x float> %f80, <8 x float> %f81
%f83 = fadd <8 x float> %f82, %f5
%f84 = fneg <8 x float> %f78
%f85 = select <8 x i1> %p72, <8 x float> %f78, <8 x float> %f84
%f86 = fneg <8 x float> %f69
%f87 = select <8 x i1> %p72, <8 x float> %f86, <8 x float> %f69
%rd88_p1 = getelementptr inbounds i8, i8* %data, i32 16
%rd88_p2 = getelementptr inbounds i8, i8* %rd88_p1, <8 x i32> %offsets
%rd88_p3 = bitcast <8 x i8*> %rd88_p2 to <8 x i64*>
%rd88_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd88_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd88 = inttoptr <8 x i64> %rd88_p4 to <8 x i8*>
%f89_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f89_1 = getelementptr float, <8 x float*> %f89_0, <8 x i32> %r21
%f89 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f89_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f90_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f90_1 = getelementptr float, <8 x float*> %f90_0, <8 x i32> %r22
%f90 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f90_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f91_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f91_1 = getelementptr float, <8 x float*> %f91_0, <8 x i32> %r23
%f91 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f91_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f92_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f92_1 = getelementptr float, <8 x float*> %f92_0, <8 x i32> %r28
%f92 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f92_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f93_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f93_1 = getelementptr float, <8 x float*> %f93_0, <8 x i32> %r29
%f93 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f93_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f94_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f94_1 = getelementptr float, <8 x float*> %f94_0, <8 x i32> %r30
%f94 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f94_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f95_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f95_1 = getelementptr float, <8 x float*> %f95_0, <8 x i32> %r34
%f95 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f95_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f96_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f96_1 = getelementptr float, <8 x float*> %f96_0, <8 x i32> %r35
%f96 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f96_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f97_0 = bitcast <8 x i8*> %rd88 to <8 x float*>
%f97_1 = getelementptr float, <8 x float*> %f97_0, <8 x i32> %r36
%f97 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f97_1, i32 4, <8 x i1> %p7, <8 x float> zeroinitializer)
%f98 = fmul <8 x float> %f89, %f15
%f99 = fmul <8 x float> %f90, %f15
%f100 = fmul <8 x float> %f91, %f15
%f101 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f92, <8 x float> %f1, <8 x float> %f98)
%f102 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f93, <8 x float> %f1, <8 x float> %f99)
%f103 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f94, <8 x float> %f1, <8 x float> %f100)
%f104 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f95, <8 x float> %f2, <8 x float> %f101)
%f105 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f2, <8 x float> %f102)
%f106 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f2, <8 x float> %f103)
%f107 = fmul <8 x float> %f104, %f104
%f108 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f105, <8 x float> %f105, <8 x float> %f107)
%f109 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f106, <8 x float> %f106, <8 x float> %f108)
%f110 = fdiv <8 x float> %f5, %f109
%f111 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f110)
%f112 = fmul <8 x float> %f104, %f111
%f113 = fmul <8 x float> %f105, %f111
%f114 = fmul <8 x float> %f106, %f111
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f85, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f54, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_5_1 = bitcast i8* %out_5_0 to <8 x float>*
%out_5_2 = load <8 x float>, <8 x float>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x float> %f69, <8 x float> %out_5_2
store <8 x float> %out_5_3, <8 x float>* %out_5_1, align 32
%out_6_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_6_1 = bitcast i8* %out_6_0 to <8 x float>*
%out_6_2 = load <8 x float>, <8 x float>* %out_6_1, align 32
%out_6_3 = select <8 x i1> %mask, <8 x float> %f70, <8 x float> %out_6_2
store <8 x float> %out_6_3, <8 x float>* %out_6_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f71, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_8_1 = bitcast i8* %out_8_0 to <8 x i32>*
%out_8_2 = load <8 x i32>, <8 x i32>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x i32> %r16, <8 x i32> %out_8_2
store <8 x i32> %out_8_3, <8 x i32>* %out_8_1, align 32
%out_13_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_13_1 = bitcast i8* %out_13_0 to <8 x float>*
%out_13_2 = load <8 x float>, <8 x float>* %out_13_1, align 32
%out_13_3 = select <8 x i1> %mask, <8 x float> %f87, <8 x float> %out_13_2
store <8 x float> %out_13_3, <8 x float>* %out_13_1, align 32
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>*
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32
%out_14_3 = select <8 x i1> %mask, <8 x float> %f83, <8 x float> %out_14_2
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>*
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32
%out_15_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_15_2
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>*
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32
%out_16_3 = select <8 x i1> %mask, <8 x float> %f113, <8 x float> %out_16_2
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32
%out_17_0 = getelementptr inbounds i8, i8* %params, i64 704
%out_17_1 = bitcast i8* %out_17_0 to <8 x float>*
%out_17_2 = load <8 x float>, <8 x float>* %out_17_1, align 32
%out_17_3 = select <8 x i1> %mask, <8 x float> %f112, <8 x float> %out_17_2
store <8 x float> %out_17_3, <8 x float>* %out_17_1, align 32
ret void;
}
define void @func_bf888b7a279d5208759c322da4df8aa9(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Emitter::pdf_direction()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 0
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>*
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32
%f3_i0 = getelementptr inbounds i8, i8* %params, i64 32
%f3_i1 = bitcast i8* %f3_i0 to <8 x float>*
%f3 = load <8 x float>, <8 x float>* %f3_i1, align 32
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 64
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 96
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>*
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 128
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>*
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32
%f7_i0 = getelementptr inbounds i8, i8* %params, i64 160
%f7_i1 = bitcast i8* %f7_i0 to <8 x float>*
%f7 = load <8 x float>, <8 x float>* %f7_i1, align 32
%f8_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f8_i1 = bitcast i8* %f8_i0 to <8 x float>*
%f8 = load <8 x float>, <8 x float>* %f8_i1, align 32
%f9 = fmul <8 x float> %f5, %f2
%f10 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f6, <8 x float> %f3, <8 x float> %f9)
%f11 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f7, <8 x float> %f4, <8 x float> %f10)
%f12_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f12 = shufflevector <8 x float> %f12_1, <8 x float> undef, <8 x i32> zeroinitializer
%p13 = fcmp olt <8 x float> %f11, %f12
%f14_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f14 = shufflevector <8 x float> %f14_1, <8 x float> undef, <8 x i32> zeroinitializer
%f15_0 = bitcast <8 x float> %f11 to <8 x i32>
%f15_1 = bitcast <8 x float> %f14 to <8 x i32>
%f15_2 = and <8 x i32> %f15_0, %f15_1
%f15 = bitcast <8 x i32> %f15_2 to <8 x float>
%p16 = fcmp one <8 x float> %f15, %f12
%f17 = fmul <8 x float> %f8, %f8
%f18 = fdiv <8 x float> %f17, %f15
%f19 = select <8 x i1> %p16, <8 x float> %f18, <8 x float> zeroinitializer
%f20 = fmul <8 x float> %f1, %f19
%f21 = select <8 x i1> %p13, <8 x float> %f20, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 224
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
ret void;
}
define void @func_972f6c492a7ec6284cf60b520b91aba3(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Emitter::eval()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 96
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer
%p6 = fcmp ogt <8 x float> %f4, %f5
%f7 = select <8 x i1> %p6, <8 x float> %f1, <8 x float> zeroinitializer
%f8 = select <8 x i1> %p6, <8 x float> %f2, <8 x float> zeroinitializer
%f9 = select <8 x i1> %p6, <8 x float> %f3, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 160
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f7, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 192
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f8, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 224
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f9, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
ret void;
}
define void @func_19ee132d0bd6c9e577b7250c22c38555(<8 x i1> %mask, <8 x i32> %self, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::Emitter::sample_direction()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 48
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 52
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 56
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_p1 = getelementptr inbounds i8, i8* %data, i32 24
%f4_p2 = getelementptr inbounds i8, i8* %f4_p1, <8 x i32> %offsets
%f4_p3 = bitcast <8 x i8*> %f4_p2 to <8 x float*>
%f4 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f4_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f5_p1 = getelementptr inbounds i8, i8* %data, i32 28
%f5_p2 = getelementptr inbounds i8, i8* %f5_p1, <8 x i32> %offsets
%f5_p3 = bitcast <8 x i8*> %f5_p2 to <8 x float*>
%f5 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f5_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 96
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>*
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32
%f7_i0 = getelementptr inbounds i8, i8* %params, i64 0
%f7_i1 = bitcast i8* %f7_i0 to <8 x float>*
%f7 = load <8 x float>, <8 x float>* %f7_i1, align 32
%f8_i0 = getelementptr inbounds i8, i8* %params, i64 32
%f8_i1 = bitcast i8* %f8_i0 to <8 x float>*
%f8 = load <8 x float>, <8 x float>* %f8_i1, align 32
%f9_i0 = getelementptr inbounds i8, i8* %params, i64 64
%f9_i1 = bitcast i8* %f9_i0 to <8 x float>*
%f9 = load <8 x float>, <8 x float>* %f9_i1, align 32
%f10_i0 = getelementptr inbounds i8, i8* %params, i64 128
%f10_i1 = bitcast i8* %f10_i0 to <8 x float>*
%f10 = load <8 x float>, <8 x float>* %f10_i1, align 32
%p11 = bitcast <8 x i1> %mask to <8 x i1>
%r12_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer
%f13_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0
%f13 = shufflevector <8 x float> %f13_1, <8 x float> undef, <8 x i32> zeroinitializer
%f14 = fmul <8 x float> %f10, %f4
%r15_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r15 = shufflevector <8 x i32> %r15_1, <8 x i32> undef, <8 x i32> zeroinitializer
%rd16_p1 = getelementptr inbounds i8, i8* %data, i32 16
%rd16_p2 = getelementptr inbounds i8, i8* %rd16_p1, <8 x i32> %offsets
%rd16_p3 = bitcast <8 x i8*> %rd16_p2 to <8 x i64*>
%rd16_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd16_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd16 = inttoptr <8 x i64> %rd16_p4 to <8 x i8*>
%f17_0 = bitcast <8 x i8*> %rd16 to <8 x float*>
%f17_1 = getelementptr float, <8 x float*> %f17_0, <8 x i32> %r12
%f17 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f17_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%p18 = fcmp olt <8 x float> %f17, %f14
%r19 = select <8 x i1> %p18, <8 x i32> %r15, <8 x i32> zeroinitializer
%rd20_p1 = getelementptr inbounds i8, i8* %data, i32 32
%rd20_p2 = getelementptr inbounds i8, i8* %rd20_p1, <8 x i32> %offsets
%rd20_p3 = bitcast <8 x i8*> %rd20_p2 to <8 x i64*>
%rd20_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd20_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd20 = inttoptr <8 x i64> %rd20_p4 to <8 x i8*>
%f21_0 = bitcast <8 x i8*> %rd20 to <8 x float*>
%f21_1 = getelementptr float, <8 x float*> %f21_0, <8 x i32> %r19
%f21 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f21_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f22 = fmul <8 x float> %f21, %f5
%r23 = sub <8 x i32> %r19, %r15
%p24 = icmp ugt <8 x i32> %r19, %r12
%p25 = and <8 x i1> %p24, %p11
%f26_0 = bitcast <8 x i8*> %rd16 to <8 x float*>
%f26_1 = getelementptr float, <8 x float*> %f26_0, <8 x i32> %r23
%f26 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f26_1, i32 4, <8 x i1> %p25, <8 x float> zeroinitializer)
%f27 = fmul <8 x float> %f26, %f5
%f28 = fsub <8 x float> %f10, %f27
%f29 = fdiv <8 x float> %f28, %f22
%r30_1 = insertelement <8 x i32> undef, i32 3, i32 0
%r30 = shufflevector <8 x i32> %r30_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r31 = mul <8 x i32> %r19, %r30
%r32 = add <8 x i32> %r31, %r15
%r33_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r34 = add <8 x i32> %r31, %r33
%rd35_p1 = getelementptr inbounds i8, i8* %data, i32 8
%rd35_p2 = getelementptr inbounds i8, i8* %rd35_p1, <8 x i32> %offsets
%rd35_p3 = bitcast <8 x i8*> %rd35_p2 to <8 x i64*>
%rd35_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd35_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd35 = inttoptr <8 x i64> %rd35_p4 to <8 x i8*>
%r36_0 = bitcast <8 x i8*> %rd35 to <8 x i32*>
%r36_1 = getelementptr i32, <8 x i32*> %r36_0, <8 x i32> %r31
%r36 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r36_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer)
%r37_0 = bitcast <8 x i8*> %rd35 to <8 x i32*>
%r37_1 = getelementptr i32, <8 x i32*> %r37_0, <8 x i32> %r32
%r37 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r37_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer)
%r38_0 = bitcast <8 x i8*> %rd35 to <8 x i32*>
%r38_1 = getelementptr i32, <8 x i32*> %r38_0, <8 x i32> %r34
%r38 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %r38_1, i32 4, <8 x i1> %p11, <8 x i32> zeroinitializer)
%r39 = mul <8 x i32> %r36, %r30
%r40 = add <8 x i32> %r39, %r15
%r41 = add <8 x i32> %r39, %r33
%rd42_p1 = getelementptr inbounds i8, i8* %data, i32 0
%rd42_p2 = getelementptr inbounds i8, i8* %rd42_p1, <8 x i32> %offsets
%rd42_p3 = bitcast <8 x i8*> %rd42_p2 to <8 x i64*>
%rd42_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd42_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd42 = inttoptr <8 x i64> %rd42_p4 to <8 x i8*>
%f43_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f43_1 = getelementptr float, <8 x float*> %f43_0, <8 x i32> %r39
%f43 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f43_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f44_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f44_1 = getelementptr float, <8 x float*> %f44_0, <8 x i32> %r40
%f44 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f44_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f45_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f45_1 = getelementptr float, <8 x float*> %f45_0, <8 x i32> %r41
%f45 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f45_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%r46 = mul <8 x i32> %r37, %r30
%r47 = add <8 x i32> %r46, %r15
%r48 = add <8 x i32> %r46, %r33
%f49_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f49_1 = getelementptr float, <8 x float*> %f49_0, <8 x i32> %r46
%f49 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f49_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f50_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f50_1 = getelementptr float, <8 x float*> %f50_0, <8 x i32> %r47
%f50 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f50_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f51_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f51_1 = getelementptr float, <8 x float*> %f51_0, <8 x i32> %r48
%f51 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f51_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%r52 = mul <8 x i32> %r38, %r30
%r53 = add <8 x i32> %r52, %r15
%r54 = add <8 x i32> %r52, %r33
%f55_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f55_1 = getelementptr float, <8 x float*> %f55_0, <8 x i32> %r52
%f55 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f55_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f56_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f56_1 = getelementptr float, <8 x float*> %f56_0, <8 x i32> %r53
%f56 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f56_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f57_0 = bitcast <8 x i8*> %rd42 to <8 x float*>
%f57_1 = getelementptr float, <8 x float*> %f57_0, <8 x i32> %r54
%f57 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f57_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f58 = fsub <8 x float> %f49, %f43
%f59 = fsub <8 x float> %f50, %f44
%f60 = fsub <8 x float> %f51, %f45
%f61 = fsub <8 x float> %f55, %f43
%f62 = fsub <8 x float> %f56, %f44
%f63 = fsub <8 x float> %f57, %f45
%f64_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f64 = shufflevector <8 x float> %f64_1, <8 x float> undef, <8 x i32> zeroinitializer
%f65 = fsub <8 x float> %f64, %f6
%f66_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f66 = shufflevector <8 x float> %f66_1, <8 x float> undef, <8 x i32> zeroinitializer
%f67 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f65, <8 x float> %f66)
%f68 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f67)
%f69 = fsub <8 x float> %f64, %f68
%f70 = fmul <8 x float> %f68, %f29
%f71 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f61, <8 x float> %f70, <8 x float> %f43)
%f72 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f62, <8 x float> %f70, <8 x float> %f44)
%f73 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f63, <8 x float> %f70, <8 x float> %f45)
%f74 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f58, <8 x float> %f69, <8 x float> %f71)
%f75 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f69, <8 x float> %f72)
%f76 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f60, <8 x float> %f69, <8 x float> %f73)
%rd77_p1 = getelementptr inbounds i8, i8* %data, i32 40
%rd77_p2 = getelementptr inbounds i8, i8* %rd77_p1, <8 x i32> %offsets
%rd77_p3 = bitcast <8 x i8*> %rd77_p2 to <8 x i64*>
%rd77_p4 = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %rd77_p3, i32 8, <8 x i1> %mask, <8 x i64> zeroinitializer)
%rd77 = inttoptr <8 x i64> %rd77_p4 to <8 x i8*>
%f78_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f78_1 = getelementptr float, <8 x float*> %f78_0, <8 x i32> %r39
%f78 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f78_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f79_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f79_1 = getelementptr float, <8 x float*> %f79_0, <8 x i32> %r40
%f79 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f79_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f80_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f80_1 = getelementptr float, <8 x float*> %f80_0, <8 x i32> %r41
%f80 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f80_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f81_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f81_1 = getelementptr float, <8 x float*> %f81_0, <8 x i32> %r46
%f81 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f81_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f82_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f82_1 = getelementptr float, <8 x float*> %f82_0, <8 x i32> %r47
%f82 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f82_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f83_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f83_1 = getelementptr float, <8 x float*> %f83_0, <8 x i32> %r48
%f83 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f83_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f84_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f84_1 = getelementptr float, <8 x float*> %f84_0, <8 x i32> %r52
%f84 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f84_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f85_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f85_1 = getelementptr float, <8 x float*> %f85_0, <8 x i32> %r53
%f85 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f85_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f86_0 = bitcast <8 x i8*> %rd77 to <8 x float*>
%f86_1 = getelementptr float, <8 x float*> %f86_0, <8 x i32> %r54
%f86 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f86_1, i32 4, <8 x i1> %p11, <8 x float> zeroinitializer)
%f87 = fsub <8 x float> %f64, %f69
%f88 = fsub <8 x float> %f87, %f70
%f89 = fmul <8 x float> %f84, %f70
%f90 = fmul <8 x float> %f85, %f70
%f91 = fmul <8 x float> %f86, %f70
%f92 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f81, <8 x float> %f69, <8 x float> %f89)
%f93 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f82, <8 x float> %f69, <8 x float> %f90)
%f94 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f83, <8 x float> %f69, <8 x float> %f91)
%f95 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f78, <8 x float> %f88, <8 x float> %f92)
%f96 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f79, <8 x float> %f88, <8 x float> %f93)
%f97 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f80, <8 x float> %f88, <8 x float> %f94)
%f98 = fmul <8 x float> %f95, %f95
%f99 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f96, <8 x float> %f96, <8 x float> %f98)
%f100 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f97, <8 x float> %f97, <8 x float> %f99)
%f101 = fdiv <8 x float> %f64, %f100
%f102 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f101)
%f103 = fmul <8 x float> %f95, %f102
%f104 = fmul <8 x float> %f96, %f102
%f105 = fmul <8 x float> %f97, %f102
%f106 = fsub <8 x float> %f74, %f7
%f107 = fsub <8 x float> %f75, %f8
%f108 = fsub <8 x float> %f76, %f9
%f109 = fmul <8 x float> %f106, %f106
%f110 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f107, <8 x float> %f107, <8 x float> %f109)
%f111 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f108, <8 x float> %f108, <8 x float> %f110)
%f112 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f111)
%f113 = fdiv <8 x float> %f64, %f112
%f114 = fmul <8 x float> %f106, %f113
%f115 = fmul <8 x float> %f107, %f113
%f116 = fmul <8 x float> %f108, %f113
%f117 = fmul <8 x float> %f114, %f103
%f118 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f115, <8 x float> %f104, <8 x float> %f117)
%f119 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f116, <8 x float> %f105, <8 x float> %f118)
%f120_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f120 = shufflevector <8 x float> %f120_1, <8 x float> undef, <8 x i32> zeroinitializer
%f121_0 = bitcast <8 x float> %f119 to <8 x i32>
%f121_1 = bitcast <8 x float> %f120 to <8 x i32>
%f121_2 = and <8 x i32> %f121_0, %f121_1
%f121 = bitcast <8 x i32> %f121_2 to <8 x float>
%f122 = fdiv <8 x float> %f111, %f121
%f123_0 = bitcast <8 x float> %f122 to <8 x i32>
%f123_1 = bitcast <8 x float> %f120 to <8 x i32>
%f123_2 = and <8 x i32> %f123_0, %f123_1
%f123 = bitcast <8 x i32> %f123_2 to <8 x float>
%p124 = fcmp olt <8 x float> %f123, %f13
%f125 = select <8 x i1> %p124, <8 x float> %f122, <8 x float> zeroinitializer
%f126 = fmul <8 x float> %f5, %f125
%p127 = fcmp olt <8 x float> %f119, %f66
%p128 = fcmp one <8 x float> %f126, %f66
%p129 = and <8 x i1> %p127, %p128
%f130 = fdiv <8 x float> %f64, %f126
%f131 = fmul <8 x float> %f1, %f130
%f132 = fmul <8 x float> %f2, %f130
%f133 = fmul <8 x float> %f3, %f130
%f134 = select <8 x i1> %p129, <8 x float> %f131, <8 x float> zeroinitializer
%f135 = select <8 x i1> %p129, <8 x float> %f132, <8 x float> zeroinitializer
%f136 = select <8 x i1> %p129, <8 x float> %f133, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 160
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f74, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 192
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f75, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 224
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f76, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 256
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>*
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x float> %f126, <8 x float> %out_8_2
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 288
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>*
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32
%out_9_3 = select <8 x i1> %mask, <8 x float> %f114, <8 x float> %out_9_2
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32
%out_10_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_10_1 = bitcast i8* %out_10_0 to <8 x float>*
%out_10_2 = load <8 x float>, <8 x float>* %out_10_1, align 32
%out_10_3 = select <8 x i1> %mask, <8 x float> %f115, <8 x float> %out_10_2
store <8 x float> %out_10_3, <8 x float>* %out_10_1, align 32
%out_11_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_11_1 = bitcast i8* %out_11_0 to <8 x float>*
%out_11_2 = load <8 x float>, <8 x float>* %out_11_1, align 32
%out_11_3 = select <8 x i1> %mask, <8 x float> %f116, <8 x float> %out_11_2
store <8 x float> %out_11_3, <8 x float>* %out_11_1, align 32
%out_14_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_14_1 = bitcast i8* %out_14_0 to <8 x float>*
%out_14_2 = load <8 x float>, <8 x float>* %out_14_1, align 32
%out_14_3 = select <8 x i1> %mask, <8 x float> %f134, <8 x float> %out_14_2
store <8 x float> %out_14_3, <8 x float>* %out_14_1, align 32
%out_15_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_15_1 = bitcast i8* %out_15_0 to <8 x float>*
%out_15_2 = load <8 x float>, <8 x float>* %out_15_1, align 32
%out_15_3 = select <8 x i1> %mask, <8 x float> %f135, <8 x float> %out_15_2
store <8 x float> %out_15_3, <8 x float>* %out_15_1, align 32
%out_16_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_16_1 = bitcast i8* %out_16_0 to <8 x float>*
%out_16_2 = load <8 x float>, <8 x float>* %out_16_1, align 32
%out_16_3 = select <8 x i1> %mask, <8 x float> %f136, <8 x float> %out_16_2
store <8 x float> %out_16_3, <8 x float>* %out_16_1, align 32
ret void;
}
define void @func_e9d5179176fa4aaf3f8d2cb1ff4d2c84(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::eval_pdf()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>*
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32
%f6_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%p7 = fcmp ogt <8 x float> %f4, %f6
%p8 = fcmp ogt <8 x float> %f5, %f6
%p9 = and <8 x i1> %p7, %p8
%f10_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11 = fmul <8 x float> %f1, %f10
%f12 = fmul <8 x float> %f2, %f10
%f13 = fmul <8 x float> %f3, %f10
%f14 = fmul <8 x float> %f11, %f5
%f15 = fmul <8 x float> %f12, %f5
%f16 = fmul <8 x float> %f13, %f5
%f17 = fmul <8 x float> %f10, %f5
%f18 = select <8 x i1> %p9, <8 x float> %f14, <8 x float> zeroinitializer
%f19 = select <8 x i1> %p9, <8 x float> %f15, <8 x float> zeroinitializer
%f20 = select <8 x i1> %p9, <8 x float> %f16, <8 x float> zeroinitializer
%f21 = select <8 x i1> %p9, <8 x float> %f17, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f18, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f19, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f20, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
ret void;
}
define void @func_9d53124ccfd48ba20820a9ee3e480cd9(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::eval_pdf()
%f1_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f1 = shufflevector <8 x float> %f1_1, <8 x float> undef, <8 x i32> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f1, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
ret void;
}
define void @func_4448b4670e928f754bf33368168c3f24(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::eval_pdf()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>*
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32
%f6_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%p7 = fcmp ogt <8 x float> %f4, %f6
%p8 = fcmp ogt <8 x float> %f5, %f6
%p9 = and <8 x i1> %p7, %p8
%f10_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11 = fmul <8 x float> %f10, %f5
%f12 = select <8 x i1> %p9, <8 x float> %f11, <8 x float> zeroinitializer
%f13 = fmul <8 x float> %f1, %f10
%f14 = fmul <8 x float> %f2, %f10
%f15 = fmul <8 x float> %f3, %f10
%f16 = fmul <8 x float> %f13, %f5
%f17 = fmul <8 x float> %f14, %f5
%f18 = fmul <8 x float> %f15, %f5
%f19 = select <8 x i1> %p9, <8 x float> %f16, <8 x float> zeroinitializer
%f20 = select <8 x i1> %p9, <8 x float> %f17, <8 x float> zeroinitializer
%f21 = select <8 x i1> %p9, <8 x float> %f18, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 288
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f19, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 320
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f20, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 352
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f21, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 384
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f12, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
ret void;
}
define void @func_60a1a348aca7bdb47e851f6cedce38a2(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::sample()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 352
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>*
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 384
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>*
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32
%f7_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer
%p8 = fcmp ogt <8 x float> %f4, %f7
%f9_1 = insertelement <8 x float> undef, float 0x4000000000000000, i32 0
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer
%f10_1 = insertelement <8 x float> undef, float 0xbff0000000000000, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f5, <8 x float> %f10)
%f12 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f9, <8 x float> %f6, <8 x float> %f10)
%p13 = fcmp oeq <8 x float> %f11, %f7
%p14 = fcmp oeq <8 x float> %f12, %f7
%p15 = and <8 x i1> %p13, %p14
%f16_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f16 = shufflevector <8 x float> %f16_1, <8 x float> undef, <8 x i32> zeroinitializer
%f17_0 = bitcast <8 x float> %f11 to <8 x i32>
%f17_1 = bitcast <8 x float> %f16 to <8 x i32>
%f17_2 = and <8 x i32> %f17_0, %f17_1
%f17 = bitcast <8 x i32> %f17_2 to <8 x float>
%f18_0 = bitcast <8 x float> %f12 to <8 x i32>
%f18_1 = bitcast <8 x float> %f16 to <8 x i32>
%f18_2 = and <8 x i32> %f18_0, %f18_1
%f18 = bitcast <8 x i32> %f18_2 to <8 x float>
%p19 = fcmp olt <8 x float> %f17, %f18
%f20 = select <8 x i1> %p19, <8 x float> %f12, <8 x float> %f11
%f21 = select <8 x i1> %p19, <8 x float> %f11, <8 x float> %f12
%f22_1 = insertelement <8 x float> undef, float 0x3fe921fb60000000, i32 0
%f22 = shufflevector <8 x float> %f22_1, <8 x float> undef, <8 x i32> zeroinitializer
%f23 = fmul <8 x float> %f22, %f21
%f24 = fdiv <8 x float> %f23, %f20
%f25_1 = insertelement <8 x float> undef, float 0x3ff921fb60000000, i32 0
%f25 = shufflevector <8 x float> %f25_1, <8 x float> undef, <8 x i32> zeroinitializer
%f26 = fsub <8 x float> %f25, %f24
%f27 = select <8 x i1> %p19, <8 x float> %f26, <8 x float> %f24
%f28 = select <8 x i1> %p15, <8 x float> %f7, <8 x float> %f27
%f29_0 = bitcast <8 x float> %f28 to <8 x i32>
%f29_1 = bitcast <8 x float> %f16 to <8 x i32>
%f29_2 = and <8 x i32> %f29_0, %f29_1
%f29 = bitcast <8 x i32> %f29_2 to <8 x float>
%f30_1 = insertelement <8 x float> undef, float 0x3ff45f3060000000, i32 0
%f30 = shufflevector <8 x float> %f30_1, <8 x float> undef, <8 x i32> zeroinitializer
%f31 = fmul <8 x float> %f29, %f30
%r32 = fptosi <8 x float> %f31 to <8 x i32>
%r33_1 = insertelement <8 x i32> undef, i32 1, i32 0
%r33 = shufflevector <8 x i32> %r33_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r34 = add <8 x i32> %r32, %r33
%r35_1 = insertelement <8 x i32> undef, i32 4294967294, i32 0
%r35 = shufflevector <8 x i32> %r35_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r36 = and <8 x i32> %r34, %r35
%f37 = sitofp <8 x i32> %r36 to <8 x float>
%r38_1 = insertelement <8 x i32> undef, i32 29, i32 0
%r38 = shufflevector <8 x i32> %r38_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r39 = shl <8 x i32> %r36, %r38
%f40 = bitcast <8 x i32> %r39 to <8 x float>
%f41_0 = bitcast <8 x float> %f40 to <8 x i32>
%f41_1 = bitcast <8 x float> %f28 to <8 x i32>
%f41_2 = xor <8 x i32> %f41_0, %f41_1
%f41 = bitcast <8 x i32> %f41_2 to <8 x float>
%r42_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r43 = sub <8 x i32> %r36, %r42
%r44 = xor <8 x i32> %r43, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%r45 = shl <8 x i32> %r44, %r38
%f46 = bitcast <8 x i32> %r45 to <8 x float>
%f47_1 = insertelement <8 x float> undef, float 0x3fe9200000000000, i32 0
%f47 = shufflevector <8 x float> %f47_1, <8 x float> undef, <8 x i32> zeroinitializer
%f48 = fmul <8 x float> %f37, %f47
%f49 = fsub <8 x float> %f29, %f48
%f50_1 = insertelement <8 x float> undef, float 0x3f2fb40000000000, i32 0
%f50 = shufflevector <8 x float> %f50_1, <8 x float> undef, <8 x i32> zeroinitializer
%f51 = fmul <8 x float> %f37, %f50
%f52 = fsub <8 x float> %f49, %f51
%f53_1 = insertelement <8 x float> undef, float 0x3e64442d20000000, i32 0
%f53 = shufflevector <8 x float> %f53_1, <8 x float> undef, <8 x i32> zeroinitializer
%f54 = fmul <8 x float> %f37, %f53
%f55 = fsub <8 x float> %f52, %f54
%f56 = fmul <8 x float> %f55, %f55
%f57_1 = insertelement <8 x float> undef, float 0x7ff0000000000000, i32 0
%f57 = shufflevector <8 x float> %f57_1, <8 x float> undef, <8 x i32> zeroinitializer
%p58 = fcmp oeq <8 x float> %f29, %f57
%f59_0 = sext <8 x i1> %p58 to <8 x i32>
%f59_1 = bitcast <8 x float> %f56 to <8 x i32>
%f59_2 = or <8 x i32> %f59_0, %f59_1
%f59 = bitcast <8 x i32> %f59_2 to <8 x float>
%f60_1 = insertelement <8 x float> undef, float 0xbfc5555460000000, i32 0
%f60 = shufflevector <8 x float> %f60_1, <8 x float> undef, <8 x i32> zeroinitializer
%f61_1 = insertelement <8 x float> undef, float 0x3f811073c0000000, i32 0
%f61 = shufflevector <8 x float> %f61_1, <8 x float> undef, <8 x i32> zeroinitializer
%f62_1 = insertelement <8 x float> undef, float 0xbf29943f20000000, i32 0
%f62 = shufflevector <8 x float> %f62_1, <8 x float> undef, <8 x i32> zeroinitializer
%f63 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f61, <8 x float> %f60)
%f64 = fmul <8 x float> %f59, %f59
%f65 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f64, <8 x float> %f62, <8 x float> %f63)
%f66 = fmul <8 x float> %f65, %f59
%f67_1 = insertelement <8 x float> undef, float 0x3fa55554a0000000, i32 0
%f67 = shufflevector <8 x float> %f67_1, <8 x float> undef, <8 x i32> zeroinitializer
%f68_1 = insertelement <8 x float> undef, float 0xbf56c0c340000000, i32 0
%f68 = shufflevector <8 x float> %f68_1, <8 x float> undef, <8 x i32> zeroinitializer
%f69_1 = insertelement <8 x float> undef, float 0x3ef99eb9c0000000, i32 0
%f69 = shufflevector <8 x float> %f69_1, <8 x float> undef, <8 x i32> zeroinitializer
%f70 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f68, <8 x float> %f67)
%f71 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f64, <8 x float> %f69, <8 x float> %f70)
%f72 = fmul <8 x float> %f71, %f59
%f73 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f66, <8 x float> %f55, <8 x float> %f55)
%f74_1 = insertelement <8 x float> undef, float 0xbfe0000000000000, i32 0
%f74 = shufflevector <8 x float> %f74_1, <8 x float> undef, <8 x i32> zeroinitializer
%f75_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f75 = shufflevector <8 x float> %f75_1, <8 x float> undef, <8 x i32> zeroinitializer
%f76 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f59, <8 x float> %f74, <8 x float> %f75)
%f77 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f72, <8 x float> %f59, <8 x float> %f76)
%r78 = and <8 x i32> %r36, %r42
%r79_1 = insertelement <8 x i32> undef, i32 0, i32 0
%r79 = shufflevector <8 x i32> %r79_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p80 = icmp eq <8 x i32> %r78, %r79
%f81 = select <8 x i1> %p80, <8 x float> %f73, <8 x float> %f77
%f82_1 = insertelement <8 x float> undef, float 0x8000000000000000, i32 0
%f82 = shufflevector <8 x float> %f82_1, <8 x float> undef, <8 x i32> zeroinitializer
%f83_0 = bitcast <8 x float> %f82 to <8 x i32>
%f83_1 = bitcast <8 x float> %f41 to <8 x i32>
%f83_2 = and <8 x i32> %f83_0, %f83_1
%f83 = bitcast <8 x i32> %f83_2 to <8 x float>
%f84_0 = bitcast <8 x float> %f81 to <8 x i32>
%f84_1 = bitcast <8 x float> %f83 to <8 x i32>
%f84_2 = xor <8 x i32> %f84_0, %f84_1
%f84 = bitcast <8 x i32> %f84_2 to <8 x float>
%f85 = select <8 x i1> %p80, <8 x float> %f77, <8 x float> %f73
%f86_0 = bitcast <8 x float> %f82 to <8 x i32>
%f86_1 = bitcast <8 x float> %f46 to <8 x i32>
%f86_2 = and <8 x i32> %f86_0, %f86_1
%f86 = bitcast <8 x i32> %f86_2 to <8 x float>
%f87_0 = bitcast <8 x float> %f85 to <8 x i32>
%f87_1 = bitcast <8 x float> %f86 to <8 x i32>
%f87_2 = xor <8 x i32> %f87_0, %f87_1
%f87 = bitcast <8 x i32> %f87_2 to <8 x float>
%f88 = fmul <8 x float> %f20, %f87
%f89 = fmul <8 x float> %f20, %f84
%f90 = fmul <8 x float> %f88, %f88
%f91 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f89, <8 x float> %f89, <8 x float> %f90)
%f92 = fsub <8 x float> %f75, %f91
%f93 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f92, <8 x float> %f7)
%f94 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f93)
%f95_1 = insertelement <8 x float> undef, float 0x3fd45f3060000000, i32 0
%f95 = shufflevector <8 x float> %f95_1, <8 x float> undef, <8 x i32> zeroinitializer
%f96 = fmul <8 x float> %f95, %f94
%r97_1 = insertelement <8 x i32> undef, i32 2, i32 0
%r97 = shufflevector <8 x i32> %r97_1, <8 x i32> undef, <8 x i32> zeroinitializer
%p98 = fcmp ogt <8 x float> %f96, %f7
%p99 = and <8 x i1> %p8, %p98
%f100 = select <8 x i1> %p99, <8 x float> %f1, <8 x float> zeroinitializer
%f101 = select <8 x i1> %p99, <8 x float> %f2, <8 x float> zeroinitializer
%f102 = select <8 x i1> %p99, <8 x float> %f3, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f88, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f89, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f94, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f96, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f75, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>*
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r97, <8 x i32> %out_5_2
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f100, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>*
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x float> %f101, <8 x float> %out_8_2
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>*
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32
%out_9_3 = select <8 x i1> %mask, <8 x float> %f102, <8 x float> %out_9_2
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32
ret void;
}
define void @func_e6fff8d6b222436f71518443ce068358(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::sample()
%f1_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f1_i1 = bitcast i8* %f1_i0 to <8 x float>*
%f1 = load <8 x float>, <8 x float>* %f1_i1, align 32
%f2_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f2_i1 = bitcast i8* %f2_i0 to <8 x float>*
%f2 = load <8 x float>, <8 x float>* %f2_i1, align 32
%f3_i0 = getelementptr inbounds i8, i8* %params, i64 224
%f3_i1 = bitcast i8* %f3_i0 to <8 x float>*
%f3 = load <8 x float>, <8 x float>* %f3_i1, align 32
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 320
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f5 = shufflevector <8 x float> %f5_1, <8 x float> undef, <8 x i32> zeroinitializer
%f6_1 = insertelement <8 x float> undef, float 0x7fffffffe0000000, i32 0
%f6 = shufflevector <8 x float> %f6_1, <8 x float> undef, <8 x i32> zeroinitializer
%f7_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer
%f8_1 = insertelement <8 x float> undef, float 0x3ff8112280000000, i32 0
%f8 = shufflevector <8 x float> %f8_1, <8 x float> undef, <8 x i32> zeroinitializer
%p9 = fcmp oge <8 x float> %f1, %f5
%f10_1 = insertelement <8 x float> undef, float 0x3fe5462520000000, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11 = select <8 x i1> %p9, <8 x float> %f8, <8 x float> %f10
%f12 = select <8 x i1> %p9, <8 x float> %f10, <8 x float> %f8
%f13 = fneg <8 x float> %f1
%f14 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f1, <8 x float> %f13, <8 x float> %f7)
%f15 = fmul <8 x float> %f12, %f12
%f16 = fneg <8 x float> %f15
%f17 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f14, <8 x float> %f16, <8 x float> %f7)
%f18_0 = bitcast <8 x float> %f1 to <8 x i32>
%f18_1 = bitcast <8 x float> %f6 to <8 x i32>
%f18_2 = and <8 x i32> %f18_0, %f18_1
%f18 = bitcast <8 x i32> %f18_2 to <8 x float>
%f19 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f17, <8 x float> %f5)
%f20 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f19)
%p21 = fcmp oeq <8 x float> %f18, %f5
%f22 = fneg <8 x float> %f20
%f23 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f22, <8 x float> %f18)
%f24 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f20, <8 x float> %f18)
%f25 = fdiv <8 x float> %f23, %f24
%f26 = fneg <8 x float> %f18
%f27 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f26, <8 x float> %f20)
%f28 = call <8 x float> @llvm.fma.v8f32(<8 x float> %f11, <8 x float> %f18, <8 x float> %f20)
%f29 = fdiv <8 x float> %f27, %f28
%f30 = fmul <8 x float> %f25, %f25
%f31 = fmul <8 x float> %f29, %f29
%f32 = fadd <8 x float> %f30, %f31
%f33_1 = insertelement <8 x float> undef, float 0x3fe0000000000000, i32 0
%f33 = shufflevector <8 x float> %f33_1, <8 x float> undef, <8 x i32> zeroinitializer
%f34 = fmul <8 x float> %f33, %f32
%f35 = select <8 x i1> %p21, <8 x float> %f7, <8 x float> %f34
%f36 = select <8 x i1> %p9, <8 x float> %f22, <8 x float> %f20
%f37 = fsub <8 x float> %f7, %f35
%p38 = fcmp ole <8 x float> %f4, %f35
%f39 = select <8 x i1> %p38, <8 x float> %f35, <8 x float> %f37
%p40 = xor <8 x i1> %p38, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r41_1 = insertelement <8 x i32> undef, i32 32, i32 0
%r41 = shufflevector <8 x i32> %r41_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r42_1 = insertelement <8 x i32> undef, i32 64, i32 0
%r42 = shufflevector <8 x i32> %r42_1, <8 x i32> undef, <8 x i32> zeroinitializer
%r43 = select <8 x i1> %p38, <8 x i32> %r41, <8 x i32> %r42
%f44 = fneg <8 x float> %f2
%f45 = fneg <8 x float> %f3
%f46 = fneg <8 x float> %f12
%f47 = fmul <8 x float> %f46, %f2
%f48 = fmul <8 x float> %f46, %f3
%f49 = select <8 x i1> %p38, <8 x float> %f44, <8 x float> %f47
%f50 = select <8 x i1> %p38, <8 x float> %f45, <8 x float> %f48
%f51 = select <8 x i1> %p38, <8 x float> %f1, <8 x float> %f36
%f52 = select <8 x i1> %p38, <8 x float> %f7, <8 x float> %f11
%f53 = select <8 x i1> %p40, <8 x float> %f15, <8 x float> %f7
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f49, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f50, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f39, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f52, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>*
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r43, <8 x i32> %out_5_2
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>*
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_8_2
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>*
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32
%out_9_3 = select <8 x i1> %mask, <8 x float> %f53, <8 x float> %out_9_2
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32
ret void;
}
define void @func_48d4a1fb2376a0011da8c81578897b84(<8 x i1> %mask, i8* noalias %params, i8* noalias %data, <8 x i32> %offsets) #0 {
entry:
; VCall: mitsuba::BSDF::sample()
%f1_p1 = getelementptr inbounds i8, i8* %data, i32 0
%f1_p2 = getelementptr inbounds i8, i8* %f1_p1, <8 x i32> %offsets
%f1_p3 = bitcast <8 x i8*> %f1_p2 to <8 x float*>
%f1 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f1_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f2_p1 = getelementptr inbounds i8, i8* %data, i32 4
%f2_p2 = getelementptr inbounds i8, i8* %f2_p1, <8 x i32> %offsets
%f2_p3 = bitcast <8 x i8*> %f2_p2 to <8 x float*>
%f2 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f2_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f3_p1 = getelementptr inbounds i8, i8* %data, i32 8
%f3_p2 = getelementptr inbounds i8, i8* %f3_p1, <8 x i32> %offsets
%f3_p3 = bitcast <8 x i8*> %f3_p2 to <8 x float*>
%f3 = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %f3_p3, i32 4, <8 x i1> %mask, <8 x float> zeroinitializer)
%f4_i0 = getelementptr inbounds i8, i8* %params, i64 256
%f4_i1 = bitcast i8* %f4_i0 to <8 x float>*
%f4 = load <8 x float>, <8 x float>* %f4_i1, align 32
%f5_i0 = getelementptr inbounds i8, i8* %params, i64 192
%f5_i1 = bitcast i8* %f5_i0 to <8 x float>*
%f5 = load <8 x float>, <8 x float>* %f5_i1, align 32
%f6_i0 = getelementptr inbounds i8, i8* %params, i64 224
%f6_i1 = bitcast i8* %f6_i0 to <8 x float>*
%f6 = load <8 x float>, <8 x float>* %f6_i1, align 32
%f7_1 = insertelement <8 x float> undef, float 0x0, i32 0
%f7 = shufflevector <8 x float> %f7_1, <8 x float> undef, <8 x i32> zeroinitializer
%p8 = fcmp ogt <8 x float> %f4, %f7
%f9_1 = insertelement <8 x float> undef, float 0x4000000000000000, i32 0
%f9 = shufflevector <8 x float> %f9_1, <8 x float> undef, <8 x i32> zeroinitializer
%f10_1 = insertelement <8 x float> undef, float 0x3ff0000000000000, i32 0
%f10 = shufflevector <8 x float> %f10_1, <8 x float> undef, <8 x i32> zeroinitializer
%f11_1 = insertelement <8 x float> undef, float 0x3fe0000000000000, i32 0
%f11 = shufflevector <8 x float> %f11_1, <8 x float> undef, <8 x i32> zeroinitializer
%r12_1 = insertelement <8 x i32> undef, i32 32, i32 0
%r12 = shufflevector <8 x i32> %r12_1, <8 x i32> undef, <8 x i32> zeroinitializer
%f13 = fneg <8 x float> %f5
%f14 = fneg <8 x float> %f6
%f15 = fmul <8 x float> %f4, %f4
%f16 = fsub <8 x float> %f10, %f15
%f17 = fmul <8 x float> %f16, %f16
%f18 = fmul <8 x float> %f2, %f2
%f19 = fmul <8 x float> %f3, %f3
%f20 = fsub <8 x float> %f18, %f19
%f21 = fsub <8 x float> %f20, %f16
%f22 = fmul <8 x float> %f21, %f21
%f23_1 = insertelement <8 x float> undef, float 0x4010000000000000, i32 0
%f23 = shufflevector <8 x float> %f23_1, <8 x float> undef, <8 x i32> zeroinitializer
%f24 = fmul <8 x float> %f23, %f3
%f25 = fmul <8 x float> %f24, %f3
%f26 = fmul <8 x float> %f25, %f2
%f27 = fmul <8 x float> %f26, %f2
%f28 = fadd <8 x float> %f22, %f27
%f29 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f28, <8 x float> %f7)
%f30 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f29)
%f31 = fadd <8 x float> %f30, %f21
%f32 = fmul <8 x float> %f11, %f31
%f33 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %f32, <8 x float> %f7)
%f34 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f33)
%f35 = fadd <8 x float> %f30, %f15
%f36 = fmul <8 x float> %f9, %f4
%f37 = fmul <8 x float> %f36, %f34
%f38 = fsub <8 x float> %f35, %f37
%f39 = fadd <8 x float> %f35, %f37
%f40 = fdiv <8 x float> %f38, %f39
%f41 = fmul <8 x float> %f30, %f15
%f42 = fadd <8 x float> %f41, %f17
%f43 = fmul <8 x float> %f37, %f16
%f44 = fsub <8 x float> %f42, %f43
%f45 = fmul <8 x float> %f40, %f44
%f46 = fadd <8 x float> %f42, %f43
%f47 = fdiv <8 x float> %f45, %f46
%f48 = fadd <8 x float> %f40, %f47
%f49 = fmul <8 x float> %f11, %f48
%f50 = fmul <8 x float> %f1, %f49
%f51 = select <8 x i1> %p8, <8 x float> %f50, <8 x float> zeroinitializer
%out_0_0 = getelementptr inbounds i8, i8* %params, i64 416
%out_0_1 = bitcast i8* %out_0_0 to <8 x float>*
%out_0_2 = load <8 x float>, <8 x float>* %out_0_1, align 32
%out_0_3 = select <8 x i1> %mask, <8 x float> %f13, <8 x float> %out_0_2
store <8 x float> %out_0_3, <8 x float>* %out_0_1, align 32
%out_1_0 = getelementptr inbounds i8, i8* %params, i64 448
%out_1_1 = bitcast i8* %out_1_0 to <8 x float>*
%out_1_2 = load <8 x float>, <8 x float>* %out_1_1, align 32
%out_1_3 = select <8 x i1> %mask, <8 x float> %f14, <8 x float> %out_1_2
store <8 x float> %out_1_3, <8 x float>* %out_1_1, align 32
%out_2_0 = getelementptr inbounds i8, i8* %params, i64 480
%out_2_1 = bitcast i8* %out_2_0 to <8 x float>*
%out_2_2 = load <8 x float>, <8 x float>* %out_2_1, align 32
%out_2_3 = select <8 x i1> %mask, <8 x float> %f4, <8 x float> %out_2_2
store <8 x float> %out_2_3, <8 x float>* %out_2_1, align 32
%out_3_0 = getelementptr inbounds i8, i8* %params, i64 512
%out_3_1 = bitcast i8* %out_3_0 to <8 x float>*
%out_3_2 = load <8 x float>, <8 x float>* %out_3_1, align 32
%out_3_3 = select <8 x i1> %mask, <8 x float> %f10, <8 x float> %out_3_2
store <8 x float> %out_3_3, <8 x float>* %out_3_1, align 32
%out_4_0 = getelementptr inbounds i8, i8* %params, i64 544
%out_4_1 = bitcast i8* %out_4_0 to <8 x float>*
%out_4_2 = load <8 x float>, <8 x float>* %out_4_1, align 32
%out_4_3 = select <8 x i1> %mask, <8 x float> %f10, <8 x float> %out_4_2
store <8 x float> %out_4_3, <8 x float>* %out_4_1, align 32
%out_5_0 = getelementptr inbounds i8, i8* %params, i64 576
%out_5_1 = bitcast i8* %out_5_0 to <8 x i32>*
%out_5_2 = load <8 x i32>, <8 x i32>* %out_5_1, align 32
%out_5_3 = select <8 x i1> %mask, <8 x i32> %r12, <8 x i32> %out_5_2
store <8 x i32> %out_5_3, <8 x i32>* %out_5_1, align 32
%out_7_0 = getelementptr inbounds i8, i8* %params, i64 608
%out_7_1 = bitcast i8* %out_7_0 to <8 x float>*
%out_7_2 = load <8 x float>, <8 x float>* %out_7_1, align 32
%out_7_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_7_2
store <8 x float> %out_7_3, <8 x float>* %out_7_1, align 32
%out_8_0 = getelementptr inbounds i8, i8* %params, i64 640
%out_8_1 = bitcast i8* %out_8_0 to <8 x float>*
%out_8_2 = load <8 x float>, <8 x float>* %out_8_1, align 32
%out_8_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_8_2
store <8 x float> %out_8_3, <8 x float>* %out_8_1, align 32
%out_9_0 = getelementptr inbounds i8, i8* %params, i64 672
%out_9_1 = bitcast i8* %out_9_0 to <8 x float>*
%out_9_2 = load <8 x float>, <8 x float>* %out_9_1, align 32
%out_9_3 = select <8 x i1> %mask, <8 x float> %f51, <8 x float> %out_9_2
store <8 x float> %out_9_3, <8 x float>* %out_9_1, align 32
ret void;
}
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
declare <8 x float> @llvm.masked.gather.v8f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
declare <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
@callables = internal local_unnamed_addr global i8** null, align 8
define void @set_callables(i8** %ptr) local_unnamed_addr #0 {
store i8** %ptr, i8*** @callables
ret void
}
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)
define internal void @ek.scatter_add.v8f32(<8 x float*> %ptrs, <8 x float> %value, <8 x i1> %active_in) #0 {
L0:
br label %L1
L1:
%index = phi i32 [ 0, %L0 ], [ %index_next, %L3 ]
%active = phi <8 x i1> [ %active_in, %L0 ], [ %active_next_2, %L3 ]
%active_i = extractelement <8 x i1> %active, i32 %index
br i1 %active_i, label %L2, label %L3
L2:
%ptr_0 = extractelement <8 x float *> %ptrs, i32 %index
%ptr_1 = insertelement <8 x float *> undef, float* %ptr_0, i32 0
%ptr_2 = shufflevector <8 x float *> %ptr_1, <8 x float *> undef, <8 x i32> zeroinitializer
%ptr_eq = icmp eq <8 x float *> %ptrs, %ptr_2
%active_cur = and <8 x i1> %ptr_eq, %active
%value_cur = select <8 x i1> %active_cur, <8 x float> %value, <8 x float> zeroinitializer
%sum = call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %value_cur)
atomicrmw fadd float* %ptr_0, float %sum monotonic
%active_next = xor <8 x i1> %active, %active_cur
%active_red = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %active_next)
br i1 %active_red, label %L3, label %L4
L3:
%active_next_2 = phi <8 x i1> [ %active, %L1 ], [ %active_next, %L2 ]
%index_next = add nuw nsw i32 %index, 1
%cond_2 = icmp eq i32 %index_next, 8
br i1 %cond_2, label %L4, label %L1
L4:
ret void
}
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
!0 = !{!0}
!1 = !{!1, !0}
!2 = !{!1}
!3 = !{!"llvm.loop.unroll.disable", !"llvm.loop.vectorize.enable", i1 0}
attributes #0 = { norecurse nounwind "frame-pointer"="none" "no-builtins" "no-stack-arg-probe" "target-cpu"="skylake" "target-features"="-vzeroupper,-avx512pf,-tsxldtrk,+cx16,+sahf,-tbm,-avx512ifma,-sha,+crc32,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-avx512bf16,-amx-tile,-uintr,-gfni,+popcnt,-ptwrite,+aes,-avx512bitalg,-movdiri,-widekl,+xsaves,-avx512er,-avxvnni,-avx512fp16,-avx512vnni,-amx-bf16,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,+xsavec,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-kl,-movdir64b,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,-serialize,-hreset,+invpcid,-avx512cd,+avx,-vaes,-amx-int8,+cx8,+fma,-rtm,+bmi,-enqcmd,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+fxsr,-wbnoinvd,+sse,+lzcnt,+pclmul,-rdpru,+f16c,+ssse3,+sgx,-prefetchwt1,+cmov,-avx512vbmi,-shstk,+movbe,-avx512vp2intersect,+xsaveopt,-avx512dq,+sse2,+adx,+sse3" }
drjit_32c09595272bc74a586d2fe8f7e4726e:469:31: error: use of undefined value '%f253_final'
%f253 = phi <8 x float> [ %f253_final, %l_236_tail ], [ %f220, %l_236_start ]
^
Aborted (core dumped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment