Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Created March 15, 2024 04:22
Show Gist options
  • Save raphlinus/5aca9de53f9d6b24933cb24d8a60df63 to your computer and use it in GitHub Desktop.
apparent miscompilation of flatten.wgsl
1 s_version 0x4004 4 0.01 2
2 s_inst_prefetch 0x3 4 0.01 1
3 s_getpc_b64 s[0:1] 4 0.03 5
4 s_mov_b32 s0, s2 4 0.05 9
5 s_load_dwordx4 s[4:7], s[0:1], null 4 0.01 1
6 s_load_dwordx4 s[12:15], s[0:1], 0x20 4 0.01 1
7 s_load_dwordx4 s[16:19], s[0:1], 0x40 4 0.01 1
8 v_lshl_add_u32 v3, s8, 8, v0 4 0.03 5
9 v_lshrrev_b32_e32 v0, 2, v3 4 0.01 1
10 s_waitcnt lgkmcnt(0) 4 2.30 406
11 s_buffer_load_dwordx2 s[2:3], s[4:7], 0x24 4 0.01 1
12 s_buffer_load_dwordx2 s[4:5], s[4:7], 0x34 4 0.01 1
13 s_waitcnt lgkmcnt(0) 4 2.61 463
14 v_add_lshl_u32 v1, v0, s2, 2 4 0.01 1
15 v_mul_lo_u32 v0, v0, 20 4 0.11 19
16 tbuffer_load_format_x v2, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.01 1
17 s_clause 0x1 4 0.01 1
18 tbuffer_load_format_xyz v[4:6], v0, s[16:19], 0 format:[BUF_FMT_32_32_32_FLOAT] offen offset:8 4 0.01 1
19 tbuffer_load_format_x v1, v0, s[16:19], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.08 14
20 v_lshlrev_b32_e32 v0, 3, v3 4 0.03 5
21 v_and_b32_e32 v7, 24, v0 4 0.01 1
22 s_waitcnt vmcnt(2) 4 2.78 491
23 v_bfe_u32 v8, v2, 0, v7 4 0.01 1
24 v_lshrrev_b32_e32 v2, v7, v2 4 0.02 4
25 v_and_b32_e32 v9, 0x404040, v8 4 0.01 1
26 v_and_b32_e32 v7, 0x101010, v8 4 0.01 1
27 v_and_b32_e32 v11, 16, v2 4 0.01 2
28 v_bcnt_u32_b32 v10, v9, 0 4 0.01 1
29 s_waitcnt vmcnt(1) 4 0.02 3
30 v_bcnt_u32_b32 v7, v7, v6 4 0.18 32
31 v_cmp_ne_i32_e32 vcc_lo, 0, v11 4 0.01 1
32 v_lshl_add_u32 v5, v10, 1, v5 4 0.01 2
33 v_and_b32_e32 v10, 0x202020, v8 4 0.02 3
34 v_add_lshl_u32 v5, v5, s5, 2 4 0.01 1
35 s_waitcnt vmcnt(0) 4 0.01 2
36 v_bcnt_u32_b32 v1, v10, v1 4 0.03 5
37 v_add_nc_u32_e32 v9, -8, v5 4 0.09 15
38 tbuffer_load_format_x v6, v9, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.01 1
39 s_waitcnt_depctr 0xffe3 4 0.06 11
40 s_and_saveexec_b32 s5, vcc_lo 4 0.05 9
41 s_cbranch_execz _L0 4 0.01 1
42 BBF0_0:
43 s_load_dwordx4 s[8:11], s[0:1], 0x60 0 0.00
44 v_mul_lo_u32 v10, v7, 24 0 0.00
45 s_waitcnt vmcnt(0) 0 0.00
46 v_and_b32_e32 v9, 2.0, v6 0 0.00
47 v_add_nc_u32_e32 v12, -1, v1 0 0.00
48 v_cmp_ne_i32_e32 vcc_lo, 0, v9 0 0.00
49 v_cndmask_b32_e64 v11, 0, 1, vcc_lo 0 0.00
50 s_waitcnt lgkmcnt(0) 0 0.00
51 buffer_store_dwordx2 v[11:12], v10, s[8:11], 0 offen offset:16 glc 0 0.00
52 _L0:
53 s_waitcnt_depctr 0xffe3 4 0.23 40
54 s_mov_b32 exec_lo, s5 4 0.05 9
55 v_and_b32_e32 v9, 3, v2 4 0.03 5
56 v_cmp_ne_i32_e32 vcc_lo, 0, v9 4 0.25 45
57 s_and_b32 exec_lo, s5, vcc_lo 4 0.05 9
58 s_cbranch_execz _L1 4 0.18 32
59 BBF0_1:
60 v_lshrrev_b32_e32 v10, 2, v8 4 0.01 1
61 v_lshrrev_b32_e32 v11, 3, v8 4 0.01 1
62 v_and_b32_e32 v8, 0x30303, v8 4 0.01 1
63 v_mul_lo_u32 v1, v1, 6 4 0.02 4
64 v_and_b32_e32 v10, 0x10101, v10 4 0.01 1
65 v_and_b32_e32 v11, 0x10101, v11 4 0.02 3
66 v_add_nc_u32_e32 v1, -6, v1 4 0.01 1
67 v_add_nc_u32_e32 v8, v8, v10 4 0.01 1
68 v_mul_u32_u24_e32 v10, 15, v11 4 0.02 3
69 v_add_lshl_u32 v1, s4, v1, 2 4 0.01 1
70 s_waitcnt_depctr 0xffe3 4 1.43 253
71 s_clause 0x1 4 0.01 1
72 tbuffer_load_format_xyzw v[11:14], v1, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 4 0.01 1
73 tbuffer_load_format_xy v[18:19], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 4 0.07 11
74 v_and_b32_e32 v10, v8, v10 4 0.03 5
75 v_add_nc_u32_e32 v8, v8, v10 4 0.03 5
76 v_lshrrev_b32_e32 v10, 8, v8 4 0.03 5
77 v_add_nc_u32_e32 v8, v8, v10 4 0.01 1
78 v_and_b32_e32 v10, 8, v2 4 0.02 3
79 v_add_nc_u32_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4 0.01 1
80 v_cmp_eq_i32_e32 vcc_lo, 0, v10 4 0.02 3
81 v_and_b32_e32 v1, 63, v8 4 0.12 22
82 v_add3_u32 v1, v4, v1, s3 4 0.04 7
83 s_and_saveexec_b32 s4, vcc_lo 4 0.05 9
84 s_cbranch_execz _L2 4 0.13 22
85 BBF0_2:
86 v_lshlrev_b32_e32 v1, 2, v1 0 0.00
87 tbuffer_load_format_xy v[15:16], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen 0 0.00
88 v_cmp_gt_u32_e32 vcc_lo, 2, v9 0 0.00
89 s_waitcnt vmcnt(0) 0 0.00
90 v_cvt_f32_i32_sdwa v26, sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
91 v_cvt_f32_i32_sdwa v27, sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
92 v_cvt_f32_i32_sdwa v28, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
93 v_cvt_f32_i32_sdwa v29, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
94 s_and_saveexec_b32 s6, vcc_lo 0 0.00
95 v_mov_b32_e32 v25, 0 0 0.00
96 v_mov_b32_e32 v111, 0 0 0.00
97 v_mov_b32_e32 v110, 0 0 0.00
98 v_mov_b32_e32 v109, 0 0 0.00
99 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00
100 s_cbranch_execz _L3 0 0.00
101 BBF0_3:
102 tbuffer_load_format_x v16, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00
103 v_cmp_ne_i32_e32 vcc_lo, 3, v9 0 0.00
104 s_waitcnt vmcnt(0) 0 0.00
105 v_cvt_f32_i32_sdwa v109, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
106 v_cvt_f32_i32_sdwa v110, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
107 s_and_saveexec_b32 s7, vcc_lo 0 0.00
108 v_mov_b32_e32 v25, 0 0 0.00
109 v_mov_b32_e32 v111, 0 0 0.00
110 s_andn2_b32 exec_lo, s7, exec_lo 0 0.00
111 s_cbranch_execz _L3 0 0.00
112 BBF0_4:
113 tbuffer_load_format_x v1, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:12 0 0.00
114 s_waitcnt vmcnt(0) 0 0.00
115 v_cvt_f32_i32_sdwa v111, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
116 v_cvt_f32_i32_sdwa v25, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
117 _L3:
118 s_mov_b32 exec_lo, s6 0 0.00
119 _L2:
120 s_andn2_b32 exec_lo, s4, exec_lo 4 0.05 9
121 s_cbranch_execz _L4 4 0.02 3
122 BBF0_5:
123 v_lshlrev_b32_e32 v1, 2, v1 4 0.01 1
124 s_waitcnt_depctr 0xffe3 4 1.94 343
125 tbuffer_load_format_xyzw v[26:29], v1, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 4 0.06 10
126 v_cmp_gt_u32_e32 vcc_lo, 2, v9 4 0.09 16
127 s_and_saveexec_b32 s6, vcc_lo 4 0.05 9
128 v_mov_b32_e32 v25, 0 4 0.01 1
129 v_mov_b32_e32 v111, 0 4 0.01 1
130 v_mov_b32_e32 v110, 0 4 0.01 1
131 v_mov_b32_e32 v109, 0 4 0.01 1
132 s_andn2_b32 exec_lo, s6, exec_lo 4 0.05 9
133 s_cbranch_execz _L4 4 0.03 5
134 BBF0_6:
135 tbuffer_load_format_xy v[109:110], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 4 0.06 10
136 v_cmp_ne_i32_e32 vcc_lo, 3, v9 4 0.09 16
137 s_and_saveexec_b32 s7, vcc_lo 4 0.05 9
138 v_mov_b32_e32 v25, 0 4 0.01 1
139 v_mov_b32_e32 v111, 0 4 0.01 1
140 s_andn2_b32 exec_lo, s7, exec_lo 4 0.05 9
141 s_cbranch_execz _L4 4 0.06 10
142 BBF0_7:
143 tbuffer_load_format_xy v[24:25], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:24 0 0.00
144 s_waitcnt vmcnt(0) 0 0.00
145 v_mov_b32_e32 v111, v24 0 0.00
146 _L4:
147 s_mov_b32 exec_lo, s4 4 0.05 9
148 v_and_b32_e32 v2, 4, v2 4 0.01 1
149 s_waitcnt vmcnt(3) 4 0.01 2
150 v_cmp_lt_i32_e64 s4, v6, 0 4 0.01 1
151 v_cmp_eq_i32_e64 s8, v9, 2 4 0.01 2
152 v_cmp_ne_i32_e64 s6, v2, 0 4 0.09 16
153 s_and_b32 s7, s4, s6 4 0.01 2
154 s_nand_b32 s9, s7, s8 4 0.01 1
155 s_and_b32 vcc_lo, s7, s8 4 0.01 1
156 s_waitcnt vmcnt(0) 4 2.64 467
157 v_cndmask_b32_e32 v23, v26, v28, vcc_lo 4 0.01 1
158 v_cndmask_b32_e32 v22, v27, v29, vcc_lo 4 0.01 1
159 v_cndmask_b32_e32 v16, v28, v109, vcc_lo 4 0.01 1
160 v_cndmask_b32_e32 v15, v29, v110, vcc_lo 4 0.01 1
161 v_cndmask_b32_e64 v2, 1, v9, s9 4 0.03 5
162 v_cmp_ne_i32_e32 vcc_lo, 1, v2 4 0.10 17
163 s_and_saveexec_b32 s7, vcc_lo 4 0.05 9
164 s_cbranch_execz _L5 4 0.02 3
165 BBF0_8:
166 v_cmp_eq_i32_e32 vcc_lo, 2, v2 4 0.09 16
167 s_and_saveexec_b32 s8, vcc_lo 4 0.05 9
168 s_cbranch_execz _L6 4 0.06 10
169 BBF0_9:
170 v_sub_f32_e32 v1, v109, v16 4 0.01 1
171 v_sub_f32_e32 v2, v110, v15 4 0.01 1
172 v_sub_f32_e32 v17, v23, v16 4 0.01 1
173 v_sub_f32_e32 v4, v22, v15 4 0.01 1
174 v_mov_b32_e32 v25, v110 4 0.02 3
175 v_madmk_f32 v1, v1, 0x3eaaaaab, v16 4 0.01 1
176 v_madmk_f32 v2, v2, 0x3eaaaaab, v15 4 0.01 1
177 v_mov_b32_e32 v111, v109 4 0.05 9
178 v_madmk_f32 v16, v17, 0x3eaaaaab, v16 4 0.01 1
179 v_madmk_f32 v108, v4, 0x3eaaaaab, v15 4 0.01 1
180 v_mov_b32_e32 v109, v1 4 0.01 1
181 v_mov_b32_e32 v110, v2 4 0.01 1
182 _L6:
183 s_andn2_b32 exec_lo, s8, exec_lo 4 0.05 9
184 v_mov_b32_e32 v108, v15 4 0.01 1
185 s_mov_b32 exec_lo, s8 4 0.08 14
186 v_mov_b32_e32 v4, v16 4 0.02 3
187 v_mov_b32_e32 v15, v25 4 0.01 1
188 _L5:
189 s_andn2_b32 exec_lo, s7, exec_lo 4 0.05 9
190 s_cbranch_execz _L7 4 0.03 4
191 BBF0_10:
192 v_sub_f32_e32 v1, v23, v16 4 0.01 1
193 v_sub_f32_e32 v2, v22, v15 4 0.01 1
194 v_sub_f32_e32 v4, v16, v23 4 0.01 1
195 v_sub_f32_e32 v8, v15, v22 4 0.01 1
196 v_mov_b32_e32 v111, v16 4 0.01 1
197 v_madmk_f32 v109, v1, 0x3eaaaaab, v16 4 0.01 1
198 v_madmk_f32 v110, v2, 0x3eaaaaab, v15 4 0.01 1
199 v_madmk_f32 v4, v4, 0x3eaaaaab, v23 4 0.01 1
200 v_madmk_f32 v108, v8, 0x3eaaaaab, v22 4 0.01 1
201 _L7:
202 s_andn2_b32 exec_lo, s7, s4 4 0.05 9
203 s_cbranch_execz _L8 4 0.02 3
204 BBF0_11:
205 v_mul_f32_e32 v1, v14, v108 4 0.01 1
206 v_mul_f32_e32 v6, v13, v108 4 0.01 1
207 v_mul_f32_e32 v0, v13, v22 4 0.01 1
208 v_mul_f32_e32 v5, v14, v22 4 0.01 1
209 v_mul_f32_e32 v10, v13, v110 4 0.01 1
210 v_mac_f32_e32 v1, v12, v4 4 0.01 1
211 v_mac_f32_e32 v6, v11, v4 4 0.01 1
212 v_mul_f32_e32 v4, v13, v15 4 0.01 1
213 v_mac_f32_e32 v0, v11, v23 4 0.01 1
214 v_mac_f32_e32 v5, v12, v23 4 0.01 1
215 v_add_f32_e32 v27, v19, v1 4 1.83 323
216 v_mul_f32_e32 v1, v14, v110 4 0.01 1
217 v_mac_f32_e32 v10, v11, v109 4 0.01 1
218 v_mac_f32_e32 v4, v11, v111 4 0.01 1
219 v_add_f32_e32 v0, v18, v0 4 0.01 1
220 v_add_f32_e32 v5, v19, v5 4 0.01 1
221 v_mac_f32_e32 v1, v12, v109 4 0.01 1
222 v_add_f32_e32 v23, v18, v6 4 0.01 1
223 v_add_f32_e32 v6, v18, v10 4 0.01 1
224 v_mul_f32_e32 v9, v14, v15 4 0.01 1
225 v_cmp_eq_f32_sdwa s8, v5, v27 src0_sel:DWORD src1_sel:DWORD 4 0.01 1
226 v_add_f32_e32 v8, v19, v1 4 0.01 1
227 v_add_f32_e32 v1, v18, v4 4 0.01 1
228 v_cmp_eq_f32_sdwa s4, v0, v23 src0_sel:DWORD src1_sel:DWORD 4 0.01 1
229 v_cmp_eq_f32_sdwa s9, v0, v6 src0_sel:DWORD src1_sel:DWORD 4 0.01 1
230 v_mac_f32_e32 v9, v12, v111 4 0.01 1
231 v_cmp_eq_f32_e32 vcc_lo, v5, v8 4 0.01 1
232 v_cmp_eq_f32_sdwa s10, v0, v1 src0_sel:DWORD src1_sel:DWORD 4 0.02 2
233 v_add_f32_e32 v2, v19, v9 4 0.08 14
234 s_and_b32 s4, s4, s8 4 0.01 1
235 s_and_b32 vcc_lo, s9, vcc_lo 4 0.01 1
236 s_and_b32 s4, s4, vcc_lo 4 0.06 10
237 v_cmp_eq_f32_e32 vcc_lo, v5, v2 4 0.10 17
238 s_and_b32 vcc_lo, s10, vcc_lo 4 0.01 2
239 s_and_b32 vcc_lo, s4, vcc_lo 4 0.01 2
240 s_and_saveexec_b32 s4, vcc_lo 4 0.06 10
241 v_mov_b32_e32 v106, 0xf2fc6f7c 4 0.01 1
242 v_mov_b32_e32 v107, 0xf2fc6f7c 4 0.01 1
243 v_mov_b32_e32 v42, 0x72fc6f7c 4 0.01 1
244 v_mov_b32_e32 v48, 0x72fc6f7c 4 0.01 1
245 s_andn2_b32 exec_lo, s4, exec_lo 4 0.05 9
246 s_cbranch_execz _L9 4 0.02 3
247 BBF0_12:
248 v_sub_f32_e32 v9, v23, v0 4 0.01 1
249 v_sub_f32_e32 v11, v27, v5 4 0.02 4
250 v_mul_f32_e32 v4, v9, v9 4 0.03 5
251 v_mac_f32_e32 v4, v11, v11 4 0.03 5
252 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v4 4 0.09 16
253 s_and_saveexec_b32 s8, vcc_lo 4 0.05 9
254 s_cbranch_execz _L10 4 0.06 10
255 BBF0_13:
256 v_sub_f32_e32 v4, v6, v23 0 0.00
257 v_sub_f32_e32 v10, v8, v27 0 0.00
258 v_sub_f32_e32 v13, v2, v8 0 0.00
259 v_mul_f32_e32 v14, 0x360637b4, v4 0 0.00
260 v_mul_f32_e32 v4, 0x360637b4, v10 0 0.00
261 v_sub_f32_e32 v10, v1, v6 0 0.00
262 v_madmk_f32 v12, v9, 0x3f7fffde, v14 0 0.00
263 v_madmk_f32 v4, v11, 0x3f7fffde, v4 0 0.00
264 v_madmk_f32 v26, v10, 0x2b8cbccc, v12 0 0.00
265 v_madmk_f32 v25, v13, 0x2b8cbccc, v4 0 0.00
266 _L10:
267 s_andn2_b32 exec_lo, s8, exec_lo 4 1.51 267
268 v_mov_b32_e32 v26, v9 4 0.01 1
269 v_mov_b32_e32 v25, v11 4 0.01 2
270 s_mov_b32 exec_lo, s8 4 0.06 10
271 v_sub_f32_e32 v12, v6, v23 4 0.01 1
272 v_sub_f32_e32 v14, v8, v27 4 0.01 1
273 v_sub_f32_e32 v13, v1, v6 4 0.01 1
274 v_sub_f32_e32 v17, v2, v8 4 0.01 1
275 v_add_nc_u32_e32 v64, 0x2000, v3 4 0.03 5
276 v_add_nc_u32_e32 v78, 0x1000, v3 4 0.01 1
277 s_load_dwordx4 s[8:11], s[0:1], 0x80 4 0.01 1
278 s_load_dwordx4 s[20:23], s[0:1], 0xc0 4 0.01 1
279 s_load_dwordx4 s[24:27], s[0:1], 0xa0 4 0.01 1
280 s_mov_b32 s28, exec_lo 4 0.01 1
281 s_mov_b32 s29, exec_lo 4 0.01 1
282 v_mov_b32_e32 v82, v5 4 0.01 1
283 v_mov_b32_e32 v81, v0 4 0.01 1
284 v_mov_b32_e32 v19, 0 4 0.01 1
285 v_mov_b32_e32 v106, 0xf2fc6f7c 4 0.01 1
286 v_mov_b32_e32 v107, 0xf2fc6f7c 4 0.01 1
287 v_mov_b32_e32 v21, 0x72fc6f7c 4 0.01 1
288 v_mov_b32_e32 v22, 0x72fc6f7c 4 0.01 1
289 v_mov_b32_e32 v29, v5 4 0.01 1
290 v_mov_b32_e32 v24, v0 4 0.31 55
291 v_mov_b32_e32 v80, 1.0 4 0.01 1
292 v_mov_b32_e32 v72, 0 4 0.03 5
293 _L30:
294 v_cvt_f32_u32_e32 v34, v72 8 0.06 5
295 v_mul_f32_e32 v34, v80, v34 8 0.06 5
296 v_readfirstlane_b32 s30, v34 8 0.06 5
297 v_cmp_eq_f32_e64 vcc_lo, s30, 1.0 8 0.19 16
298 s_andn1_saveexec_b32 s31, vcc_lo 8 0.02 2
299 s_andn2_b32 exec_lo, s31, exec_lo 8 0.03 2
300 s_andn2_b32 s29, s29, exec_lo 8 0.12 10
301 s_cbranch_scc0 _L11 8 0.14 12
302 BBF0_14:
303 s_mov_b32 exec_lo, s31 4 0.01 1
304 s_and_b32 exec_lo, exec_lo, s29 4 0.01 1
305 s_mov_b32 s31, exec_lo 4 0.01 1
306 s_mov_b32 s32, exec_lo 4 0.07 11
307 v_mul_f32_e32 v34, v26, v26 4 0.04 6
308 v_mac_f32_e32 v34, v25, v25 4 0.01 2
309 _L22:
310 v_add_f32_e32 v35, s30, v80 5 0.04 5
311 v_sub_f32_e32 v36, 1.0, v35 5 0.02 2
312 v_mul_f32_e32 v40, v35, v35 5 0.02 3
313 v_mul_f32_e32 v37, v35, v36 5 0.01 1
314 v_mul_f32_e32 v38, v36, v36 5 0.03 4
315 v_mul_f32_e32 v39, 0x40400000, v37 5 0.01 1
316 v_mul_f32_e64 v43, v37, v12 mul:2 5 0.01 1
317 v_mul_f32_e32 v42, 0x40400000, v38 5 0.01 1
318 v_mul_f32_e64 v48, v37, v14 mul:2 5 0.01 2
319 v_mul_f32_e32 v41, v6, v39 5 0.01 1
320 v_mac_f32_e32 v43, v9, v38 5 1.30 183
321 v_mul_f32_e32 v44, v8, v39 5 0.01 2
322 v_mac_f32_e32 v48, v11, v38 5 0.01 1
323 v_mul_f32_e32 v38, v36, v38 5 0.01 2
324 v_mac_f32_e32 v41, v23, v42 5 0.01 1
325 v_mad_f32 v75, v13, v40, v43 5 0.01 2
326 v_mac_f32_e32 v44, v27, v42 5 0.01 1
327 v_mad_f32 v74, v17, v40, v48 5 0.01 1
328 v_mac_f32_e32 v41, v1, v40 5 0.01 1
329 v_mul_f32_e32 v37, v75, v75 5 0.01 1
330 v_mac_f32_e32 v44, v2, v40 5 0.02 2
331 v_mul_f32_e32 v39, v35, v41 5 0.01 1
332 v_mac_f32_e32 v37, v74, v74 5 0.01 1
333 v_mul_f32_e32 v36, v35, v44 5 0.02 2
334 v_mad_f32 v76, v0, v38, v39 5 0.01 1
335 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v37 5 0.01 1
336 v_mad_f32 v73, v5, v38, v36 5 0.10 14
337 s_and_saveexec_b32 s33, vcc_lo 5 0.06 9
338 s_cbranch_execz _L12 5 0.53 75
339 BBF0_15:
340 v_add_f32_e32 v40, 0xb58637bd, v35 0 0.00
341 v_cmp_gt_f32_e32 vcc_lo, 1.0, v35 0 0.00
342 v_sub_f32_e32 v37, 1.0, v40 0 0.00
343 v_mul_f32_e32 v47, v40, v40 0 0.00
344 v_mul_f32_e32 v41, v40, v37 0 0.00
345 v_mul_f32_e32 v38, v37, v37 0 0.00
346 v_mul_f32_e64 v43, v41, v12 mul:2 0 0.00
347 v_mul_f32_e64 v44, v41, v14 mul:2 0 0.00
348 v_mac_f32_e32 v43, v9, v38 0 0.00
349 v_mac_f32_e32 v44, v11, v38 0 0.00
350 v_mad_f32 v75, v13, v47, v43 0 0.00
351 v_mad_f32 v74, v17, v47, v44 0 0.00
352 s_and_saveexec_b32 s34, vcc_lo 0 0.00
353 s_cbranch_execz _L13 0 0.00
354 BBF0_16:
355 v_mul_f32_e32 v35, 0x40400000, v41 0 0.00
356 v_mul_f32_e32 v36, 0x40400000, v38 0 0.00
357 v_mul_f32_e32 v41, v6, v35 0 0.00
358 v_mul_f32_e32 v42, v8, v35 0 0.00
359 v_mac_f32_e32 v41, v23, v36 0 0.00
360 v_mac_f32_e32 v42, v27, v36 0 0.00
361 v_mac_f32_e32 v41, v1, v47 0 0.00
362 v_mac_f32_e32 v42, v2, v47 0 0.00
363 v_mul_f32_e32 v38, v37, v38 0 0.00
364 v_mul_f32_e32 v35, v40, v41 0 0.00
365 v_mul_f32_e32 v36, v40, v42 0 0.00
366 v_mad_f32 v76, v0, v38, v35 0 0.00
367 v_mad_f32 v73, v5, v38, v36 0 0.00
368 _L13:
369 s_andn2_b32 exec_lo, s34, exec_lo 0 0.00
370 v_mov_b32_e32 v40, v35 0 0.00
371 s_mov_b32 exec_lo, s34 0 0.00
372 _L12:
373 s_andn2_b32 exec_lo, s33, exec_lo 5 0.06 9
374 v_mov_b32_e32 v40, v35 5 1.45 205
375 s_mov_b32 exec_lo, s33 5 0.01 2
376 s_ff1_i32_b32 s34, exec_lo 5 0.01 1
377 s_mov_b32 s33, exec_lo 5 0.01 1
378 s_lshl_b32 s35, 1, s34 5 0.02 2
379 s_and_b32 s35, s35, exec_lo 5 0.02 2
380 s_and_saveexec_b32 s35, s35 5 0.07 9
381 s_cbranch_execz _L14 5 0.02 3
382 BBF0_17:
383 s_bcnt1_i32_b32 s36, s33 5 0.01 2
384 v_mov_b32_e32 v39, s36 5 0.01 1
385 s_waitcnt lgkmcnt(0) 5 0.01 2
386 s_waitcnt_depctr 0xffe3 5 0.10 14
387 buffer_atomic_add v39, off, s[8:11], 0 offset:32 glc 5 0.01 1
388 _L14:
389 s_waitcnt_depctr 0xffe3 5 0.08 11
390 s_mov_b32 exec_lo, s35 5 0.01 1
391 s_waitcnt vmcnt(0) 5 2.84 402
392 v_readlane_b32 s34, v39, s34 5 0.01 1
393 v_mbcnt_lo_u32_b32 v39, s33, 0 5 0.01 1
394 v_mov_b32_e32 v46, v3 5 0.02 2
395 v_mov_b32_e32 v47, s30 5 0.01 1
396 v_mov_b32_e32 v48, v40 5 0.01 1
397 v_sub_f32_e32 v41, v73, v29 5 0.01 1
398 v_sub_f32_e32 v43, v76, v24 5 0.01 1
399 v_sub_f32_e32 v44, v40, v19 5 0.01 1
400 v_mul_f32_e32 v45, v75, v75 5 0.02 3
401 v_mul_f32_e32 v42, v41, v41 5 0.02 3
402 v_mac_f32_e32 v45, v74, v74 5 0.01 2
403 v_mad_f32 v18, v43, v43, v42 5 0.01 1
404 v_add_nc_i32 v39, s34, v39 5 0.04 6
405 v_mul_lo_u32 v39, v39, 12 5 0.03 4
406 s_waitcnt lgkmcnt(0) 5 0.01 2
407 s_waitcnt_depctr 0xffe3 5 0.12 17
408 buffer_store_dwordx3 v[46:48], v39, s[20:23], 0 offen glc 5 0.09 13
409 v_mul_f32_e32 v48, v44, v44 5 0.01 1
410 v_sqrt_f32_e32 v46, v18 5 0.03 4
411 v_mul_f32_e32 v39, v34, v48 5 0.01 1
412 v_mul_f32_e32 v47, v48, v45 5 0.03 4
413 v_cmp_lt_f32_e64 s33, v39, 0x2b8cbccc 5 0.01 1
414 v_cmp_lt_f32_e64 s34, v47, 0x2b8cbccc 5 0.01 1
415 v_cmp_ge_f32_e64 s35, v46, 0x358637bd 5 0.12 16
416 s_and_b32 vcc_lo, s33, s34 5 0.01 2
417 s_or_b32 vcc_lo, s35, vcc_lo 5 0.01 2
418 s_and_saveexec_b32 s34, vcc_lo 5 0.06 9
419 s_cbranch_execz _L15 5 0.02 3
420 BBF0_18:
421 v_cmp_lt_f32_e64 s33, v18, 0x358637bd 5 0.11 16
422 s_andn1_saveexec_b32 s35, s33 5 0.06 9
423 s_cbranch_execz _L16 5 0.02 3
424 BBF0_19:
425 v_mul_f32_e32 v28, v25, v41 5 0.01 1
426 v_mul_f32_e32 v30, v26, v41 5 0.01 1
427 v_max_f32_e32 v39, 0x358637bd, v18 5 0.02 2
428 v_mac_f32_e32 v28, v26, v43 5 0.01 1
429 v_mad_f32 v30, v25, v43, -v30 5 0.01 1
430 v_rcp_f32_e32 v39, v39 5 0.03 3
431 v_mul_f32_e32 v31, v28, v28 5 0.04 5
432 v_mac_f32_e32 v31, v30, v30 5 0.02 2
433 v_mul_f32_e32 v42, v44, v39 5 0.02 2
434 v_sqrt_f32_e32 v31, v31 5 0.07 10
435 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v31 5 0.12 16
436 s_and_saveexec_b32 s36, vcc_lo 5 0.06 9
437 v_mov_b32_e32 v71, 0x3eaaaaab 5 0.01 1
438 v_mov_b32_e32 v35, 0 5 0.01 1
439 s_andn2_b32 exec_lo, s36, exec_lo 5 0.06 9
440 s_cbranch_execz _L17 5 0.02 3
441 BBF0_20:
442 v_max_f32_e64 v39, |v28|, |v30| 5 0.01 1
443 v_min_f32_e64 v44, |v28|, |v30| 5 0.04 5
444 s_mov_b32 s37, 0x3caaae5f 5 0.01 1
445 v_min_f32_e32 v48, v28, v30 5 0.04 6
446 v_cmp_gt_f32_e64 vcc_lo, |v30|, |v28| 5 0.01 1
447 v_rcp_f32_e32 v39, v39 5 0.02 3
448 v_mul_f32_e32 v71, v42, v31 5 0.06 8
449 v_mul_f32_e32 v45, v44, v39 5 0.04 5
450 v_mul_f32_e32 v39, v45, v45 5 0.04 5
451 v_madak_f32 v49, s37, v39, 0xbdae5a36 5 1.25 177
452 v_cmp_gt_f32_e64 s37, -v48, v48 5 0.18 25
453 v_madak_f32 v49, v39, v49, 0x3e3876e2 5 0.04 5
454 v_madak_f32 v49, v39, v49, 0xbea91d04 5 0.21 29
455 v_madak_f32 v44, v39, v49, 0x3f7ff738 5 0.21 30
456 v_mul_f32_e32 v39, v45, v44 5 0.21 29
457 v_madak_f32 v39, -2.0, v39, 0x3fc90fdb 5 0.04 5
458 v_cndmask_b32_e32 v50, 0, v39, vcc_lo 5 0.01 1
459 v_max_f32_e32 v39, v28, v30 5 0.01 1
460 v_cmp_gt_f32_e64 vcc_lo, -v28, v28 5 0.01 1
461 v_cndmask_b32_e64 v28, 0, 0xc0490fdb, vcc_lo 5 0.01 1
462 v_mac_f32_e32 v50, v45, v44 5 0.01 2
463 v_cmp_ge_f32_e64 vcc_lo, v39, -v39 5 0.02 3
464 v_add_f32_e32 v28, v50, v28 5 0.10 14
465 s_and_b32 vcc_lo, s37, vcc_lo 5 0.01 1
466 v_cndmask_b32_e64 v30, 0, 0x80000000, vcc_lo 5 0.04 5
467 v_xor_b32_e32 v35, v28, v30 5 0.02 3
468 _L17:
469 s_mov_b32 exec_lo, s36 5 0.06 9
470 v_mul_f32_e32 v44, v74, v41 5 0.01 1
471 v_mul_f32_e32 v48, v74, v43 5 0.03 4
472 v_mac_f32_e32 v44, v75, v43 5 0.24 34
473 v_mad_f32 v48, v75, v41, -v48 5 0.01 1
474 v_mul_f32_e32 v31, v44, v44 5 0.04 5
475 v_mac_f32_e32 v31, v48, v48 5 0.04 6
476 v_sqrt_f32_e32 v18, v31 5 0.07 10
477 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v18 5 0.12 16
478 s_and_b32 exec_lo, s36, vcc_lo 5 0.06 9
479 v_mov_b32_e32 v70, 0x3eaaaaab 5 0.01 1
480 v_mov_b32_e32 v77, 0 5 0.01 1
481 s_andn2_b32 exec_lo, s36, exec_lo 5 0.06 9
482 s_cbranch_execz _L18 5 0.03 4
483 BBF0_21:
484 v_max_f32_e64 v39, |v44|, |v48| 5 0.04 5
485 s_mov_b32 s37, 0x3caaae5f 5 0.05 7
486 v_cmp_gt_f32_e64 vcc_lo, |v48|, |v44| 5 0.01 1
487 v_mul_f32_e32 v70, v42, v18 5 0.01 1
488 v_rcp_f32_e32 v45, v39 5 0.02 3
489 v_min_f32_e64 v39, |v44|, |v48| 5 0.06 8
490 v_mul_f32_e32 v45, v39, v45 5 0.04 5
491 v_mul_f32_e32 v39, v45, v45 5 0.04 5
492 v_madak_f32 v49, s37, v39, 0xbdae5a36 5 0.04 5
493 v_madak_f32 v49, v39, v49, 0x3e3876e2 5 0.04 5
494 v_madak_f32 v49, v39, v49, 0xbea91d04 5 0.04 5
495 v_madak_f32 v39, v39, v49, 0x3f7ff738 5 1.10 155
496 v_max_f32_e32 v49, v44, v48 5 0.01 1
497 v_mul_f32_e32 v47, v45, v39 5 0.06 9
498 v_madak_f32 v47, -2.0, v47, 0x3fc90fdb 5 0.04 6
499 v_cndmask_b32_e32 v52, 0, v47, vcc_lo 5 0.01 1
500 v_min_f32_e32 v47, v44, v48 5 0.01 1
501 v_cmp_gt_f32_e64 vcc_lo, -v44, v44 5 0.01 1
502 v_cndmask_b32_e64 v50, 0, 0xc0490fdb, vcc_lo 5 0.01 1
503 v_cmp_ge_f32_e64 vcc_lo, v49, -v49 5 0.01 1
504 v_mac_f32_e32 v52, v45, v39 5 0.01 1
505 v_cmp_gt_f32_e64 s37, -v47, v47 5 0.03 4
506 v_add_f32_e32 v39, v52, v50 5 0.11 15
507 s_and_b32 vcc_lo, s37, vcc_lo 5 0.01 1
508 v_cndmask_b32_e64 v45, 0, 0x80000000, vcc_lo 5 0.04 5
509 v_xor_b32_e32 v77, v39, v45 5 0.03 4
510 _L18:
511 s_mov_b32 exec_lo, s36 5 0.01 1
512 _L16:
513 s_andn2_b32 exec_lo, s35, exec_lo 5 0.07 10
514 v_cndmask_b32_e64 v35, v45, 0, s33 5 0.01 1
515 v_cndmask_b32_e64 v77, v77, 0, s33 5 0.01 1
516 v_cndmask_b32_e64 v71, v71, 0x3eaaaaab, s33 5 0.61 86
517 v_cndmask_b32_e64 v70, v70, 0x3eaaaaab, s33 5 0.01 1
518 s_mov_b32 exec_lo, s35 5 0.07 9
519 v_mul_f32_e32 v42, 0.15915494, v35 5 0.01 1
520 v_mul_f32_e32 v44, 0.15915494, v77 5 0.05 7
521 v_cos_f32_e32 v47, v42 5 0.03 4
522 v_cos_f32_e32 v42, v44 5 0.07 10
523 v_mul_f32_e32 v44, v47, v42 5 0.04 5
524 v_cmp_lt_f32_e64 s33, v44, 0 5 0.11 16
525 s_andn2_b32 exec_lo, s35, s33 5 0.06 9
526 s_cbranch_execz _L19 5 0.06 8
527 BBF0_22:
528 v_add_f32_e32 v39, 1.0, v47 5 0.01 1
529 v_add_f32_e32 v44, 1.0, v42 5 0.01 1
530 v_mul_f32_e32 v48, 0.15915494, v35 5 0.01 1
531 v_mul_f32_e32 v49, 0.15915494, v77 5 0.01 1
532 v_mul_f32_e32 v51, v70, v71 5 0.01 1
533 v_max_f32_e32 v39, 0x3089705f, v39 5 0.02 2
534 v_max_f32_e32 v44, 0x3089705f, v44 5 0.01 1
535 v_sin_f32_e32 v48, v48 5 0.02 3
536 v_add_f32_e32 v53, v77, v35 5 0.02 2
537 v_rcp_f32_e32 v39, v39 5 0.03 4
538 v_sin_f32_e32 v49, v49 5 0.03 3
539 v_mul_f32_e64 v30, v48, v71 mul:2 5 0.01 1
540 v_mul_f32_e32 v42, v42, v48 5 0.02 3
541 v_mul_f32_e32 v50, 0x3f2aaaab, v39 5 0.01 1
542 v_madmk_f32 v52, v39, 0xbf2aaaab, v71 5 1.03 146
543 v_rcp_f32_e32 v39, v44 5 0.05 7
544 v_mul_f32_e32 v71, v53, v53 5 0.01 1
545 v_mac_f32_e32 v42, v47, v49 5 0.01 1
546 v_mul_f32_e32 v55, v50, v48 5 0.01 1
547 v_mac_f32_e32 v30, v49, v70 5 0.01 1
548 v_mul_f32_e32 v54, v52, v52 5 0.02 3
549 v_mac_f32_e32 v55, v48, v50 5 0.01 1
550 v_mac_f32_e32 v30, v49, v70 5 0.01 1
551 v_mul_f32_e32 v44, 0x3f2aaaab, v39 5 0.01 1
552 v_madmk_f32 v57, v39, 0xbf2aaaab, v70 5 0.01 1
553 v_mul_f32_e64 v70, |v53|, v71 5 0.01 2
554 v_mad_f32 v30, -v51, v42, v30 5 0.01 2
555 v_mac_f32_e32 v55, v49, v44 5 0.01 1
556 v_mul_f32_e32 v28, v50, v44 5 0.01 1
557 v_mac_f32_e32 v54, v57, v57 5 0.02 2
558 v_mac_f32_e32 v55, v49, v44 5 0.02 2
559 v_sqrt_f32_e32 v49, v54 5 0.03 3
560 v_mad_f32 v55, -v28, v42, v55 5 0.01 1
561 v_sub_f32_e32 v28, v35, v77 5 0.03 4
562 v_mul_f32_e32 v39, 0x3e19999a, v55 5 0.01 1
563 v_mul_f32_e64 v50, |v28|, 0x3bf5c28f 5 0.01 1
564 v_mul_f32_e64 v47, |v28|, 0x3d8f5c29 5 0.90 128
565 v_mad_f32 v39, v30, 0x3e19999a, -v39 5 0.01 1
566 v_madmk_f32 v51, v70, 0x369b3073, v50 5 0.01 1
567 v_mad_f32 v47, |v53|, 0x3ba3d70a, v47 5 0.02 2
568 v_mul_f32_e64 v28, |v39|, 0x3fc66666 5 0.04 5
569 v_mac_f32_e32 v28, v51, v71 5 0.04 5
570 v_mad_f32 v39, v47, v49, v28 5 0.01 1
571 _L19:
572 s_andn2_b32 exec_lo, s35, exec_lo 5 0.06 9
573 v_cndmask_b32_e64 v39, v18, 2.0, s33 5 0.01 1
574 s_mov_b32 exec_lo, s35 5 0.06 9
575 v_mul_f32_e32 v39, v46, v39 5 0.04 5
576 v_cmp_le_f32_e64 s33, v39, 0x3e800000 5 0.01 1
577 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v80 5 0.12 17
578 s_or_b32 vcc_lo, s33, vcc_lo 5 0.01 2
579 s_andn1_saveexec_b32 s35, vcc_lo 5 0.01 2
580 s_andn2_b32 exec_lo, s35, exec_lo 5 0.01 1
581 s_andn2_b32 s32, s32, exec_lo 5 0.07 10
582 s_cbranch_scc0 _L20 5 0.90 128
583 BBF0_23:
584 s_and_b32 exec_lo, s35, s32 1 0.00 1
585 _L15:
586 s_andn2_b32 exec_lo, s34, exec_lo 1 0.00 1
587 s_and_b32 exec_lo, s34, s32 1 0.28 197
588 s_ff1_i32_b32 s34, exec_lo 1 0.00 1
589 s_mov_b32 s33, exec_lo 1 0.00 1
590 s_lshl_b32 s35, 1, s34 1 0.00 1
591 v_lshlrev_b32_e32 v72, 1, v72 1 0.00 1
592 s_and_b32 s35, s35, exec_lo 1 0.00 1
593 v_ldexp_f32 v80, v80, -1 1 0.01 5
594 s_and_saveexec_b32 s35, s35 1 0.01 9
595 s_cbranch_execz _L21 1 0.00 3
596 BBF0_24:
597 s_bcnt1_i32_b32 s36, s33 1 0.00 2
598 v_mov_b32_e32 v32, s36 1 0.00 1
599 s_waitcnt_depctr 0xffe3 1 0.02 16
600 buffer_atomic_add v32, off, s[8:11], 0 offset:32 glc 1 0.00 1
601 _L21:
602 s_waitcnt_depctr 0xffe3 1 0.02 11
603 s_mov_b32 exec_lo, s35 1 0.01 9
604 v_mbcnt_lo_u32_b32 v35, s33, 0 1 0.00 1
605 s_waitcnt vmcnt(0) 1 0.28 197
606 v_readlane_b32 s33, v32, s34 1 0.00 1
607 v_cvt_f32_u32_e32 v65, v72 1 0.00 1
608 v_mov_b32_e32 v66, v80 1 0.00 3
609 v_add_nc_i32 v32, s33, v35 1 0.01 5
610 v_mul_lo_u32 v35, v32, 12 1 0.01 4
611 s_waitcnt_depctr 0xffe3 1 0.03 19
612 buffer_store_dwordx3 v[64:66], v35, s[20:23], 0 offen glc 1 0.00 1
613 s_branch _L22 1 0.03 21
614 _L20:
615 s_mov_b32 exec_lo, s31 4 0.71 126
616 v_add_nc_u32_e32 v19, 1, v72 4 0.01 1
617 s_mov_b32 s30, exec_lo 4 0.01 1
618 s_ff1_i32_b32 s31, exec_lo 4 0.01 2
619 v_ffbl_b32_e32 v25, v19 4 0.01 1
620 s_lshl_b32 s32, 1, s31 4 0.01 2
621 s_and_b32 s32, s32, exec_lo 4 0.01 2
622 v_min_u32_e32 v25, 32, v25 4 0.03 5
623 v_lshlrev_b32_e64 v26, v25, 1 4 0.03 5
624 v_cvt_f32_u32_e32 v26, v26 4 0.01 1
625 v_lshrrev_b32_e32 v72, v25, v19 4 0.02 4
626 v_mul_f32_e32 v80, v80, v26 4 0.01 1
627 s_and_saveexec_b32 s32, s32 4 0.05 9
628 s_cbranch_execz _L23 4 0.02 3
629 BBF0_25:
630 s_bcnt1_i32_b32 s33, s30 4 0.01 2
631 v_mov_b32_e32 v25, s33 4 0.01 1
632 s_waitcnt_depctr 0xffe3 4 0.09 16
633 buffer_atomic_add v25, off, s[8:11], 0 offset:32 glc 4 0.01 1
634 _L23:
635 s_waitcnt_depctr 0xffe3 4 0.06 11
636 s_mov_b32 exec_lo, s32 4 0.07 12
637 v_sub_f32_e32 v28, v77, v35 4 0.01 1
638 v_add_f32_e32 v26, v35, v77 4 0.01 1
639 s_mov_b32 s32, 0x3b21e3b8 4 0.01 1
640 s_mov_b32 s33, 0xb84c68e7 4 0.01 1
641 v_cvt_f32_u32_e32 v79, v72 4 0.01 1
642 v_mul_f32_e32 v31, v28, v28 4 0.01 1
643 v_mul_f32_e32 v30, v26, v26 4 0.02 4
644 v_mad_f32 v33, v31, 0xbccccccd, 1.0 4 0.01 1
645 s_waitcnt vmcnt(0) 4 1.06 187
646 v_readlane_b32 s31, v25, s31 4 0.01 2
647 v_mul_f32_e32 v32, v31, v31 4 0.01 2
648 v_madak_f32 v34, s32, v31, 0xbd2aaaab 4 0.01 1
649 v_madak_f32 v44, s33, v31, 0x3a088889 4 0.01 1
650 v_mbcnt_lo_u32_b32 v25, s30, 0 4 0.01 1
651 v_madmk_f32 v33, v32, 0x39b3719e, v33 4 0.01 1
652 v_madmk_f32 v47, v32, 0xb81c6fca, v34 4 0.01 1
653 v_mul_f32_e32 v34, v31, v32 4 0.01 1
654 v_madmk_f32 v44, v30, 0xb6500cec, v44 4 0.02 4
655 v_madmk_f32 v48, v34, 0xb601da25, v33 4 0.01 1
656 v_mac_f32_e32 v47, v44, v30 4 0.01 2
657 s_mov_b32 s30, 0xbc6a0ea1 4 0.01 1
658 s_mov_b32 s32, 0x3979a934 4 0.01 1
659 s_mov_b32 s33, 0x388fa325 4 0.01 1
660 v_mac_f32_e32 v48, v47, v30 4 0.03 5
661 v_ldexp_f32 v42, v48, -2 4 0.01 1
662 v_add_nc_i32 v25, s31, v25 4 0.72 127
663 v_madak_f32 v33, s30, v31, 0x40c00000 4 0.01 2
664 v_madak_f32 v44, s33, v31, 0xba3b3ee7 4 0.01 1
665 v_madak_f32 v31, s32, v31, 0xbdcccccd 4 0.01 1
666 v_mul_lo_u32 v25, v25, 12 4 0.04 6
667 v_madmk_f32 v33, v32, 0xb8c28a7f, v33 4 0.01 1
668 v_madmk_f32 v44, v30, 0xb70526e7, v44 4 0.01 1
669 v_madmk_f32 v31, v32, 0x378e44a1, v31 4 0.01 1
670 v_rcp_f32_e32 v32, v42 4 0.02 4
671 v_ldexp_f32 v42, v46, -3 4 0.01 1
672 v_madmk_f32 v47, v34, 0x3494ab4c, v33 4 0.01 2
673 v_mac_f32_e32 v31, v44, v30 4 0.03 5
674 v_mac_f32_e32 v47, v31, v30 4 0.01 1
675 s_waitcnt_depctr 0xffe3 4 0.09 16
676 buffer_store_dwordx3 v[78:80], v25, s[20:23], 0 offen glc 4 0.09 16
677 v_mul_f32_e32 v25, v28, v47 4 0.01 1
678 v_mul_f32_e32 v28, v42, v32 4 0.02 4
679 v_ldexp_f32 v33, v25, -1 4 0.01 1
680 v_sqrt_f32_e32 v28, v28 4 0.02 4
681 v_cmp_gt_f32_e64 s30, 0x3a83126f, |v25| 4 0.82 144
682 s_andn1_saveexec_b32 s31, s30 4 0.05 9
683 s_cbranch_execz _L24 4 0.02 3
684 BBF0_26:
685 v_mad_f32 v44, v25, -0.5, v26 4 0.01 1
686 v_mov_b32_e32 v77, v25 4 0.03 5
687 v_sqrt_f32_e64 v31, |v44| 4 0.02 4
688 v_add_f32_e32 v32, v25, v44 4 0.05 9
689 v_sqrt_f32_e64 v47, |v32| 4 0.02 4
690 v_mul_f32_e32 v70, v44, v31 4 0.04 7
691 v_mad_f32 v34, v32, v47, -v70 4 0.02 3
692 v_rcp_f32_e32 v32, v25 4 0.03 5
693 v_mul_f32_e32 v42, 0x3f2aaaab, v34 4 0.05 8
694 v_mul_f32_e32 v18, v42, v32 4 0.01 1
695 _L24:
696 s_andn2_b32 exec_lo, s31, exec_lo 4 0.05 9
697 v_mov_b32_e32 v70, 0 4 0.01 1
698 v_mov_b32_e32 v77, 0 4 0.01 1
699 v_mov_b32_e32 v44, 0 4 0.01 1
700 v_mov_b32_e32 v34, 0 4 0.01 2
701 v_sqrt_f32_e64 v18, |v26| 4 0.03 5
702 s_mov_b32 exec_lo, s31 4 0.05 9
703 v_mul_f32_e32 v28, v28, v18 4 0.01 2
704 v_mov_b32_e32 v18, 0 4 0.01 1
705 s_movk_i32 s34, 0xffff 4 0.01 1
706 v_rcp_f32_e32 v49, v77 4 0.02 4
707 s_mov_b32 s32, exec_lo 4 0.01 1
708 v_ceil_f32_e32 v28, v28 4 0.01 1
709 s_mov_b32 s33, exec_lo 4 0.24 43
710 v_rcp_f32_e32 v48, v48 4 0.02 4
711 v_max_f32_e32 v28, 1.0, v28 4 0.01 2
712 v_cmp_eq_f32_e64 s31, v40, 1.0 4 0.02 2
713 v_cvt_u32_f32_e32 v71, v28 4 0.01 1
714 v_rcp_f32_e32 v28, v28 4 0.02 4
715 s_nop 0 4 0.01 1
716 s_nop 0 4 0.01 1
717 _L29:
718 v_cmp_eq_i32_e64 s34, s34, 0 13 0.03 1
719 v_add_co_ci_u32_e64 v20, vcc_lo, v18, 0, s34 13 0.09 5
720 v_cmp_gt_u32_e32 vcc_lo, v71, v20 13 0.31 17
721 s_and_saveexec_b32 s35, vcc_lo 13 0.04 2
722 s_andn2_b32 exec_lo, s35, exec_lo 13 0.02 1
723 s_andn2_b32 s33, s33, exec_lo 13 0.18 10
724 s_cbranch_scc0 _L25 13 0.18 9
725 BBF0_27:
726 s_and_b32 exec_lo, s35, s33 9 0.12 9
727 v_add_co_ci_u32_e64 v18, vcc_lo, v18, 1, s34 9 0.06 5
728 v_cmp_eq_i32_e32 vcc_lo, v71, v18 9 0.22 17
729 s_and_b32 vcc_lo, vcc_lo, s31 9 0.03 2
730 s_andn1_saveexec_b32 s34, vcc_lo 9 0.11 9
731 s_cbranch_execz _L26 9 0.12 9
732 BBF0_28:
733 v_cvt_f32_u32_e32 v18, v18 6 0.06 7
734 v_mul_f32_e32 v57, v18, v28 6 0.01 1
735 s_andn1_saveexec_b32 s35, s30 6 0.08 9
736 s_cbranch_execz _L27 6 0.04 4
737 BBF0_29:
738 v_mad_f32 v18, v34, v57, v70 6 0.05 6
739 v_log_f32_e64 v30, |v18| 6 0.67 79
740 v_cmp_gt_f32_e32 vcc_lo, 0, v18 6 0.01 1
741 v_cndmask_b32_e64 v42, 0, -1, vcc_lo 6 0.01 1
742 v_cmp_lt_f32_e32 vcc_lo, 0, v18 6 0.03 3
743 v_mul_f32_e32 v18, 0x3f2aaaab, v30 6 0.02 1
744 v_add_co_ci_u32_e64 v30, vcc_lo, v42, 0, vcc_lo 6 0.03 3
745 v_exp_f32_e32 v18, v18 6 0.03 3
746 v_cvt_f32_i32_e32 v52, v30 6 0.07 8
747 v_mul_f32_e32 v18, v52, v18 6 0.05 5
748 v_sub_f32_e32 v18, v18, v44 6 0.05 5
749 v_mul_f32_e32 v57, v18, v49 6 0.01 1
750 _L27:
751 s_mov_b32 exec_lo, s35 6 0.08 9
752 v_add_f32_e64 v55, v57, -1.0 div:2 6 0.03 3
753 v_mul_f32_e32 v18, v57, v57 6 0.01 1
754 v_add_f32_e64 v59, v57, -2.0 div:2 6 0.01 1
755 v_ldexp_f32 v54, v57, -1 6 0.02 2
756 v_mad_f32 v52, v25, v55, v26 6 0.01 1
757 v_mul_f32_e32 v18, v25, v18 6 0.03 3
758 v_mad_f32 v59, v33, v59, v26 6 0.04 5
759 v_mul_f32_e32 v55, v57, v52 6 0.01 1
760 v_ldexp_f32 v52, v18, -1 6 0.02 1
761 v_mad_f32 v54, v59, v54, -v35 6 0.02 2
762 v_mul_f32_e32 v56, v55, v55 6 0.48 57
763 v_mul_f32_e64 v61, v18, v52 div:2 6 0.01 1
764 v_mul_f32_e64 v30, v18, v55 div:2 6 0.01 1
765 v_mul_f32_e32 v54, 0.15915494, v54 6 0.02 2
766 v_mul_f32_e32 v46, v56, v56 6 0.01 1
767 v_mul_f32_e32 v42, v56, v61 6 0.01 1
768 v_mac_f32_e32 v30, v55, v52 6 0.02 2
769 v_mul_f32_e32 v58, v61, v61 6 0.02 2
770 v_mad_f32 v62, v61, 0xbbcccccd, 1.0 6 0.01 1
771 v_mac_f32_e32 v42, v56, v61 6 0.01 1
772 v_mul_f32_e64 v65, v30, v56 mul:2 6 0.01 1
773 v_mul_f32_e32 v50, v30, v61 6 0.02 2
774 v_mac_f32_e32 v42, v30, v30 6 0.01 1
775 v_mul_f32_e32 v63, v30, v65 6 0.01 1
776 v_mul_f32_e32 v69, v55, v50 6 0.02 2
777 v_mul_f32_e32 v60, 0x38c30c31, v42 6 0.01 1
778 v_mac_f32_e32 v63, v46, v61 6 0.01 1
779 v_mac_f32_e32 v69, v55, v50 6 0.02 2
780 v_madmk_f32 v60, v46, 0x3a088889, v60 6 0.01 1
781 v_mac_f32_e32 v63, v42, v56 6 0.01 1
782 v_mac_f32_e32 v69, v42, v52 6 0.01 1
783 v_mul_f32_e64 v42, v18, v61 div:2 6 0.01 1
784 v_mul_f32_e32 v61, v55, v65 6 0.01 1
785 v_madmk_f32 v50, v58, 0x3797b426, v60 6 1.04 122
786 v_madmk_f32 v60, v56, 0xbd2aaaab, v62 6 0.02 2
787 v_mad_f32 v58, v52, v55, v30 6 0.01 1
788 v_mac_f32_e32 v65, v30, v56 6 0.01 1
789 v_mul_f32_e32 v67, 0x39c30c31, v42 6 0.01 1
790 v_mul_f32_e32 v59, 0x3672b9d6, v69 6 0.01 1
791 v_add_f32_e32 v50, v60, v50 6 0.01 1
792 v_mul_f32_e32 v58, v58, v55 6 0.01 1
793 v_mul_f32_e32 v55, v55, v65 6 0.01 1
794 v_mul_f32_e32 v42, v56, v46 6 0.01 1
795 v_mac_f32_e32 v61, v46, v52 6 0.01 1
796 v_madmk_f32 v60, v63, 0xb521d13a, v50 6 0.01 1
797 v_madmk_f32 v63, v58, 0x3b088889, v67 6 0.01 1
798 v_mac_f32_e32 v55, v52, v46 6 0.01 1
799 v_sin_f32_e32 v52, v54 6 0.03 3
800 v_madmk_f32 v30, v61, 0x379c09c1, v59 6 0.01 1
801 v_mul_f32_e32 v59, v46, v46 6 0.01 1
802 v_mad_f32 v63, v18, 0x3d2aaaab, -v63 6 0.01 1
803 v_madmk_f32 v18, v42, 0xb6500d01, v60 6 0.01 1
804 v_mul_f32_e32 v42, v57, v48 6 0.01 1
805 v_mul_f32_e32 v55, v55, v56 6 0.01 1
806 v_cos_f32_e32 v56, v54 6 0.03 3
807 v_add_f32_e32 v30, v63, v30 6 0.71 84
808 v_madmk_f32 v18, v59, 0x3238ef1d, v18 6 0.03 3
809 v_mul_f32_e32 v52, v42, v52 6 0.03 3
810 v_madmk_f32 v55, v55, 0xb3b8ef1d, v30 6 0.02 2
811 v_mul_f32_e32 v30, v18, v52 6 0.02 2
812 v_mul_f32_e32 v56, v42, v56 6 0.02 2
813 v_mul_f32_e32 v59, v55, v52 6 0.03 3
814 v_mad_f32 v30, -v55, v56, -v30 6 0.02 1
815 v_mad_f32 v59, v18, v56, -v59 6 0.03 3
816 v_mul_f32_e32 v18, v41, v30 6 0.01 1
817 v_mul_f32_e32 v30, v43, v30 6 0.07 8
818 v_mad_f32 v18, v43, v59, -v18 6 0.01 1
819 v_mac_f32_e32 v30, v41, v59 6 0.04 4
820 v_add_f32_e32 v83, v24, v18 6 0.01 1
821 v_add_f32_e32 v84, v29, v30 6 0.01 1
822 _L26:
823 s_andn2_b32 exec_lo, s34, exec_lo 9 0.12 9
824 v_mov_b32_e32 v84, v2 9 0.01 1
825 v_mov_b32_e32 v83, v1 9 0.01 1
826 s_mov_b32 exec_lo, s34 9 0.01 1
827 s_ff1_i32_b32 s35, exec_lo 9 0.01 1
828 s_mov_b32 s34, exec_lo 9 0.18 14
829 s_lshl_b32 s36, 1, s35 9 0.03 2
830 s_and_b32 s36, s36, exec_lo 9 0.03 2
831 s_and_saveexec_b32 s36, s36 9 0.13 10
832 s_cbranch_execz _L28 9 0.04 3
833 BBF0_30:
834 s_bcnt1_i32_b32 s37, s34 9 0.03 2
835 v_mov_b32_e32 v18, s37 9 0.01 1
836 s_waitcnt_depctr 0xffe3 9 0.21 16
837 buffer_atomic_add v18, off, s[8:11], 0 offset:28 glc 9 0.01 1
838 _L28:
839 s_waitcnt_depctr 0xffe3 9 0.14 11
840 s_mov_b32 exec_lo, s36 9 0.12 9
841 v_mbcnt_lo_u32_b32 v30, s34, 0 9 0.01 1
842 s_waitcnt vmcnt(0) 9 2.99 235
843 v_readlane_b32 s34, v18, s35 9 0.01 1
844 v_min3_f32 v22, v81, v83, v22 9 0.02 1
845 v_min3_f32 v21, v82, v84, v21 9 0.01 1
846 v_max3_f32 v107, v81, v83, v107 9 0.01 1
847 v_max3_f32 v106, v82, v84, v106 9 0.02 1
848 v_add_nc_i32 v18, s34, v30 9 0.14 11
849 s_movk_i32 s34, 0x0 9 0.01 1
850 v_mul_lo_u32 v18, v18, 24 9 0.05 4
851 s_waitcnt_depctr 0xffe3 9 0.24 18
852 buffer_store_dword v7, v18, s[24:27], 0 offen glc 9 0.01 1
853 buffer_store_dwordx4 v[81:84], v18, s[24:27], 0 offen offset:8 glc 9 0.21 16
854 v_mov_b32_e32 v82, v84 9 0.01 1
855 v_mov_b32_e32 v18, v20 9 0.01 1
856 v_mov_b32_e32 v81, v83 9 0.01 1
857 s_branch _L29 9 0.32 24
858 _L25:
859 s_mov_b32 exec_lo, s32 4 0.05 9
860 v_mov_b32_e32 v19, v40 4 0.01 1
861 v_mov_b32_e32 v25, v74 4 0.01 1
862 v_mov_b32_e32 v26, v75 4 0.01 1
863 v_mov_b32_e32 v29, v73 4 0.01 1
864 v_mov_b32_e32 v24, v76 4 0.01 1
865 s_branch _L30 4 0.48 84
866 _L11:
867 s_mov_b32 exec_lo, s28 4 0.05 9
868 v_mov_b32_e32 v42, v21 4 0.01 1
869 v_mov_b32_e32 v48, v22 4 0.01 1
870 _L9:
871 s_mov_b32 exec_lo, s4 4 0.01 1
872 _L8:
873 s_andn2_b32 exec_lo, s7, exec_lo 4 0.06 10
874 s_cbranch_execz _L31 4 0.01 1
875 BBF0_31:
876 v_add_nc_u32_e32 v5, -4, v5 0 0.00
877 s_waitcnt_depctr 0xffe3 0 0.00
878 tbuffer_load_format_x v5, v5, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 0 0.00
879 s_waitcnt vmcnt(0) 0 0.00
880 v_ldexp_f32 v20, v5, -1 0 0.00
881 s_andn1_saveexec_b32 s4, s6 0 0.00
882 s_cbranch_execz _L32 0 0.00
883 BBF0_32:
884 v_add_nc_u32_e32 v9, 1, v3 0 0.00
885 v_lshrrev_b32_e32 v9, 2, v9 0 0.00
886 v_add_lshl_u32 v10, s2, v9, 2 0 0.00
887 tbuffer_load_format_x v10, v10, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 0 0.00
888 v_add_nc_u32_e32 v0, 8, v0 0 0.00
889 v_and_b32_e32 v17, 24, v0 0 0.00
890 v_mul_lo_u32 v0, v9, 20 0 0.00
891 tbuffer_load_format_x v0, v0, s[16:19], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00
892 s_waitcnt vmcnt(1) 0 0.00
893 v_bfe_u32 v9, v10, 0, v17 0 0.00
894 v_lshrrev_b32_e32 v10, v17, v10 0 0.00
895 v_and_b32_e32 v21, 0x30303, v9 0 0.00
896 v_lshrrev_b32_e32 v24, 2, v9 0 0.00
897 v_lshrrev_b32_e32 v9, 3, v9 0 0.00
898 v_and_b32_e32 v17, 3, v10 0 0.00
899 v_and_b32_e32 v24, 0x10101, v24 0 0.00
900 v_and_b32_e32 v9, 0x10101, v9 0 0.00
901 v_add_nc_u32_e32 v21, v21, v24 0 0.00
902 v_mul_u32_u24_e32 v24, 15, v9 0 0.00
903 v_and_b32_e32 v24, v21, v24 0 0.00
904 v_add_nc_u32_e32 v9, v21, v24 0 0.00
905 v_and_b32_e32 v21, 8, v10 0 0.00
906 v_lshrrev_b32_e32 v24, 8, v9 0 0.00
907 v_cmp_eq_i32_e32 vcc_lo, 0, v21 0 0.00
908 v_add_nc_u32_e32 v9, v9, v24 0 0.00
909 v_add_nc_u32_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 0 0.00
910 v_and_b32_e32 v9, 63, v9 0 0.00
911 s_waitcnt vmcnt(0) 0 0.00
912 v_add3_u32 v0, v9, v0, s3 0 0.00
913 s_and_saveexec_b32 s2, vcc_lo 0 0.00
914 s_cbranch_execz _L33 0 0.00
915 BBF0_33:
916 v_lshlrev_b32_e32 v0, 2, v0 0 0.00
917 tbuffer_load_format_xy v[24:25], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen 0 0.00
918 v_cmp_gt_u32_e32 vcc_lo, 2, v17 0 0.00
919 s_waitcnt vmcnt(0) 0 0.00
920 v_cvt_f32_i32_sdwa v30, sext(v24) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
921 v_cvt_f32_i32_sdwa v31, sext(v24) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
922 v_cvt_f32_i32_sdwa v32, sext(v25) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
923 v_cvt_f32_i32_sdwa v33, sext(v25) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
924 s_and_saveexec_b32 s3, vcc_lo 0 0.00
925 v_mov_b32_e32 v2, 0 0 0.00
926 v_mov_b32_e32 v1, 0 0 0.00
927 v_mov_b32_e32 v42, 0 0 0.00
928 v_mov_b32_e32 v41, 0 0 0.00
929 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
930 s_cbranch_execz _L34 0 0.00
931 BBF0_34:
932 tbuffer_load_format_x v26, v0, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00
933 v_cmp_ne_i32_e32 vcc_lo, 3, v17 0 0.00
934 s_waitcnt vmcnt(0) 0 0.00
935 v_cvt_f32_i32_sdwa v1, sext(v26) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
936 v_cvt_f32_i32_sdwa v2, sext(v26) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
937 s_and_saveexec_b32 s6, vcc_lo 0 0.00
938 v_mov_b32_e32 v42, 0 0 0.00
939 v_mov_b32_e32 v41, 0 0 0.00
940 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00
941 s_cbranch_execz _L34 0 0.00
942 BBF0_35:
943 tbuffer_load_format_x v0, v0, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:12 0 0.00
944 s_waitcnt vmcnt(0) 0 0.00
945 v_cvt_f32_i32_sdwa v41, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00
946 v_cvt_f32_i32_sdwa v42, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00
947 _L34:
948 s_mov_b32 exec_lo, s3 0 0.00
949 _L33:
950 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
951 s_cbranch_execz _L35 0 0.00
952 BBF0_36:
953 v_lshlrev_b32_e32 v0, 2, v0 0 0.00
954 s_waitcnt_depctr 0xffe3 0 0.00
955 tbuffer_load_format_xyzw v[30:33], v0, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 0 0.00
956 v_cmp_gt_u32_e32 vcc_lo, 2, v17 0 0.00
957 s_and_saveexec_b32 s3, vcc_lo 0 0.00
958 v_mov_b32_e32 v2, 0 0 0.00
959 v_mov_b32_e32 v1, 0 0 0.00
960 v_mov_b32_e32 v42, 0 0 0.00
961 v_mov_b32_e32 v41, 0 0 0.00
962 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
963 s_cbranch_execz _L35 0 0.00
964 BBF0_37:
965 tbuffer_load_format_xy v[1:2], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 0 0.00
966 v_cmp_ne_i32_e32 vcc_lo, 3, v17 0 0.00
967 s_and_saveexec_b32 s6, vcc_lo 0 0.00
968 v_mov_b32_e32 v42, 0 0 0.00
969 v_mov_b32_e32 v41, 0 0 0.00
970 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00
971 s_cbranch_execz _L35 0 0.00
972 BBF0_38:
973 tbuffer_load_format_xy v[41:42], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:24 0 0.00
974 _L35:
975 s_waitcnt_depctr 0xffe3 0 0.00
976 s_mov_b32 exec_lo, s2 0 0.00
977 v_and_b32_e32 v10, 4, v10 0 0.00
978 v_cmp_eq_i32_e64 s6, v17, 2 0 0.00
979 v_cmp_ne_i32_e64 s2, v10, 0 0 0.00
980 s_and_b32 vcc_lo, s2, s6 0 0.00
981 s_nand_b32 s2, s2, s6 0 0.00
982 s_waitcnt vmcnt(0) 0 0.00
983 v_cndmask_b32_e32 v30, v30, v32, vcc_lo 0 0.00
984 v_cndmask_b32_e32 v9, v31, v33, vcc_lo 0 0.00
985 v_cndmask_b32_e32 v0, v32, v1, vcc_lo 0 0.00
986 v_cndmask_b32_e32 v16, v33, v2, vcc_lo 0 0.00
987 v_cndmask_b32_e64 v29, 1, v17, s2 0 0.00
988 v_cmp_ne_i32_e32 vcc_lo, 1, v29 0 0.00
989 s_and_saveexec_b32 s2, vcc_lo 0 0.00
990 s_cbranch_execz _L36 0 0.00
991 BBF0_39:
992 v_cmp_eq_i32_e32 vcc_lo, 2, v29 0 0.00
993 s_and_saveexec_b32 s3, vcc_lo 0 0.00
994 s_cbranch_execz _L37 0 0.00
995 BBF0_40:
996 v_sub_f32_e32 v21, v1, v0 0 0.00
997 v_sub_f32_e32 v26, v2, v16 0 0.00
998 v_sub_f32_e32 v27, v30, v0 0 0.00
999 v_sub_f32_e32 v28, v9, v16 0 0.00
1000 v_mov_b32_e32 v42, v2 0 0.00
1001 v_madmk_f32 v21, v21, 0x3eaaaaab, v0 0 0.00
1002 v_mov_b32_e32 v41, v1 0 0.00
1003 v_madmk_f32 v24, v26, 0x3eaaaaab, v16 0 0.00
1004 v_madmk_f32 v25, v27, 0x3eaaaaab, v0 0 0.00
1005 v_madmk_f32 v16, v28, 0x3eaaaaab, v16 0 0.00
1006 v_mov_b32_e32 v1, v21 0 0.00
1007 _L37:
1008 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
1009 v_mov_b32_e32 v25, v0 0 0.00
1010 v_mov_b32_e32 v24, v2 0 0.00
1011 s_mov_b32 exec_lo, s3 0 0.00
1012 _L36:
1013 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
1014 s_cbranch_execz _L38 0 0.00
1015 BBF0_41:
1016 v_sub_f32_e32 v26, v16, v9 0 0.00
1017 v_sub_f32_e32 v24, v9, v16 0 0.00
1018 v_sub_f32_e32 v21, v30, v0 0 0.00
1019 v_sub_f32_e32 v25, v0, v30 0 0.00
1020 v_mov_b32_e32 v42, v16 0 0.00
1021 v_madmk_f32 v26, v26, 0x3eaaaaab, v9 0 0.00
1022 v_madmk_f32 v24, v24, 0x3eaaaaab, v16 0 0.00
1023 v_mov_b32_e32 v41, v0 0 0.00
1024 v_madmk_f32 v1, v21, 0x3eaaaaab, v0 0 0.00
1025 v_madmk_f32 v25, v25, 0x3eaaaaab, v30 0 0.00
1026 v_mov_b32_e32 v16, v26 0 0.00
1027 _L38:
1028 s_mov_b32 exec_lo, s2 0 0.00
1029 v_sub_f32_e32 v29, v111, v4 0 0.00
1030 v_sub_f32_e32 v33, v15, v108 0 0.00
1031 v_sub_f32_e32 v31, v111, v109 0 0.00
1032 v_sub_f32_e32 v32, v15, v110 0 0.00
1033 v_subrev_f32_e32 v34, v23, v109 0 0.00
1034 v_mul_f32_e32 v28, v29, v29 0 0.00
1035 v_subrev_f32_e32 v36, v23, v111 0 0.00
1036 v_mul_f32_e32 v35, v31, v31 0 0.00
1037 v_subrev_f32_e32 v37, v22, v110 0 0.00
1038 v_mul_f32_e32 v40, v34, v34 0 0.00
1039 v_mac_f32_e32 v28, v33, v33 0 0.00
1040 v_subrev_f32_e32 v44, v22, v15 0 0.00
1041 v_mac_f32_e32 v35, v32, v32 0 0.00
1042 v_subrev_f32_e32 v39, v23, v4 0 0.00
1043 v_mac_f32_e32 v40, v37, v37 0 0.00
1044 v_cmp_gt_f32_e64 s3, v28, 0x2b8cbccc 0 0.00
1045 v_subrev_f32_e32 v27, v30, v1 0 0.00
1046 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v35 0 0.00
1047 v_subrev_f32_e32 v35, v22, v108 0 0.00
1048 v_subrev_f32_e32 v25, v30, v25 0 0.00
1049 v_subrev_f32_e32 v30, v30, v41 0 0.00
1050 v_subrev_f32_e32 v0, v9, v42 0 0.00
1051 v_cndmask_b32_e64 v28, v36, v29, s3 0 0.00
1052 v_mul_f32_e32 v29, v39, v39 0 0.00
1053 v_cndmask_b32_e32 v28, v28, v31, vcc_lo 0 0.00
1054 v_cndmask_b32_e64 v31, v44, v33, s3 0 0.00
1055 v_mac_f32_e32 v29, v35, v35 0 0.00
1056 v_cndmask_b32_e32 v33, v31, v32, vcc_lo 0 0.00
1057 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v40 0 0.00
1058 v_cndmask_b32_e32 v32, v36, v34, vcc_lo 0 0.00
1059 v_mul_f32_e32 v34, v28, v28 0 0.00
1060 v_cndmask_b32_e32 v36, v44, v37, vcc_lo 0 0.00
1061 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v29 0 0.00
1062 v_subrev_f32_e32 v37, v9, v24 0 0.00
1063 v_cndmask_b32_e32 v31, v32, v39, vcc_lo 0 0.00
1064 v_mac_f32_e32 v34, v33, v33 0 0.00
1065 v_cndmask_b32_e32 v29, v36, v35, vcc_lo 0 0.00
1066 v_subrev_f32_e32 v24, v9, v16 0 0.00
1067 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v34 0 0.00
1068 v_cndmask_b32_e32 v35, 0x358637bd, v28, vcc_lo 0 0.00
1069 v_mul_f32_e32 v28, v27, v27 0 0.00
1070 v_mul_f32_e32 v32, v29, v29 0 0.00
1071 v_cndmask_b32_e32 v33, 0, v33, vcc_lo 0 0.00
1072 v_mul_f32_e32 v34, v25, v25 0 0.00
1073 v_mul_f32_e32 v26, v35, v35 0 0.00
1074 v_mac_f32_e32 v28, v37, v37 0 0.00
1075 v_mac_f32_e32 v32, v31, v31 0 0.00
1076 v_mac_f32_e32 v34, v24, v24 0 0.00
1077 v_mac_f32_e32 v26, v33, v33 0 0.00
1078 v_cmp_gt_f32_e64 s3, v28, 0x2b8cbccc 0 0.00
1079 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v32 0 0.00
1080 v_cndmask_b32_e32 v29, 0, v29, vcc_lo 0 0.00
1081 v_cndmask_b32_e32 v31, 0x358637bd, v31, vcc_lo 0 0.00
1082 v_rsq_f32_e32 v28, v26 0 0.00
1083 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v34 0 0.00
1084 v_cndmask_b32_e64 v26, v30, v27, s3 0 0.00
1085 v_mul_f32_e32 v30, v29, v29 0 0.00
1086 v_cndmask_b32_e64 v9, v0, v37, s3 0 0.00
1087 v_mul_f32_e32 v27, v33, v28 0 0.00
1088 v_cndmask_b32_e32 v26, v26, v25, vcc_lo 0 0.00
1089 v_mac_f32_e32 v30, v31, v31 0 0.00
1090 v_cndmask_b32_e32 v0, v9, v24, vcc_lo 0 0.00
1091 v_mul_f32_e64 v25, v5, v27 div:2 0 0.00
1092 v_mul_f32_e32 v9, v26, v26 0 0.00
1093 v_rsq_f32_e32 v24, v30 0 0.00
1094 v_mul_f32_e32 v30, v35, v28 0 0.00
1095 v_subrev_f32_e32 v39, v25, v111 0 0.00
1096 v_mac_f32_e32 v9, v0, v0 0 0.00
1097 v_mul_f32_e64 v21, v5, v30 div:2 0 0.00
1098 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v9 0 0.00
1099 v_cndmask_b32_e32 v26, 0x358637bd, v26, vcc_lo 0 0.00
1100 v_cndmask_b32_e32 v9, 0, v0, vcc_lo 0 0.00
1101 v_add_f32_e32 v37, v21, v15 0 0.00
1102 v_mul_f32_e64 v29, -v29, v24 0 0.00
1103 v_mul_f32_e32 v31, v31, v24 0 0.00
1104 v_xor_b32_e32 v0, 0x80000000, v25 0 0.00
1105 v_cmp_neq_f32_e32 vcc_lo, 0, v5 0 0.00
1106 s_and_b32 exec_lo, s2, vcc_lo 0 0.00
1107 s_cbranch_execz _L39 0 0.00
1108 BBF0_42:
1109 v_add_f32_e32 v24, v11, v14 0 0.00
1110 v_add_f32_e32 v34, v12, v13 0 0.00
1111 v_sub_f32_e32 v28, v12, v13 0 0.00
1112 v_sub_f32_e32 v41, v11, v14 0 0.00
1113 v_mov_b32_e32 v1, v22 0 0.00
1114 v_mul_f32_e32 v38, v24, v24 0 0.00
1115 v_mul_f32_e32 v24, v34, v34 0 0.00
1116 v_mov_b32_e32 v2, v23 0 0.00
1117 v_mov_b32_e32 v43, v15 0 0.00
1118 v_mov_b32_e32 v8, v111 0 0.00
1119 v_mac_f32_e32 v38, v28, v28 0 0.00
1120 v_mac_f32_e32 v24, v41, v41 0 0.00
1121 v_mov_b32_e32 v28, v18 0 0.00
1122 v_mov_b32_e32 v45, v11 0 0.00
1123 v_mov_b32_e32 v42, v110 0 0.00
1124 v_sqrt_f32_e32 v34, v38 0 0.00
1125 v_mov_b32_e32 v38, v37 0 0.00
1126 v_mov_b32_e32 v40, v14 0 0.00
1127 v_mov_b32_e32 v44, v13 0 0.00
1128 v_sqrt_f32_e32 v24, v24 0 0.00
1129 v_mov_b32_e32 v46, v109 0 0.00
1130 v_mov_b32_e32 v47, v108 0 0.00
1131 v_mov_b32_e32 v48, v39 0 0.00
1132 v_mov_b32_e32 v51, v4 0 0.00
1133 v_mad_f32 v41, v20, v29, v23 0 0.00
1134 v_mad_f32 v36, v20, v31, v22 0 0.00
1135 v_mad_f32 v52, v34, 0.5, v24 0 0.00
1136 v_mov_b32_e32 v24, v19 0 0.00
1137 v_mov_b32_e32 v34, v12 0 0.00
1138 _L39:
1139 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
1140 s_cbranch_execz _L40 0 0.00
1141 BBF0_43:
1142 v_mul_f32_e32 v41, v14, v22 0 0.00
1143 v_mul_f32_e32 v24, v13, v22 0 0.00
1144 v_mul_f32_e32 v43, v14, v108 0 0.00
1145 v_mul_f32_e32 v40, v13, v15 0 0.00
1146 v_mul_f32_e32 v45, v14, v15 0 0.00
1147 v_mac_f32_e32 v41, v12, v23 0 0.00
1148 v_mul_f32_e32 v34, v13, v108 0 0.00
1149 v_mul_f32_e32 v38, v13, v110 0 0.00
1150 v_mac_f32_e32 v24, v11, v23 0 0.00
1151 v_mac_f32_e32 v43, v12, v4 0 0.00
1152 v_add_f32_e32 v1, v19, v41 0 0.00
1153 v_mul_f32_e32 v41, v14, v110 0 0.00
1154 v_mac_f32_e32 v40, v11, v111 0 0.00
1155 v_mac_f32_e32 v45, v12, v111 0 0.00
1156 v_mac_f32_e32 v34, v11, v4 0 0.00
1157 v_mac_f32_e32 v38, v11, v109 0 0.00
1158 v_add_f32_e32 v2, v18, v24 0 0.00
1159 v_add_f32_e32 v47, v19, v43 0 0.00
1160 v_mac_f32_e32 v41, v12, v109 0 0.00
1161 v_add_f32_e32 v8, v18, v40 0 0.00
1162 v_add_f32_e32 v43, v19, v45 0 0.00
1163 v_add_f32_e32 v51, v18, v34 0 0.00
1164 v_add_f32_e32 v46, v18, v38 0 0.00
1165 v_add_f32_e32 v42, v19, v41 0 0.00
1166 v_mov_b32_e32 v24, 0 0 0.00
1167 v_mov_b32_e32 v28, 0 0 0.00
1168 v_mov_b32_e32 v34, 0 0 0.00
1169 v_mov_b32_e32 v45, 1.0 0 0.00
1170 v_mov_b32_e32 v38, v43 0 0.00
1171 v_mov_b32_e32 v40, 1.0 0 0.00
1172 v_mov_b32_e32 v44, 0 0 0.00
1173 v_mov_b32_e32 v48, v8 0 0.00
1174 v_mov_b32_e32 v36, v1 0 0.00
1175 v_mov_b32_e32 v41, v2 0 0.00
1176 v_mov_b32_e32 v52, 1.0 0 0.00
1177 _L40:
1178 s_mov_b32 exec_lo, s2 0 0.00
1179 v_cmp_eq_f32_sdwa s2, v2, v51 src0_sel:DWORD src1_sel:DWORD 0 0.00
1180 v_cmp_eq_f32_sdwa s6, v1, v47 src0_sel:DWORD src1_sel:DWORD 0 0.00
1181 v_cmp_eq_f32_sdwa s3, v2, v46 src0_sel:DWORD src1_sel:DWORD 0 0.00
1182 v_cmp_eq_f32_e32 vcc_lo, v1, v42 0 0.00
1183 s_and_b32 s2, s2, s6 0 0.00
1184 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00
1185 s_and_b32 s2, s2, vcc_lo 0 0.00
1186 v_cmp_eq_f32_sdwa s3, v2, v8 src0_sel:DWORD src1_sel:DWORD 0 0.00
1187 v_cmp_eq_f32_e32 vcc_lo, v1, v43 0 0.00
1188 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00
1189 s_and_b32 vcc_lo, s2, vcc_lo 0 0.00
1190 s_and_saveexec_b32 s2, vcc_lo 0 0.00
1191 v_mov_b32_e32 v67, 0xf2fc6f7c 0 0.00
1192 v_mov_b32_e32 v68, 0xf2fc6f7c 0 0.00
1193 v_mov_b32_e32 v71, 0x72fc6f7c 0 0.00
1194 v_mov_b32_e32 v72, 0x72fc6f7c 0 0.00
1195 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
1196 s_cbranch_execz _L41 0 0.00
1197 BBF0_44:
1198 v_sub_f32_e32 v55, v51, v2 0 0.00
1199 v_sub_f32_e32 v59, v47, v1 0 0.00
1200 v_mul_f32_e32 v53, v55, v55 0 0.00
1201 v_mac_f32_e32 v53, v59, v59 0 0.00
1202 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v53 0 0.00
1203 s_and_saveexec_b32 s3, vcc_lo 0 0.00
1204 s_cbranch_execz _L42 0 0.00
1205 BBF0_45:
1206 v_sub_f32_e32 v53, v46, v51 0 0.00
1207 v_sub_f32_e32 v54, v42, v47 0 0.00
1208 v_sub_f32_e32 v56, v8, v46 0 0.00
1209 v_sub_f32_e32 v57, v43, v42 0 0.00
1210 v_mul_f32_e32 v53, 0x360637b4, v53 0 0.00
1211 v_mul_f32_e32 v54, 0x360637b4, v54 0 0.00
1212 v_madmk_f32 v53, v55, 0x3f7fffde, v53 0 0.00
1213 v_madmk_f32 v54, v59, 0x3f7fffde, v54 0 0.00
1214 v_madmk_f32 v32, v56, 0x2b8cbccc, v53 0 0.00
1215 v_madmk_f32 v53, v57, 0x2b8cbccc, v54 0 0.00
1216 _L42:
1217 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
1218 v_mov_b32_e32 v53, v59 0 0.00
1219 v_mov_b32_e32 v32, v55 0 0.00
1220 s_mov_b32 exec_lo, s3 0 0.00
1221 s_waitcnt lgkmcnt(0) 0 0.00
1222 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
1223 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00
1224 s_load_dwordx4 s[16:19], s[0:1], 0xc0 0 0.00
1225 v_mov_b32_e32 v16, 1.0 0 0.00
1226 v_mov_b32_e32 v49, 0 0 0.00
1227 v_mov_b32_e32 v64, 0 0 0.00
1228 v_mov_b32_e32 v70, v1 0 0.00
1229 v_mov_b32_e32 v65, v2 0 0.00
1230 v_mov_b32_e32 v67, 0xf2fc6f7c 0 0.00
1231 v_mov_b32_e32 v68, 0xf2fc6f7c 0 0.00
1232 v_mov_b32_e32 v71, 0x72fc6f7c 0 0.00
1233 v_mov_b32_e32 v72, 0x72fc6f7c 0 0.00
1234 v_sub_f32_e32 v56, v46, v51 0 0.00
1235 v_sub_f32_e32 v57, v42, v47 0 0.00
1236 v_sub_f32_e32 v58, v8, v46 0 0.00
1237 v_sub_f32_e32 v60, v43, v42 0 0.00
1238 v_add_nc_u32_e32 v61, 0x2000, v3 0 0.00
1239 v_add_nc_u32_e32 v62, 0x1000, v3 0 0.00
1240 v_ldexp_f32 v63, v52, -3 0 0.00
1241 v_cmp_ge_f32_e64 s3, v5, 0 0 0.00
1242 s_mov_b32 s6, exec_lo 0 0.00
1243 s_mov_b32 s20, exec_lo 0 0.00
1244 _L72:
1245 v_cvt_f32_u32_e32 v77, v49 0 0.00
1246 v_mul_f32_e32 v77, v16, v77 0 0.00
1247 v_cmp_eq_f32_e32 vcc_lo, 1.0, v77 0 0.00
1248 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00
1249 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00
1250 s_andn2_b32 s20, s20, exec_lo 0 0.00
1251 s_cbranch_scc0 _L41 0 0.00
1252 BBF0_46:
1253 s_mov_b32 exec_lo, s21 0 0.00
1254 s_and_b32 exec_lo, exec_lo, s20 0 0.00
1255 s_mov_b32 s21, exec_lo 0 0.00
1256 s_mov_b32 s22, exec_lo 0 0.00
1257 v_mul_f32_e32 v73, v32, v32 0 0.00
1258 v_mov_b32_e32 v54, v91 0 0.00
1259 v_mov_b32_e32 v76, v49 0 0.00
1260 v_mov_b32_e32 v49, v86 0 0.00
1261 v_mac_f32_e32 v73, v53, v53 0 0.00
1262 s_nop 0 0 0.00
1263 s_nop 0 0 0.00
1264 s_nop 0 0 0.00
1265 _L53:
1266 v_add_f32_e32 v78, v77, v16 0 0.00
1267 v_sub_f32_e32 v79, 1.0, v78 0 0.00
1268 v_mul_f32_e32 v82, v78, v78 0 0.00
1269 v_mul_f32_e32 v83, v78, v79 0 0.00
1270 v_mul_f32_e32 v81, v79, v79 0 0.00
1271 v_mul_f32_e32 v80, 0x40400000, v83 0 0.00
1272 v_mul_f32_e64 v84, v83, v56 mul:2 0 0.00
1273 v_mul_f32_e32 v86, 0x40400000, v81 0 0.00
1274 v_mul_f32_e64 v90, v83, v57 mul:2 0 0.00
1275 v_mul_f32_e32 v79, v79, v81 0 0.00
1276 v_mul_f32_e32 v85, v46, v80 0 0.00
1277 v_mac_f32_e32 v84, v55, v81 0 0.00
1278 v_mul_f32_e32 v80, v42, v80 0 0.00
1279 v_mac_f32_e32 v90, v59, v81 0 0.00
1280 v_mac_f32_e32 v85, v51, v86 0 0.00
1281 v_mac_f32_e32 v84, v58, v82 0 0.00
1282 v_mac_f32_e32 v80, v47, v86 0 0.00
1283 v_mac_f32_e32 v90, v60, v82 0 0.00
1284 v_mac_f32_e32 v85, v8, v82 0 0.00
1285 v_mul_f32_e32 v86, v84, v84 0 0.00
1286 v_mac_f32_e32 v80, v43, v82 0 0.00
1287 v_mul_f32_e32 v88, v78, v85 0 0.00
1288 v_mac_f32_e32 v86, v90, v90 0 0.00
1289 v_mul_f32_e32 v81, v78, v80 0 0.00
1290 v_mad_f32 v66, v2, v79, v88 0 0.00
1291 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v86 0 0.00
1292 v_mad_f32 v79, v1, v79, v81 0 0.00
1293 s_and_saveexec_b32 s23, vcc_lo 0 0.00
1294 s_cbranch_execz _L43 0 0.00
1295 BBF0_47:
1296 v_add_f32_e32 v86, 0xb58637bd, v78 0 0.00
1297 v_cmp_gt_f32_e32 vcc_lo, 1.0, v78 0 0.00
1298 v_sub_f32_e32 v80, 1.0, v86 0 0.00
1299 v_mul_f32_e32 v89, v86, v86 0 0.00
1300 v_mul_f32_e32 v83, v86, v80 0 0.00
1301 v_mul_f32_e32 v85, v80, v80 0 0.00
1302 v_mul_f32_e64 v84, v83, v56 mul:2 0 0.00
1303 v_mul_f32_e64 v82, v83, v57 mul:2 0 0.00
1304 v_mac_f32_e32 v84, v55, v85 0 0.00
1305 v_mac_f32_e32 v82, v59, v85 0 0.00
1306 v_mac_f32_e32 v84, v58, v89 0 0.00
1307 v_mac_f32_e32 v82, v60, v89 0 0.00
1308 s_and_saveexec_b32 s24, vcc_lo 0 0.00
1309 s_cbranch_execz _L44 0 0.00
1310 BBF0_48:
1311 v_mul_f32_e32 v83, 0x40400000, v83 0 0.00
1312 v_mul_f32_e32 v88, 0x40400000, v85 0 0.00
1313 v_mul_f32_e32 v78, v46, v83 0 0.00
1314 v_mul_f32_e32 v81, v42, v83 0 0.00
1315 v_mac_f32_e32 v78, v51, v88 0 0.00
1316 v_mac_f32_e32 v81, v47, v88 0 0.00
1317 v_mac_f32_e32 v78, v8, v89 0 0.00
1318 v_mac_f32_e32 v81, v43, v89 0 0.00
1319 v_mul_f32_e32 v83, v80, v85 0 0.00
1320 v_mul_f32_e32 v78, v86, v78 0 0.00
1321 v_mul_f32_e32 v81, v86, v81 0 0.00
1322 v_mad_f32 v66, v2, v83, v78 0 0.00
1323 v_mad_f32 v79, v1, v83, v81 0 0.00
1324 _L44:
1325 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
1326 v_mov_b32_e32 v86, v78 0 0.00
1327 s_mov_b32 exec_lo, s24 0 0.00
1328 v_mov_b32_e32 v78, v66 0 0.00
1329 v_mov_b32_e32 v80, v82 0 0.00
1330 v_mov_b32_e32 v82, v84 0 0.00
1331 _L43:
1332 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00
1333 v_mov_b32_e32 v80, v90 0 0.00
1334 v_mov_b32_e32 v82, v84 0 0.00
1335 v_mov_b32_e32 v86, v78 0 0.00
1336 v_mov_b32_e32 v78, v66 0 0.00
1337 s_mov_b32 exec_lo, s23 0 0.00
1338 s_ff1_i32_b32 s24, exec_lo 0 0.00
1339 s_mov_b32 s23, exec_lo 0 0.00
1340 s_lshl_b32 s25, 1, s24 0 0.00
1341 s_and_b32 s25, s25, exec_lo 0 0.00
1342 s_and_saveexec_b32 s25, s25 0 0.00
1343 s_cbranch_execz _L45 0 0.00
1344 BBF0_49:
1345 s_bcnt1_i32_b32 s26, s23 0 0.00
1346 v_mov_b32_e32 v81, s26 0 0.00
1347 s_waitcnt lgkmcnt(0) 0 0.00
1348 s_waitcnt_depctr 0xffe3 0 0.00
1349 buffer_atomic_add v81, off, s[8:11], 0 offset:32 glc 0 0.00
1350 _L45:
1351 s_waitcnt_depctr 0xffe3 0 0.00
1352 s_mov_b32 exec_lo, s25 0 0.00
1353 s_waitcnt vmcnt(0) 0 0.00
1354 v_readlane_b32 s24, v81, s24 0 0.00
1355 v_mbcnt_lo_u32_b32 v81, s23, 0 0 0.00
1356 v_mov_b32_e32 v90, v3 0 0.00
1357 v_mov_b32_e32 v91, v77 0 0.00
1358 v_mov_b32_e32 v92, v86 0 0.00
1359 v_sub_f32_e32 v84, v78, v65 0 0.00
1360 v_sub_f32_e32 v85, v79, v70 0 0.00
1361 v_sub_f32_e32 v87, v86, v64 0 0.00
1362 v_mul_f32_e32 v89, v82, v82 0 0.00
1363 v_mul_f32_e32 v83, v84, v84 0 0.00
1364 v_mul_f32_e32 v88, v87, v87 0 0.00
1365 v_mac_f32_e32 v89, v80, v80 0 0.00
1366 v_mac_f32_e32 v83, v85, v85 0 0.00
1367 v_add_nc_i32 v81, s24, v81 0 0.00
1368 v_mul_lo_u32 v81, v81, 12 0 0.00
1369 s_waitcnt lgkmcnt(0) 0 0.00
1370 s_waitcnt_depctr 0xffe3 0 0.00
1371 buffer_store_dwordx3 v[90:92], v81, s[16:19], 0 offen glc 0 0.00
1372 v_sqrt_f32_e32 v81, v83 0 0.00
1373 v_mul_f32_e32 v90, v73, v88 0 0.00
1374 v_mul_f32_e32 v88, v88, v89 0 0.00
1375 v_cmp_lt_f32_e64 s23, v90, 0x2b8cbccc 0 0.00
1376 v_cmp_lt_f32_e64 s24, v88, 0x2b8cbccc 0 0.00
1377 v_cmp_ge_f32_e64 s25, v81, 0x358637bd 0 0.00
1378 s_and_b32 vcc_lo, s23, s24 0 0.00
1379 s_or_b32 s23, s25, vcc_lo 0 0.00
1380 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00
1381 s_and_saveexec_b32 s24, s23 0 0.00
1382 s_cbranch_execz _L46 0 0.00
1383 BBF0_50:
1384 v_cmp_lt_f32_e64 s23, v83, 0x358637bd 0 0.00
1385 s_andn1_saveexec_b32 s25, s23 0 0.00
1386 s_cbranch_execz _L47 0 0.00
1387 BBF0_51:
1388 v_mul_f32_e32 v92, v32, v84 0 0.00
1389 v_mul_f32_e32 v49, v32, v85 0 0.00
1390 v_max_f32_e32 v74, 0x358637bd, v83 0 0.00
1391 v_mac_f32_e32 v92, v53, v85 0 0.00
1392 v_mad_f32 v49, v53, v84, -v49 0 0.00
1393 v_rcp_f32_e32 v74, v74 0 0.00
1394 v_mul_f32_e32 v50, v92, v92 0 0.00
1395 v_mac_f32_e32 v50, v49, v49 0 0.00
1396 v_sqrt_f32_e32 v83, v50 0 0.00
1397 v_mul_f32_e32 v50, v87, v74 0 0.00
1398 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v83 0 0.00
1399 s_and_saveexec_b32 s26, vcc_lo 0 0.00
1400 v_mov_b32_e32 v54, 0x3eaaaaab 0 0.00
1401 v_mov_b32_e32 v74, 0 0 0.00
1402 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00
1403 s_cbranch_execz _L48 0 0.00
1404 BBF0_52:
1405 v_max_f32_e64 v74, |v92|, |v49| 0 0.00
1406 v_min_f32_e64 v88, |v92|, |v49| 0 0.00
1407 s_mov_b32 s27, 0x3caaae5f 0 0.00
1408 v_min_f32_e32 v90, v92, v49 0 0.00
1409 v_cmp_gt_f32_e64 vcc_lo, |v49|, |v92| 0 0.00
1410 v_rcp_f32_e32 v74, v74 0 0.00
1411 v_max_f32_e32 v49, v92, v49 0 0.00
1412 v_mul_f32_e32 v54, v50, v83 0 0.00
1413 v_mul_f32_e32 v89, v88, v74 0 0.00
1414 v_mul_f32_e32 v74, v89, v89 0 0.00
1415 v_madak_f32 v87, s27, v74, 0xbdae5a36 0 0.00
1416 v_cmp_gt_f32_e64 s27, -v90, v90 0 0.00
1417 v_madak_f32 v87, v74, v87, 0x3e3876e2 0 0.00
1418 v_madak_f32 v87, v74, v87, 0xbea91d04 0 0.00
1419 v_madak_f32 v88, v74, v87, 0x3f7ff738 0 0.00
1420 v_mul_f32_e32 v74, v89, v88 0 0.00
1421 v_madak_f32 v74, -2.0, v74, 0x3fc90fdb 0 0.00
1422 v_cndmask_b32_e32 v74, 0, v74, vcc_lo 0 0.00
1423 v_cmp_gt_f32_e64 vcc_lo, -v92, v92 0 0.00
1424 v_cndmask_b32_e64 v87, 0, 0xc0490fdb, vcc_lo 0 0.00
1425 v_cmp_ge_f32_e64 vcc_lo, v49, -v49 0 0.00
1426 v_mac_f32_e32 v74, v89, v88 0 0.00
1427 v_add_f32_e32 v49, v74, v87 0 0.00
1428 s_and_b32 vcc_lo, s27, vcc_lo 0 0.00
1429 v_cndmask_b32_e64 v74, 0, 0x80000000, vcc_lo 0 0.00
1430 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00
1431 v_xor_b32_e32 v74, v49, v74 0 0.00
1432 _L48:
1433 s_mov_b32 exec_lo, s26 0 0.00
1434 v_mul_f32_e32 v90, v82, v84 0 0.00
1435 v_mul_f32_e32 v87, v80, v84 0 0.00
1436 v_mac_f32_e32 v90, v80, v85 0 0.00
1437 v_mad_f32 v87, v82, v85, -v87 0 0.00
1438 v_mul_f32_e32 v88, v90, v90 0 0.00
1439 v_mac_f32_e32 v88, v87, v87 0 0.00
1440 v_sqrt_f32_e32 v88, v88 0 0.00
1441 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v88 0 0.00
1442 s_and_b32 exec_lo, s26, vcc_lo 0 0.00
1443 v_mov_b32_e32 v49, 0x3eaaaaab 0 0.00
1444 v_mov_b32_e32 v69, 0 0 0.00
1445 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00
1446 s_cbranch_execz _L49 0 0.00
1447 BBF0_53:
1448 v_max_f32_e64 v83, |v90|, |v87| 0 0.00
1449 v_min_f32_e64 v92, |v90|, |v87| 0 0.00
1450 s_mov_b32 s27, 0x3caaae5f 0 0.00
1451 v_cmp_gt_f32_e64 vcc_lo, |v87|, |v90| 0 0.00
1452 v_max_f32_e32 v93, v90, v87 0 0.00
1453 v_rcp_f32_e32 v83, v83 0 0.00
1454 v_mul_f32_e32 v49, v50, v88 0 0.00
1455 v_mul_f32_e32 v83, v92, v83 0 0.00
1456 v_mul_f32_e32 v89, v83, v83 0 0.00
1457 v_madak_f32 v91, s27, v89, 0xbdae5a36 0 0.00
1458 v_madak_f32 v91, v89, v91, 0x3e3876e2 0 0.00
1459 v_madak_f32 v91, v89, v91, 0xbea91d04 0 0.00
1460 v_madak_f32 v92, v89, v91, 0x3f7ff738 0 0.00
1461 v_mul_f32_e32 v89, v83, v92 0 0.00
1462 v_madak_f32 v89, -2.0, v89, 0x3fc90fdb 0 0.00
1463 v_cndmask_b32_e32 v94, 0, v89, vcc_lo 0 0.00
1464 v_min_f32_e32 v89, v90, v87 0 0.00
1465 v_cmp_gt_f32_e64 vcc_lo, -v90, v90 0 0.00
1466 v_cndmask_b32_e64 v87, 0, 0xc0490fdb, vcc_lo 0 0.00
1467 v_cmp_ge_f32_e64 vcc_lo, v93, -v93 0 0.00
1468 v_mac_f32_e32 v94, v83, v92 0 0.00
1469 v_cmp_gt_f32_e64 s27, -v89, v89 0 0.00
1470 v_add_f32_e32 v89, v94, v87 0 0.00
1471 s_and_b32 vcc_lo, s27, vcc_lo 0 0.00
1472 v_cndmask_b32_e64 v83, 0, 0x80000000, vcc_lo 0 0.00
1473 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00
1474 v_xor_b32_e32 v69, v89, v83 0 0.00
1475 _L49:
1476 s_mov_b32 exec_lo, s26 0 0.00
1477 v_mov_b32_e32 v50, v74 0 0.00
1478 _L47:
1479 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00
1480 v_cndmask_b32_e64 v50, v89, 0, s23 0 0.00
1481 v_cndmask_b32_e64 v69, v69, 0, s23 0 0.00
1482 v_cndmask_b32_e64 v54, v54, 0x3eaaaaab, s23 0 0.00
1483 v_cndmask_b32_e64 v49, v49, 0x3eaaaaab, s23 0 0.00
1484 s_mov_b32 exec_lo, s25 0 0.00
1485 v_mul_f32_e32 v87, 0.15915494, v50 0 0.00
1486 v_mul_f32_e32 v89, 0.15915494, v69 0 0.00
1487 v_cos_f32_e32 v92, v87 0 0.00
1488 v_cos_f32_e32 v87, v89 0 0.00
1489 v_mul_f32_e32 v89, v92, v87 0 0.00
1490 v_cmp_lt_f32_e64 s23, v89, 0 0 0.00
1491 s_andn2_b32 exec_lo, s25, s23 0 0.00
1492 s_cbranch_execz _L50 0 0.00
1493 BBF0_54:
1494 v_add_f32_e32 v74, 1.0, v92 0 0.00
1495 v_add_f32_e32 v89, 1.0, v87 0 0.00
1496 v_mul_f32_e32 v90, 0.15915494, v50 0 0.00
1497 v_mul_f32_e32 v91, 0.15915494, v69 0 0.00
1498 v_mul_f32_e32 v97, v49, v54 0 0.00
1499 v_max_f32_e32 v74, 0x3089705f, v74 0 0.00
1500 v_max_f32_e32 v89, 0x3089705f, v89 0 0.00
1501 v_sin_f32_e32 v90, v90 0 0.00
1502 v_add_f32_e32 v101, v69, v50 0 0.00
1503 v_rcp_f32_e32 v74, v74 0 0.00
1504 v_rcp_f32_e32 v96, v89 0 0.00
1505 v_mul_f32_e64 v88, v90, v54 mul:2 0 0.00
1506 v_mul_f32_e32 v87, v87, v90 0 0.00
1507 v_sin_f32_e32 v94, v91 0 0.00
1508 v_mul_f32_e32 v93, 0x3f2aaaab, v74 0 0.00
1509 v_madmk_f32 v74, v74, 0xbf2aaaab, v54 0 0.00
1510 v_madmk_f32 v89, v96, 0xbf2aaaab, v49 0 0.00
1511 v_mul_f32_e32 v91, v93, v90 0 0.00
1512 v_mul_f32_e32 v74, v74, v74 0 0.00
1513 v_mul_f32_e32 v95, 0x3f2aaaab, v96 0 0.00
1514 v_mac_f32_e32 v87, v92, v94 0 0.00
1515 v_mac_f32_e32 v88, v94, v49 0 0.00
1516 v_mac_f32_e32 v91, v90, v93 0 0.00
1517 v_mac_f32_e32 v74, v89, v89 0 0.00
1518 v_mul_f32_e32 v90, v101, v101 0 0.00
1519 v_mac_f32_e32 v88, v94, v49 0 0.00
1520 v_mac_f32_e32 v91, v94, v95 0 0.00
1521 v_sqrt_f32_e32 v92, v74 0 0.00
1522 v_mul_f32_e32 v74, v93, v95 0 0.00
1523 v_mul_f32_e64 v89, |v101|, v90 0 0.00
1524 v_mad_f32 v88, -v97, v87, v88 0 0.00
1525 v_mac_f32_e32 v91, v94, v95 0 0.00
1526 v_mad_f32 v91, -v74, v87, v91 0 0.00
1527 v_sub_f32_e32 v74, v50, v69 0 0.00
1528 v_mul_f32_e32 v87, 0x3e19999a, v91 0 0.00
1529 v_mul_f32_e64 v94, |v74|, 0x3d8f5c29 0 0.00
1530 v_mul_f32_e64 v74, |v74|, 0x3bf5c28f 0 0.00
1531 v_mad_f32 v87, v88, 0x3e19999a, -v87 0 0.00
1532 v_mad_f32 v94, |v101|, 0x3ba3d70a, v94 0 0.00
1533 v_madmk_f32 v54, v89, 0x369b3073, v74 0 0.00
1534 v_mul_f32_e64 v89, |v87|, 0x3fc66666 0 0.00
1535 v_mac_f32_e32 v89, v54, v90 0 0.00
1536 v_mad_f32 v87, v94, v92, v89 0 0.00
1537 _L50:
1538 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00
1539 v_cndmask_b32_e64 v87, v66, 2.0, s23 0 0.00
1540 s_mov_b32 exec_lo, s25 0 0.00
1541 v_mul_f32_e32 v87, v81, v87 0 0.00
1542 v_mul_f32_e32 v87, v52, v87 0 0.00
1543 v_cmp_le_f32_e64 s23, v87, 0x3e800000 0 0.00
1544 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v16 0 0.00
1545 s_or_b32 vcc_lo, s23, vcc_lo 0 0.00
1546 s_andn1_saveexec_b32 s25, vcc_lo 0 0.00
1547 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00
1548 s_andn2_b32 s22, s22, exec_lo 0 0.00
1549 s_cbranch_scc0 _L51 0 0.00
1550 BBF0_55:
1551 s_and_b32 exec_lo, s25, s22 0 0.00
1552 _L46:
1553 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
1554 s_and_b32 exec_lo, s24, s22 0 0.00
1555 s_ff1_i32_b32 s24, exec_lo 0 0.00
1556 s_mov_b32 s23, exec_lo 0 0.00
1557 s_lshl_b32 s25, 1, s24 0 0.00
1558 v_lshlrev_b32_e32 v76, 1, v76 0 0.00
1559 s_and_b32 s25, s25, exec_lo 0 0.00
1560 v_ldexp_f32 v16, v16, -1 0 0.00
1561 s_and_saveexec_b32 s25, s25 0 0.00
1562 s_cbranch_execz _L52 0 0.00
1563 BBF0_56:
1564 s_bcnt1_i32_b32 s26, s23 0 0.00
1565 v_mov_b32_e32 v78, s26 0 0.00
1566 s_waitcnt_depctr 0xffe3 0 0.00
1567 buffer_atomic_add v78, off, s[8:11], 0 offset:32 glc 0 0.00
1568 _L52:
1569 s_waitcnt_depctr 0xffe3 0 0.00
1570 s_mov_b32 exec_lo, s25 0 0.00
1571 v_mbcnt_lo_u32_b32 v79, s23, 0 0 0.00
1572 s_waitcnt vmcnt(0) 0 0.00
1573 v_readlane_b32 s23, v78, s24 0 0.00
1574 v_cvt_f32_u32_e32 v80, v76 0 0.00
1575 v_mov_b32_e32 v81, v16 0 0.00
1576 v_add_nc_i32 v78, s23, v79 0 0.00
1577 v_mov_b32_e32 v79, v61 0 0.00
1578 v_mul_lo_u32 v78, v78, 12 0 0.00
1579 s_waitcnt_depctr 0xffe3 0 0.00
1580 buffer_store_dwordx3 v[79:81], v78, s[16:19], 0 offen glc 0 0.00
1581 s_branch _L53 0 0.00
1582 _L51:
1583 s_mov_b32 exec_lo, s21 0 0.00
1584 v_add_nc_u32_e32 v49, 1, v76 0 0.00
1585 s_mov_b32 s21, exec_lo 0 0.00
1586 s_ff1_i32_b32 s22, exec_lo 0 0.00
1587 v_ffbl_b32_e32 v64, v49 0 0.00
1588 s_lshl_b32 s23, 1, s22 0 0.00
1589 s_and_b32 s23, s23, exec_lo 0 0.00
1590 v_min_u32_e32 v64, 32, v64 0 0.00
1591 v_lshlrev_b32_e64 v66, v64, 1 0 0.00
1592 v_cvt_f32_u32_e32 v66, v66 0 0.00
1593 v_lshrrev_b32_e32 v49, v64, v49 0 0.00
1594 v_mul_f32_e32 v16, v16, v66 0 0.00
1595 s_and_saveexec_b32 s23, s23 0 0.00
1596 s_cbranch_execz _L54 0 0.00
1597 BBF0_57:
1598 s_bcnt1_i32_b32 s24, s21 0 0.00
1599 v_mov_b32_e32 v64, s24 0 0.00
1600 s_waitcnt_depctr 0xffe3 0 0.00
1601 buffer_atomic_add v64, off, s[8:11], 0 offset:32 glc 0 0.00
1602 _L54:
1603 s_waitcnt_depctr 0xffe3 0 0.00
1604 s_mov_b32 exec_lo, s23 0 0.00
1605 s_waitcnt vmcnt(0) 0 0.00
1606 v_readlane_b32 s22, v64, s22 0 0.00
1607 v_sub_f32_e32 v73, v69, v50 0 0.00
1608 v_add_f32_e32 v75, v50, v69 0 0.00
1609 v_mbcnt_lo_u32_b32 v64, s21, 0 0 0.00
1610 v_cvt_f32_u32_e32 v92, v49 0 0.00
1611 v_mov_b32_e32 v94, v16 0 0.00
1612 v_mul_f32_e32 v77, v73, v73 0 0.00
1613 v_mul_f32_e32 v76, v75, v75 0 0.00
1614 v_mov_b32_e32 v93, v92 0 0.00
1615 v_mov_b32_e32 v92, v62 0 0.00
1616 v_mad_f32 v89, v77, 0xbccccccd, 1.0 0 0.00
1617 s_mov_b32 s26, 0xb84c68e7 0 0.00
1618 s_mov_b32 s21, 0xbc6a0ea1 0 0.00
1619 s_mov_b32 s23, 0x3979a934 0 0.00
1620 s_mov_b32 s24, 0x388fa325 0 0.00
1621 s_mov_b32 s25, 0x3b21e3b8 0 0.00
1622 v_madak_f32 v66, s26, v77, 0x3a088889 0 0.00
1623 v_add_nc_i32 v64, s22, v64 0 0.00
1624 v_madak_f32 v83, s21, v77, 0x40c00000 0 0.00
1625 v_madak_f32 v87, s25, v77, 0xbd2aaaab 0 0.00
1626 v_madak_f32 v74, s24, v77, 0xba3b3ee7 0 0.00
1627 v_madak_f32 v88, s23, v77, 0xbdcccccd 0 0.00
1628 v_madmk_f32 v91, v76, 0xb6500cec, v66 0 0.00
1629 v_mul_f32_e32 v66, v77, v77 0 0.00
1630 v_mul_lo_u32 v64, v64, 12 0 0.00
1631 v_madmk_f32 v74, v76, 0xb70526e7, v74 0 0.00
1632 v_madmk_f32 v83, v66, 0xb8c28a7f, v83 0 0.00
1633 v_madmk_f32 v89, v66, 0x39b3719e, v89 0 0.00
1634 v_mul_f32_e32 v90, v77, v66 0 0.00
1635 v_madmk_f32 v77, v66, 0x378e44a1, v88 0 0.00
1636 v_madmk_f32 v66, v66, 0xb81c6fca, v87 0 0.00
1637 v_madmk_f32 v83, v90, 0x3494ab4c, v83 0 0.00
1638 v_mac_f32_e32 v77, v74, v76 0 0.00
1639 v_mac_f32_e32 v66, v91, v76 0 0.00
1640 v_madmk_f32 v87, v90, 0xb601da25, v89 0 0.00
1641 v_mac_f32_e32 v83, v77, v76 0 0.00
1642 v_mac_f32_e32 v87, v66, v76 0 0.00
1643 v_rcp_f32_e32 v66, v81 0 0.00
1644 s_waitcnt_depctr 0xffe3 0 0.00
1645 buffer_store_dwordx3 v[92:94], v64, s[16:19], 0 offen glc 0 0.00
1646 v_mul_f32_e32 v64, v73, v83 0 0.00
1647 v_mul_f32_e64 v73, v5, v87 div:2 0 0.00
1648 v_cmp_gt_f32_e64 vcc_lo, 0x3a83126f, |v64| 0 0.00
1649 v_mul_f32_e32 v66, v73, v66 0 0.00
1650 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00
1651 s_cbranch_execz _L55 0 0.00
1652 BBF0_58:
1653 v_mad_f32 v83, v64, -0.5, v75 0 0.00
1654 v_cmp_gt_f32_e64 s22, 0x3a83126f, |v66| 0 0.00
1655 s_andn1_saveexec_b32 s23, s22 0 0.00
1656 s_cbranch_execz _L56 0 0.00
1657 BBF0_59:
1658 v_mad_f32 v73, -v66, v83, -1.0 0 0.00
1659 v_mul_f32_e32 v74, v64, v66 0 0.00
1660 v_mad_f32 v73, -v83, v66, v73 0 0.00
1661 v_ldexp_f32 v32, -v74, 1 0 0.00
1662 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v73| 0 0.00
1663 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00
1664 s_cbranch_execz _L57 0 0.00
1665 BBF0_60:
1666 v_add_f32_e64 v77, |v73|, -1.0 0 0.00
1667 v_mov_b32_e32 v88, 0xbf4f5c29 0 0.00
1668 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v73| 0 0.00
1669 v_cndmask_b32_e64 v91, 0.5, 0x3f23fe5d, vcc_lo 0 0.00
1670 v_mov_b32_e32 v89, 0x3f6a311b 0 0.00
1671 v_sqrt_f32_e64 v90, |v77| 0 0.00
1672 v_cndmask_b32_e32 v88, 0xbe1fbe77, v88, vcc_lo 0 0.00
1673 s_mov_b32 s25, 0x3f715bef 0 0.00
1674 v_cndmask_b32_e32 v94, 0x3e255531, v89, vcc_lo 0 0.00
1675 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v73| 0 0.00
1676 v_mad_f32 v88, v91, |v73|, v88 0 0.00
1677 v_mul_f32_e32 v77, v77, v90 0 0.00
1678 v_mad_f32 v94, v88, |v73|, v94 0 0.00
1679 v_madak_f32 v77, s25, v77, 0x3f490fdb 0 0.00
1680 v_cndmask_b32_e32 v77, v94, v77, vcc_lo 0 0.00
1681 _L57:
1682 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
1683 v_mul_f32_e64 v77, |v73|, 0x3e32e5ab 0 0.00
1684 v_sin_f32_e32 v77, v77 0 0.00
1685 v_mul_f32_e32 v77, 0x3f693710, v77 0 0.00
1686 s_mov_b32 exec_lo, s24 0 0.00
1687 v_cmp_gt_f32_e32 vcc_lo, 0, v73 0 0.00
1688 v_cndmask_b32_e64 v88, 0, -1, vcc_lo 0 0.00
1689 v_cmp_lt_f32_e32 vcc_lo, 0, v73 0 0.00
1690 v_mad_f32 v91, v74, -2.0, v73 0 0.00
1691 v_add_co_ci_u32_e64 v88, vcc_lo, v88, 0, vcc_lo 0 0.00
1692 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v91| 0 0.00
1693 v_cvt_f32_i32_e32 v88, v88 0 0.00
1694 v_mul_f32_e32 v53, v77, v88 0 0.00
1695 s_andn2_b32 exec_lo, s24, vcc_lo 0 0.00
1696 s_cbranch_execz _L58 0 0.00
1697 BBF0_61:
1698 v_add_f32_e64 v88, |v91|, -1.0 0 0.00
1699 v_mov_b32_e32 v74, 0xbf4f5c29 0 0.00
1700 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v91| 0 0.00
1701 v_cndmask_b32_e64 v90, 0.5, 0x3f23fe5d, vcc_lo 0 0.00
1702 s_mov_b32 s25, 0x3f715bef 0 0.00
1703 v_sqrt_f32_e64 v89, |v88| 0 0.00
1704 v_cndmask_b32_e32 v93, 0xbe1fbe77, v74, vcc_lo 0 0.00
1705 v_mov_b32_e32 v74, 0x3f6a311b 0 0.00
1706 v_mad_f32 v93, v90, |v91|, v93 0 0.00
1707 v_cndmask_b32_e32 v74, 0x3e255531, v74, vcc_lo 0 0.00
1708 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v91| 0 0.00
1709 v_mul_f32_e32 v88, v88, v89 0 0.00
1710 v_mad_f32 v74, v93, |v91|, v74 0 0.00
1711 v_madak_f32 v88, s25, v88, 0x3f490fdb 0 0.00
1712 v_cndmask_b32_e32 v88, v74, v88, vcc_lo 0 0.00
1713 _L58:
1714 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
1715 v_mul_f32_e64 v74, |v91|, 0x3e32e5ab 0 0.00
1716 v_sin_f32_e32 v74, v74 0 0.00
1717 v_mul_f32_e32 v88, 0x3f693710, v74 0 0.00
1718 s_mov_b32 exec_lo, s24 0 0.00
1719 v_rcp_f32_e32 v89, v32 0 0.00
1720 v_mul_f32_e32 v74, v64, v73 0 0.00
1721 v_mad_f32 v83, -v74, v89, v83 0 0.00
1722 v_cmp_gt_f32_e32 vcc_lo, 0, v91 0 0.00
1723 v_cndmask_b32_e64 v74, 0, -1, vcc_lo 0 0.00
1724 v_cmp_lt_f32_e32 vcc_lo, 0, v91 0 0.00
1725 v_mad_f32 v66, v83, v66, 1.0 0 0.00
1726 v_add_co_ci_u32_e64 v74, vcc_lo, v74, 0, vcc_lo 0 0.00
1727 v_mul_f32_e32 v66, v83, v66 0 0.00
1728 v_cvt_f32_i32_e32 v74, v74 0 0.00
1729 v_sqrt_f32_e64 v66, |v66| 0 0.00
1730 v_mad_f32 v88, v88, v74, -v53 0 0.00
1731 v_mul_f32_e32 v66, v88, v66 0 0.00
1732 v_mul_f32_e32 v54, v66, v89 0 0.00
1733 _L56:
1734 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00
1735 s_cbranch_execz _L59 0 0.00
1736 BBF0_62:
1737 v_sqrt_f32_e64 v66, |v83| 0 0.00
1738 v_add_f32_e32 v73, v64, v83 0 0.00
1739 v_mov_b32_e32 v32, v64 0 0.00
1740 v_sqrt_f32_e64 v74, |v73| 0 0.00
1741 v_mul_f32_e32 v53, v83, v66 0 0.00
1742 v_mad_f32 v88, v73, v74, -v53 0 0.00
1743 v_rcp_f32_e32 v73, v64 0 0.00
1744 v_mul_f32_e32 v74, 0x3f2aaaab, v88 0 0.00
1745 v_mul_f32_e32 v54, v74, v73 0 0.00
1746 v_mov_b32_e32 v73, v83 0 0.00
1747 _L59:
1748 s_mov_b32 exec_lo, s23 0 0.00
1749 v_mov_b32_e32 v74, v73 0 0.00
1750 v_mov_b32_e32 v83, v53 0 0.00
1751 v_cndmask_b32_e64 v76, 0, 2, s22 0 0.00
1752 _L55:
1753 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00
1754 s_cbranch_execz _L60 0 0.00
1755 BBF0_63:
1756 v_mad_f32 v66, v75, v66, 1.0 0 0.00
1757 v_mov_b32_e32 v32, 0 0 0.00
1758 v_mov_b32_e32 v74, 0 0 0.00
1759 v_mov_b32_e32 v83, 0 0 0.00
1760 v_mov_b32_e32 v76, 1 0 0.00
1761 v_mul_f32_e32 v66, v75, v66 0 0.00
1762 v_mov_b32_e32 v88, 0 0 0.00
1763 v_sqrt_f32_e64 v54, |v66| 0 0.00
1764 _L60:
1765 s_mov_b32 exec_lo, s21 0 0.00
1766 v_ldexp_f32 v77, v87, -2 0 0.00
1767 v_mov_b32_e32 v69, 0 0 0.00
1768 s_movk_i32 s23, 0xffff 0 0.00
1769 s_mov_b32 s21, exec_lo 0 0.00
1770 s_mov_b32 s22, exec_lo 0 0.00
1771 v_rcp_f32_e32 v90, v77 0 0.00
1772 v_mul_f32_e32 v77, v63, v81 0 0.00
1773 v_mul_f32_e32 v77, v77, v90 0 0.00
1774 v_sqrt_f32_e32 v90, v77 0 0.00
1775 v_max_f32_e32 v77, 0x358637bd, v81 0 0.00
1776 v_mul_f32_e32 v73, v90, v54 0 0.00
1777 v_rcp_f32_e32 v90, v77 0 0.00
1778 v_ldexp_f32 v77, v64, -1 0 0.00
1779 v_ceil_f32_e32 v73, v73 0 0.00
1780 v_max_f32_e32 v73, 1.0, v73 0 0.00
1781 v_mul_f32_e64 v92, v5, v90 div:2 0 0.00
1782 v_cvt_u32_f32_e32 v91, v73 0 0.00
1783 s_nop 0 0 0.00
1784 s_nop 0 0 0.00
1785 s_nop 0 0 0.00
1786 s_nop 0 0 0.00
1787 _L71:
1788 v_cmp_eq_i32_e64 s23, s23, 0 0 0.00
1789 v_add_co_ci_u32_e64 v81, vcc_lo, v69, 0, s23 0 0.00
1790 v_cmp_gt_u32_e32 vcc_lo, v91, v81 0 0.00
1791 s_and_saveexec_b32 s24, vcc_lo 0 0.00
1792 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
1793 s_andn2_b32 s22, s22, exec_lo 0 0.00
1794 s_cbranch_scc0 _L61 0 0.00
1795 BBF0_64:
1796 s_and_b32 exec_lo, s24, s22 0 0.00
1797 v_add_co_ci_u32_e64 v69, vcc_lo, v69, 1, s23 0 0.00
1798 v_cmp_eq_f32_e64 s23, v86, 1.0 0 0.00
1799 v_cmp_eq_i32_e32 vcc_lo, v91, v69 0 0.00
1800 s_and_b32 vcc_lo, vcc_lo, s23 0 0.00
1801 s_andn1_saveexec_b32 s23, vcc_lo 0 0.00
1802 s_cbranch_execz _L62 0 0.00
1803 BBF0_65:
1804 v_rcp_f32_e32 v94, v73 0 0.00
1805 v_cvt_f32_u32_e32 v69, v69 0 0.00
1806 v_cmp_ne_i32_e32 vcc_lo, 1, v76 0 0.00
1807 v_mul_f32_e32 v96, v69, v94 0 0.00
1808 s_and_saveexec_b32 s24, vcc_lo 0 0.00
1809 s_cbranch_execz _L63 0 0.00
1810 BBF0_66:
1811 v_mad_f32 v69, v88, v96, v83 0 0.00
1812 v_cmp_ne_i32_e32 vcc_lo, 2, v76 0 0.00
1813 s_and_saveexec_b32 s25, vcc_lo 0 0.00
1814 s_cbranch_execz _L64 0 0.00
1815 BBF0_67:
1816 v_cmp_gt_f32_e64 vcc_lo, 0x3f337960, |v69| 0 0.00
1817 s_andn1_saveexec_b32 s26, vcc_lo 0 0.00
1818 s_cbranch_execz _L65 0 0.00
1819 BBF0_68:
1820 v_cmp_gt_f32_e64 vcc_lo, 0x3f673b59, |v69| 0 0.00
1821 s_andn1_saveexec_b32 s27, vcc_lo 0 0.00
1822 s_cbranch_execz _L66 0 0.00
1823 BBF0_69:
1824 v_mov_b32_e32 v89, 0xbf83a110 0 0.00
1825 v_cmp_gt_f32_e64 vcc_lo, 0x40027ca5, |v69| 0 0.00
1826 v_cndmask_b32_e64 v94, 2.0, 0x3fc7d00b, vcc_lo 0 0.00
1827 v_cndmask_b32_e32 v95, 0xbe98df6c, v89, vcc_lo 0 0.00
1828 v_mad_f32 v95, v94, |v69|, v95 0 0.00
1829 v_mov_b32_e32 v89, 0x3f21d928 0 0.00
1830 v_sqrt_f32_e32 v93, v95 0 0.00
1831 v_cndmask_b32_e32 v94, 0x3e1fbe77, v89, vcc_lo 0 0.00
1832 v_add_f32_e32 v94, v94, v93 0 0.00
1833 _L66:
1834 s_andn2_b32 exec_lo, s27, exec_lo 0 0.00
1835 s_cbranch_execz _L67 0 0.00
1836 BBF0_70:
1837 v_add_f32_e64 v89, |v69|, 0xbf490fdb 0 0.00
1838 v_log_f32_e64 v93, |v89| 0 0.00
1839 v_cmp_gt_f32_e32 vcc_lo, 0, v89 0 0.00
1840 v_cndmask_b32_e64 v94, 0, -1, vcc_lo 0 0.00
1841 v_cmp_lt_f32_e32 vcc_lo, 0, v89 0 0.00
1842 v_mul_f32_e32 v89, 0x3f2aaaab, v93 0 0.00
1843 v_add_co_ci_u32_e64 v93, vcc_lo, v94, 0, vcc_lo 0 0.00
1844 v_exp_f32_e32 v94, v89 0 0.00
1845 v_cvt_f32_i32_e32 v89, v93 0 0.00
1846 v_mul_f32_e32 v89, v94, v89 0 0.00
1847 v_mad_f32 v94, v89, 0x3f852018, 1.0 0 0.00
1848 _L67:
1849 s_mov_b32 exec_lo, s27 0 0.00
1850 _L65:
1851 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00
1852 s_cbranch_execz _L68 0 0.00
1853 BBF0_71:
1854 s_mov_b32 s27, 0xbca86ba3 0 0.00
1855 v_mul_f32_e64 v89, |v69|, 0x3f8c8168 0 0.00
1856 v_mad_f32 v93, |v69|, 0xbf8c8168, 1.0 0 0.00
1857 v_mad_f32 v95, |v69|, s27, 0x3d981627 0 0.00
1858 v_sqrt_f32_e32 v93, v93 0 0.00
1859 v_madak_f32 v95, v95, v89, 0xbe593484 0 0.00
1860 v_madak_f32 v94, v95, v89, 0x3fc90da4 0 0.00
1861 v_mad_f32 v89, -v94, v93, 0x3fc90fdb 0 0.00
1862 v_mul_f32_e32 v94, 0x3f693710, v89 0 0.00
1863 _L68:
1864 s_mov_b32 exec_lo, s26 0 0.00
1865 v_cmp_gt_f32_e32 vcc_lo, 0, v69 0 0.00
1866 v_cndmask_b32_e64 v89, 0, -1, vcc_lo 0 0.00
1867 v_cmp_lt_f32_e32 vcc_lo, 0, v69 0 0.00
1868 v_add_co_ci_u32_e64 v69, vcc_lo, v89, 0, vcc_lo 0 0.00
1869 v_cvt_f32_i32_e32 v69, v69 0 0.00
1870 v_mul_f32_e32 v69, v94, v69 0 0.00
1871 _L64:
1872 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00
1873 s_cbranch_execz _L69 0 0.00
1874 BBF0_72:
1875 v_log_f32_e64 v89, |v69| 0 0.00
1876 v_cmp_gt_f32_e32 vcc_lo, 0, v69 0 0.00
1877 v_cndmask_b32_e64 v93, 0, -1, vcc_lo 0 0.00
1878 v_cmp_lt_f32_e32 vcc_lo, 0, v69 0 0.00
1879 v_mul_f32_e32 v69, 0x3f2aaaab, v89 0 0.00
1880 v_add_co_ci_u32_e64 v89, vcc_lo, v93, 0, vcc_lo 0 0.00
1881 v_exp_f32_e32 v69, v69 0 0.00
1882 v_cvt_f32_i32_e32 v94, v89 0 0.00
1883 v_mul_f32_e32 v69, v94, v69 0 0.00
1884 _L69:
1885 s_mov_b32 exec_lo, s25 0 0.00
1886 v_rcp_f32_e32 v94, v32 0 0.00
1887 v_sub_f32_e32 v69, v69, v74 0 0.00
1888 v_mul_f32_e32 v96, v69, v94 0 0.00
1889 _L63:
1890 s_mov_b32 exec_lo, s24 0 0.00
1891 v_add_f32_e64 v94, v96, -1.0 div:2 0 0.00
1892 v_mul_f32_e32 v95, v96, v96 0 0.00
1893 v_add_f32_e64 v97, v96, -2.0 div:2 0 0.00
1894 v_mad_f32 v94, v64, v94, v75 0 0.00
1895 v_mul_f32_e32 v69, v64, v95 0 0.00
1896 v_mad_f32 v103, v77, v97, v75 0 0.00
1897 v_ldexp_f32 v97, v96, -1 0 0.00
1898 v_mul_f32_e32 v94, v96, v94 0 0.00
1899 v_ldexp_f32 v100, v69, -1 0 0.00
1900 v_mad_f32 v97, v103, v97, -v50 0 0.00
1901 v_mul_f32_e32 v104, v94, v94 0 0.00
1902 v_mul_f32_e64 v95, v69, v100 div:2 0 0.00
1903 v_mul_f32_e64 v89, v69, v94 div:2 0 0.00
1904 v_mul_f32_e32 v97, 0.15915494, v97 0 0.00
1905 v_mul_f32_e32 v101, v104, v104 0 0.00
1906 v_mul_f32_e32 v93, v104, v95 0 0.00
1907 v_mac_f32_e32 v89, v94, v100 0 0.00
1908 v_mul_f32_e32 v107, v95, v95 0 0.00
1909 v_mad_f32 v103, v95, 0xbbcccccd, 1.0 0 0.00
1910 v_mac_f32_e32 v93, v104, v95 0 0.00
1911 v_mul_f32_e64 v99, v89, v104 mul:2 0 0.00
1912 v_mul_f32_e32 v105, v89, v95 0 0.00
1913 v_madmk_f32 v103, v104, 0xbd2aaaab, v103 0 0.00
1914 v_mac_f32_e32 v93, v89, v89 0 0.00
1915 v_mul_f32_e32 v106, v89, v99 0 0.00
1916 v_mul_f32_e32 v102, v94, v105 0 0.00
1917 v_mul_f32_e32 v98, 0x38c30c31, v93 0 0.00
1918 v_mac_f32_e32 v106, v101, v95 0 0.00
1919 v_mac_f32_e32 v102, v94, v105 0 0.00
1920 v_mad_f32 v105, v100, v94, v89 0 0.00
1921 v_madmk_f32 v98, v101, 0x3a088889, v98 0 0.00
1922 v_mac_f32_e32 v106, v93, v104 0 0.00
1923 v_mac_f32_e32 v102, v93, v100 0 0.00
1924 v_mul_f32_e64 v93, v69, v95 div:2 0 0.00
1925 v_madmk_f32 v98, v107, 0x3797b426, v98 0 0.00
1926 v_mul_f32_e32 v93, 0x39c30c31, v93 0 0.00
1927 v_add_f32_e32 v95, v103, v98 0 0.00
1928 v_mul_f32_e32 v98, 0x3672b9d6, v102 0 0.00
1929 v_mul_f32_e32 v102, v105, v94 0 0.00
1930 v_mul_f32_e32 v103, v94, v99 0 0.00
1931 v_mul_f32_e32 v105, v104, v101 0 0.00
1932 v_madmk_f32 v95, v106, 0xb521d13a, v95 0 0.00
1933 v_add_f32_e32 v106, -1.0, v96 0 0.00
1934 v_madmk_f32 v102, v102, 0x3b088889, v93 0 0.00
1935 v_mac_f32_e32 v103, v101, v100 0 0.00
1936 v_mac_f32_e32 v99, v89, v104 0 0.00
1937 v_madmk_f32 v89, v105, 0xb6500d01, v95 0 0.00
1938 v_mad_f32 v95, v77, v106, v75 0 0.00
1939 v_mad_f32 v102, v69, 0x3d2aaaab, -v102 0 0.00
1940 v_madmk_f32 v93, v103, 0x379c09c1, v98 0 0.00
1941 v_mul_f32_e32 v94, v94, v99 0 0.00
1942 v_mul_f32_e32 v98, v101, v101 0 0.00
1943 v_mad_f32 v69, v95, v96, -v50 0 0.00
1944 v_rcp_f32_e32 v95, v87 0 0.00
1945 v_add_f32_e32 v99, v102, v93 0 0.00
1946 v_mac_f32_e32 v94, v100, v101 0 0.00
1947 v_madmk_f32 v89, v98, 0x3238ef1d, v89 0 0.00
1948 v_sin_f32_e32 v93, v97 0 0.00
1949 v_mul_f32_e32 v69, 0.15915494, v69 0 0.00
1950 v_mul_f32_e32 v94, v94, v104 0 0.00
1951 v_mul_f32_e32 v96, v96, v95 0 0.00
1952 v_cos_f32_e32 v95, v97 0 0.00
1953 v_mul_f32_e32 v98, v96, v93 0 0.00
1954 v_madmk_f32 v93, v94, 0xb3b8ef1d, v99 0 0.00
1955 v_cos_f32_e32 v97, v69 0 0.00
1956 v_mul_f32_e32 v94, v89, v98 0 0.00
1957 v_mul_f32_e32 v96, v96, v95 0 0.00
1958 v_sin_f32_e32 v69, v69 0 0.00
1959 v_mul_f32_e32 v98, v93, v98 0 0.00
1960 v_mad_f32 v94, -v93, v96, -v94 0 0.00
1961 v_mad_f32 v98, v89, v96, -v98 0 0.00
1962 v_mac_f32_e32 v94, v92, v97 0 0.00
1963 v_mac_f32_e32 v98, v92, v69 0 0.00
1964 v_mul_f32_e32 v95, v85, v94 0 0.00
1965 v_mul_f32_e32 v96, v84, v94 0 0.00
1966 v_mad_f32 v95, v84, v98, -v95 0 0.00
1967 v_mac_f32_e32 v96, v85, v98 0 0.00
1968 v_add_f32_e32 v94, v65, v95 0 0.00
1969 v_add_f32_e32 v66, v70, v96 0 0.00
1970 _L62:
1971 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00
1972 v_mov_b32_e32 v66, v38 0 0.00
1973 v_mov_b32_e32 v94, v48 0 0.00
1974 s_mov_b32 exec_lo, s23 0 0.00
1975 s_ff1_i32_b32 s24, exec_lo 0 0.00
1976 s_mov_b32 s23, exec_lo 0 0.00
1977 s_lshl_b32 s25, 1, s24 0 0.00
1978 s_and_b32 s25, s25, exec_lo 0 0.00
1979 s_and_saveexec_b32 s25, s25 0 0.00
1980 s_cbranch_execz _L70 0 0.00
1981 BBF0_73:
1982 s_bcnt1_i32_b32 s26, s23 0 0.00
1983 v_mov_b32_e32 v89, s26 0 0.00
1984 s_waitcnt_depctr 0xffe3 0 0.00
1985 buffer_atomic_add v89, off, s[8:11], 0 offset:28 glc 0 0.00
1986 _L70:
1987 s_waitcnt_depctr 0xffe3 0 0.00
1988 s_mov_b32 exec_lo, s25 0 0.00
1989 s_waitcnt vmcnt(0) 0 0.00
1990 v_readlane_b32 s24, v89, s24 0 0.00
1991 v_cndmask_b32_e64 v93, v36, v66, s3 0 0.00
1992 v_cndmask_b32_e64 v54, v66, v36, s3 0 0.00
1993 v_cndmask_b32_e64 v95, v41, v94, s3 0 0.00
1994 v_cndmask_b32_e64 v96, v94, v41, s3 0 0.00
1995 v_mbcnt_lo_u32_b32 v89, s23, 0 0 0.00
1996 v_mov_b32_e32 v36, v66 0 0.00
1997 v_mul_f32_e32 v98, v44, v54 0 0.00
1998 v_mul_f32_e32 v53, v40, v54 0 0.00
1999 v_mul_f32_e32 v54, v44, v93 0 0.00
2000 v_mul_f32_e32 v93, v40, v93 0 0.00
2001 v_mov_b32_e32 v41, v94 0 0.00
2002 v_mac_f32_e32 v98, v45, v96 0 0.00
2003 v_mac_f32_e32 v53, v34, v96 0 0.00
2004 v_mac_f32_e32 v54, v45, v95 0 0.00
2005 v_mac_f32_e32 v93, v34, v95 0 0.00
2006 v_add_nc_i32 v89, s24, v89 0 0.00
2007 v_add_f32_e32 v97, v28, v98 0 0.00
2008 v_add_f32_e32 v98, v24, v53 0 0.00
2009 v_add_f32_e32 v99, v28, v54 0 0.00
2010 v_add_f32_e32 v100, v24, v93 0 0.00
2011 v_mul_lo_u32 v89, v89, 24 0 0.00
2012 s_movk_i32 s23, 0x0 0 0.00
2013 v_mov_b32_e32 v69, v81 0 0.00
2014 s_waitcnt_depctr 0xffe3 0 0.00
2015 s_clause 0x1 0 0.00
2016 buffer_store_dword v7, v89, s[12:15], 0 offen glc 0 0.00
2017 buffer_store_dwordx4 v[97:100], v89, s[12:15], 0 offen offset:8 glc 0 0.00
2018 v_min3_f32 v72, v97, v99, v72 0 0.00
2019 v_min3_f32 v71, v98, v100, v71 0 0.00
2020 v_max3_f32 v68, v97, v99, v68 0 0.00
2021 v_max3_f32 v67, v98, v100, v67 0 0.00
2022 s_branch _L71 0 0.00
2023 _L61:
2024 s_mov_b32 exec_lo, s21 0 0.00
2025 v_mov_b32_e32 v64, v86 0 0.00
2026 v_mov_b32_e32 v53, v80 0 0.00
2027 v_mov_b32_e32 v32, v82 0 0.00
2028 v_mov_b32_e32 v70, v79 0 0.00
2029 v_mov_b32_e32 v65, v78 0 0.00
2030 v_mov_b32_e32 v104, v76 0 0.00
2031 v_mov_b32_e32 v69, v73 0 0.00
2032 s_branch _L72 0 0.00
2033 _L41:
2034 s_mov_b32 exec_lo, s2 0 0.00
2035 v_add_f32_e32 v38, v25, v111 0 0.00
2036 v_subrev_f32_e32 v43, v21, v15 0 0.00
2037 v_cmp_eq_f32_e64 vcc_lo, -v20, 0 0 0.00
2038 s_andn2_b32 exec_lo, s2, vcc_lo 0 0.00
2039 s_cbranch_execz _L73 0 0.00
2040 BBF0_74:
2041 v_add_f32_e32 v25, v11, v14 0 0.00
2042 v_add_f32_e32 v40, v12, v13 0 0.00
2043 v_sub_f32_e32 v41, v12, v13 0 0.00
2044 v_sub_f32_e32 v42, v11, v14 0 0.00
2045 v_mad_f32 v64, -v20, v29, v23 0 0.00
2046 v_mul_f32_e32 v44, v25, v25 0 0.00
2047 v_mul_f32_e32 v25, v40, v40 0 0.00
2048 v_mad_f32 v63, -v20, v31, v22 0 0.00
2049 v_mov_b32_e32 v102, v15 0 0.00
2050 v_mov_b32_e32 v103, v111 0 0.00
2051 v_mac_f32_e32 v44, v41, v41 0 0.00
2052 v_mac_f32_e32 v25, v42, v42 0 0.00
2053 v_mov_b32_e32 v8, v14 0 0.00
2054 v_mov_b32_e32 v16, v19 0 0.00
2055 v_mov_b32_e32 v29, v12 0 0.00
2056 v_sqrt_f32_e32 v40, v44 0 0.00
2057 v_mov_b32_e32 v31, v11 0 0.00
2058 v_mov_b32_e32 v44, v43 0 0.00
2059 v_mov_b32_e32 v45, v38 0 0.00
2060 v_sqrt_f32_e32 v25, v25 0 0.00
2061 v_mov_b32_e32 v49, v23 0 0.00
2062 v_mov_b32_e32 v23, v18 0 0.00
2063 v_mad_f32 v2, v40, 0.5, v25 0 0.00
2064 v_mov_b32_e32 v25, v13 0 0.00
2065 _L73:
2066 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
2067 s_cbranch_execz _L74 0 0.00
2068 BBF0_75:
2069 v_mul_f32_e32 v40, v13, v22 0 0.00
2070 v_mul_f32_e32 v25, v14, v22 0 0.00
2071 v_mul_f32_e32 v42, v13, v110 0 0.00
2072 v_mul_f32_e32 v8, v13, v15 0 0.00
2073 v_mul_f32_e32 v41, v14, v15 0 0.00
2074 v_mul_f32_e32 v22, v13, v108 0 0.00
2075 v_mac_f32_e32 v40, v11, v23 0 0.00
2076 v_mac_f32_e32 v25, v12, v23 0 0.00
2077 v_mac_f32_e32 v42, v11, v109 0 0.00
2078 v_mul_f32_e32 v29, v14, v110 0 0.00
2079 v_mac_f32_e32 v8, v11, v111 0 0.00
2080 v_mac_f32_e32 v41, v12, v111 0 0.00
2081 v_mul_f32_e32 v1, v14, v108 0 0.00
2082 v_mac_f32_e32 v22, v11, v4 0 0.00
2083 v_add_f32_e32 v49, v18, v40 0 0.00
2084 v_add_f32_e32 v63, v19, v25 0 0.00
2085 v_add_f32_e32 v25, v18, v42 0 0.00
2086 v_mac_f32_e32 v29, v12, v109 0 0.00
2087 v_add_f32_e32 v103, v18, v8 0 0.00
2088 v_add_f32_e32 v102, v19, v41 0 0.00
2089 v_mac_f32_e32 v1, v12, v4 0 0.00
2090 v_add_f32_e32 v4, v18, v22 0 0.00
2091 v_add_f32_e32 v110, v19, v29 0 0.00
2092 v_mov_b32_e32 v2, 1.0 0 0.00
2093 v_mov_b32_e32 v8, 1.0 0 0.00
2094 v_mov_b32_e32 v16, 0 0 0.00
2095 v_mov_b32_e32 v23, 0 0 0.00
2096 v_mov_b32_e32 v29, 0 0 0.00
2097 v_mov_b32_e32 v31, 1.0 0 0.00
2098 v_mov_b32_e32 v109, v25 0 0.00
2099 v_mov_b32_e32 v44, v102 0 0.00
2100 v_mov_b32_e32 v45, v103 0 0.00
2101 v_mov_b32_e32 v22, v63 0 0.00
2102 v_mov_b32_e32 v64, v49 0 0.00
2103 v_mov_b32_e32 v25, 0 0 0.00
2104 v_add_f32_e32 v108, v19, v1 0 0.00
2105 _L74:
2106 s_mov_b32 exec_lo, s2 0 0.00
2107 v_cmp_eq_f32_sdwa s2, v4, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00
2108 v_cmp_eq_f32_sdwa s6, v108, v22 src0_sel:DWORD src1_sel:DWORD 0 0.00
2109 v_cmp_eq_f32_sdwa s3, v109, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00
2110 v_cmp_eq_f32_e32 vcc_lo, v110, v22 0 0.00
2111 s_and_b32 s2, s2, s6 0 0.00
2112 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00
2113 s_and_b32 s2, s2, vcc_lo 0 0.00
2114 v_cmp_eq_f32_sdwa s3, v103, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00
2115 v_cmp_eq_f32_e32 vcc_lo, v102, v22 0 0.00
2116 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00
2117 s_and_b32 vcc_lo, s2, vcc_lo 0 0.00
2118 s_andn1_saveexec_b32 s2, vcc_lo 0 0.00
2119 s_cbranch_execz _L75 0 0.00
2120 BBF0_76:
2121 v_sub_f32_e32 v54, v4, v49 0 0.00
2122 v_sub_f32_e32 v58, v108, v22 0 0.00
2123 v_mul_f32_e32 v51, v54, v54 0 0.00
2124 v_mac_f32_e32 v51, v58, v58 0 0.00
2125 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v51 0 0.00
2126 s_and_saveexec_b32 s3, vcc_lo 0 0.00
2127 s_cbranch_execz _L76 0 0.00
2128 BBF0_77:
2129 v_sub_f32_e32 v51, v109, v4 0 0.00
2130 v_sub_f32_e32 v52, v110, v108 0 0.00
2131 v_sub_f32_e32 v55, v103, v109 0 0.00
2132 v_mul_f32_e32 v53, 0x360637b4, v51 0 0.00
2133 v_mul_f32_e32 v51, 0x360637b4, v52 0 0.00
2134 v_madmk_f32 v52, v54, 0x3f7fffde, v53 0 0.00
2135 v_madmk_f32 v51, v58, 0x3f7fffde, v51 0 0.00
2136 v_sub_f32_e32 v53, v102, v110 0 0.00
2137 v_madmk_f32 v32, v55, 0x2b8cbccc, v52 0 0.00
2138 v_madmk_f32 v34, v53, 0x2b8cbccc, v51 0 0.00
2139 _L76:
2140 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
2141 v_mov_b32_e32 v34, v58 0 0.00
2142 v_mov_b32_e32 v32, v54 0 0.00
2143 s_mov_b32 exec_lo, s3 0 0.00
2144 s_waitcnt lgkmcnt(0) 0 0.00
2145 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
2146 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00
2147 s_load_dwordx4 s[16:19], s[0:1], 0xc0 0 0.00
2148 v_mov_b32_e32 v28, v49 0 0.00
2149 v_mov_b32_e32 v106, v67 0 0.00
2150 v_mov_b32_e32 v36, v22 0 0.00
2151 v_mov_b32_e32 v40, 1.0 0 0.00
2152 v_mov_b32_e32 v41, 0 0 0.00
2153 v_mov_b32_e32 v107, v68 0 0.00
2154 v_mov_b32_e32 v42, v71 0 0.00
2155 v_mov_b32_e32 v48, v72 0 0.00
2156 v_mov_b32_e32 v65, 0 0 0.00
2157 v_sub_f32_e32 v55, v109, v4 0 0.00
2158 v_sub_f32_e32 v56, v110, v108 0 0.00
2159 v_sub_f32_e32 v53, v103, v109 0 0.00
2160 v_sub_f32_e32 v57, v102, v110 0 0.00
2161 v_add_nc_u32_e32 v59, 0x2000, v3 0 0.00
2162 v_add_nc_u32_e32 v61, 0x1000, v3 0 0.00
2163 v_ldexp_f32 v60, v2, -3 0 0.00
2164 v_cmp_ge_f32_e64 s3, -v20, 0 0 0.00
2165 s_mov_b32 s6, exec_lo 0 0.00
2166 s_mov_b32 s20, exec_lo 0 0.00
2167 _L106:
2168 v_cvt_f32_u32_e32 v69, v41 0 0.00
2169 v_mul_f32_e32 v66, v40, v69 0 0.00
2170 v_readfirstlane_b32 s21, v66 0 0.00
2171 v_cmp_eq_f32_e64 vcc_lo, s21, 1.0 0 0.00
2172 s_andn1_saveexec_b32 s22, vcc_lo 0 0.00
2173 s_andn2_b32 exec_lo, s22, exec_lo 0 0.00
2174 s_andn2_b32 s20, s20, exec_lo 0 0.00
2175 s_cbranch_scc0 _L77 0 0.00
2176 BBF0_78:
2177 s_mov_b32 exec_lo, s22 0 0.00
2178 s_and_b32 exec_lo, exec_lo, s20 0 0.00
2179 s_mov_b32 s22, exec_lo 0 0.00
2180 s_mov_b32 s23, exec_lo 0 0.00
2181 v_mul_f32_e32 v68, v32, v32 0 0.00
2182 v_mac_f32_e32 v68, v34, v34 0 0.00
2183 _L87:
2184 v_add_f32_e32 v66, s21, v40 0 0.00
2185 v_sub_f32_e32 v69, 1.0, v66 0 0.00
2186 v_mul_f32_e32 v76, v66, v66 0 0.00
2187 v_mul_f32_e32 v70, v66, v69 0 0.00
2188 v_mul_f32_e32 v73, v69, v69 0 0.00
2189 v_mul_f32_e32 v74, 0x40400000, v70 0 0.00
2190 v_mul_f32_e64 v72, v70, v55 mul:2 0 0.00
2191 v_mul_f32_e32 v71, 0x40400000, v73 0 0.00
2192 v_mul_f32_e64 v75, v70, v56 mul:2 0 0.00
2193 v_mul_f32_e32 v77, v109, v74 0 0.00
2194 v_mac_f32_e32 v72, v54, v73 0 0.00
2195 v_mul_f32_e32 v74, v110, v74 0 0.00
2196 v_mac_f32_e32 v75, v58, v73 0 0.00
2197 v_mac_f32_e32 v77, v4, v71 0 0.00
2198 v_mad_f32 v105, v53, v76, v72 0 0.00
2199 v_mac_f32_e32 v74, v108, v71 0 0.00
2200 v_mad_f32 v47, v57, v76, v75 0 0.00
2201 v_mul_f32_e32 v71, v69, v73 0 0.00
2202 v_mac_f32_e32 v77, v103, v76 0 0.00
2203 v_mul_f32_e32 v70, v105, v105 0 0.00
2204 v_mac_f32_e32 v74, v102, v76 0 0.00
2205 v_mul_f32_e32 v76, v66, v77 0 0.00
2206 v_mac_f32_e32 v70, v47, v47 0 0.00
2207 v_mul_f32_e32 v69, v66, v74 0 0.00
2208 v_mad_f32 v1, v49, v71, v76 0 0.00
2209 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v70 0 0.00
2210 v_mac_f32_e32 v69, v22, v71 0 0.00
2211 s_and_saveexec_b32 s24, vcc_lo 0 0.00
2212 s_cbranch_execz _L78 0 0.00
2213 BBF0_79:
2214 v_add_f32_e32 v70, 0xb58637bd, v66 0 0.00
2215 v_cmp_gt_f32_e32 vcc_lo, 1.0, v66 0 0.00
2216 v_sub_f32_e32 v72, 1.0, v70 0 0.00
2217 v_mul_f32_e32 v75, v70, v70 0 0.00
2218 v_mul_f32_e32 v73, v70, v72 0 0.00
2219 v_mul_f32_e32 v77, v72, v72 0 0.00
2220 v_mul_f32_e64 v80, v73, v55 mul:2 0 0.00
2221 v_mul_f32_e64 v71, v73, v56 mul:2 0 0.00
2222 v_mac_f32_e32 v80, v54, v77 0 0.00
2223 v_mac_f32_e32 v71, v58, v77 0 0.00
2224 v_mad_f32 v105, v53, v75, v80 0 0.00
2225 v_mad_f32 v47, v57, v75, v71 0 0.00
2226 s_and_saveexec_b32 s25, vcc_lo 0 0.00
2227 s_cbranch_execz _L78 0 0.00
2228 BBF0_80:
2229 v_mul_f32_e32 v66, 0x40400000, v73 0 0.00
2230 v_mul_f32_e32 v79, 0x40400000, v77 0 0.00
2231 v_mul_f32_e32 v74, v72, v77 0 0.00
2232 v_mul_f32_e32 v69, v109, v66 0 0.00
2233 v_mul_f32_e32 v66, v110, v66 0 0.00
2234 v_mac_f32_e32 v69, v4, v79 0 0.00
2235 v_mac_f32_e32 v66, v108, v79 0 0.00
2236 v_mac_f32_e32 v69, v103, v75 0 0.00
2237 v_mac_f32_e32 v66, v102, v75 0 0.00
2238 v_mul_f32_e32 v72, v70, v69 0 0.00
2239 v_mul_f32_e32 v69, v70, v66 0 0.00
2240 v_mov_b32_e32 v66, v70 0 0.00
2241 v_mad_f32 v1, v49, v74, v72 0 0.00
2242 v_mac_f32_e32 v69, v22, v74 0 0.00
2243 _L78:
2244 s_mov_b32 exec_lo, s24 0 0.00
2245 s_ff1_i32_b32 s25, exec_lo 0 0.00
2246 s_mov_b32 s24, exec_lo 0 0.00
2247 s_lshl_b32 s26, 1, s25 0 0.00
2248 s_and_b32 s26, s26, exec_lo 0 0.00
2249 s_and_saveexec_b32 s26, s26 0 0.00
2250 s_cbranch_execz _L79 0 0.00
2251 BBF0_81:
2252 s_bcnt1_i32_b32 s27, s24 0 0.00
2253 v_mov_b32_e32 v72, s27 0 0.00
2254 s_waitcnt lgkmcnt(0) 0 0.00
2255 s_waitcnt_depctr 0xffe3 0 0.00
2256 buffer_atomic_add v72, off, s[8:11], 0 offset:32 glc 0 0.00
2257 _L79:
2258 s_waitcnt_depctr 0xffe3 0 0.00
2259 s_mov_b32 exec_lo, s26 0 0.00
2260 s_waitcnt vmcnt(0) 0 0.00
2261 v_readlane_b32 s25, v72, s25 0 0.00
2262 v_mbcnt_lo_u32_b32 v72, s24, 0 0 0.00
2263 v_mov_b32_e32 v81, v3 0 0.00
2264 v_mov_b32_e32 v82, s21 0 0.00
2265 v_mov_b32_e32 v83, v66 0 0.00
2266 v_sub_f32_e32 v75, v66, v65 0 0.00
2267 v_mul_f32_e32 v79, v105, v105 0 0.00
2268 v_sub_f32_e32 v76, v1, v28 0 0.00
2269 v_sub_f32_e32 v80, v69, v36 0 0.00
2270 v_mul_f32_e32 v77, v75, v75 0 0.00
2271 v_mac_f32_e32 v79, v47, v47 0 0.00
2272 v_mul_f32_e32 v74, v76, v76 0 0.00
2273 v_add_nc_i32 v72, s25, v72 0 0.00
2274 v_mad_f32 v50, v80, v80, v74 0 0.00
2275 v_mul_lo_u32 v72, v72, 12 0 0.00
2276 s_waitcnt lgkmcnt(0) 0 0.00
2277 s_waitcnt_depctr 0xffe3 0 0.00
2278 buffer_store_dwordx3 v[81:83], v72, s[16:19], 0 offen glc 0 0.00
2279 v_mul_f32_e32 v72, v68, v77 0 0.00
2280 v_mul_f32_e32 v77, v77, v79 0 0.00
2281 v_sqrt_f32_e32 v78, v50 0 0.00
2282 v_cmp_lt_f32_e64 s24, v72, 0x2b8cbccc 0 0.00
2283 v_cmp_lt_f32_e64 s26, v77, 0x2b8cbccc 0 0.00
2284 v_cmp_le_f32_e32 vcc_lo, 0x358637bd, v78 0 0.00
2285 s_and_b32 s24, s24, s26 0 0.00
2286 s_or_b32 vcc_lo, vcc_lo, s24 0 0.00
2287 s_and_saveexec_b32 s25, vcc_lo 0 0.00
2288 s_cbranch_execz _L80 0 0.00
2289 BBF0_82:
2290 v_cmp_lt_f32_e64 s27, v50, 0x358637bd 0 0.00
2291 s_andn1_saveexec_b32 s28, s27 0 0.00
2292 s_cbranch_execz _L81 0 0.00
2293 BBF0_83:
2294 v_mul_f32_e32 v77, v32, v76 0 0.00
2295 v_mul_f32_e32 v81, v32, v80 0 0.00
2296 v_max_f32_e32 v72, 0x358637bd, v50 0 0.00
2297 v_mac_f32_e32 v77, v34, v80 0 0.00
2298 v_mad_f32 v81, v34, v76, -v81 0 0.00
2299 v_rcp_f32_e32 v72, v72 0 0.00
2300 v_mul_f32_e32 v24, v77, v77 0 0.00
2301 v_mac_f32_e32 v24, v81, v81 0 0.00
2302 v_sqrt_f32_e32 v74, v24 0 0.00
2303 v_mul_f32_e32 v24, v75, v72 0 0.00
2304 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v74 0 0.00
2305 s_and_saveexec_b32 s29, vcc_lo 0 0.00
2306 v_mov_b32_e32 v46, 0x3eaaaaab 0 0.00
2307 v_mov_b32_e32 v71, 0 0 0.00
2308 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00
2309 s_cbranch_execz _L82 0 0.00
2310 BBF0_84:
2311 v_max_f32_e64 v72, |v77|, |v81| 0 0.00
2312 v_min_f32_e64 v82, |v77|, |v81| 0 0.00
2313 s_mov_b32 s30, 0x3caaae5f 0 0.00
2314 v_min_f32_e32 v84, v77, v81 0 0.00
2315 v_cmp_gt_f32_e64 vcc_lo, |v81|, |v77| 0 0.00
2316 v_rcp_f32_e32 v72, v72 0 0.00
2317 v_mul_f32_e32 v46, v24, v74 0 0.00
2318 v_mul_f32_e32 v72, v82, v72 0 0.00
2319 v_mul_f32_e32 v75, v72, v72 0 0.00
2320 v_madak_f32 v82, s30, v75, 0xbdae5a36 0 0.00
2321 v_cmp_gt_f32_e64 s30, -v84, v84 0 0.00
2322 v_madak_f32 v82, v75, v82, 0x3e3876e2 0 0.00
2323 v_madak_f32 v82, v75, v82, 0xbea91d04 0 0.00
2324 v_madak_f32 v85, v75, v82, 0x3f7ff738 0 0.00
2325 v_mul_f32_e32 v75, v72, v85 0 0.00
2326 v_madak_f32 v75, -2.0, v75, 0x3fc90fdb 0 0.00
2327 v_cndmask_b32_e32 v82, 0, v75, vcc_lo 0 0.00
2328 v_max_f32_e32 v75, v77, v81 0 0.00
2329 v_cmp_gt_f32_e64 vcc_lo, -v77, v77 0 0.00
2330 v_cndmask_b32_e64 v77, 0, 0xc0490fdb, vcc_lo 0 0.00
2331 v_mac_f32_e32 v82, v72, v85 0 0.00
2332 v_cmp_ge_f32_e64 vcc_lo, v75, -v75 0 0.00
2333 v_add_f32_e32 v72, v82, v77 0 0.00
2334 s_and_b32 vcc_lo, s30, vcc_lo 0 0.00
2335 v_cndmask_b32_e64 v75, 0, 0x80000000, vcc_lo 0 0.00
2336 v_xor_b32_e32 v71, v72, v75 0 0.00
2337 _L82:
2338 s_mov_b32 exec_lo, s29 0 0.00
2339 v_mul_f32_e32 v75, v105, v76 0 0.00
2340 v_mul_f32_e32 v77, v47, v76 0 0.00
2341 v_mac_f32_e32 v75, v47, v80 0 0.00
2342 v_mad_f32 v77, v105, v80, -v77 0 0.00
2343 v_mul_f32_e32 v79, v75, v75 0 0.00
2344 v_mac_f32_e32 v79, v77, v77 0 0.00
2345 v_sqrt_f32_e32 v50, v79 0 0.00
2346 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v50 0 0.00
2347 s_and_b32 exec_lo, s29, vcc_lo 0 0.00
2348 v_mov_b32_e32 v104, 0x3eaaaaab 0 0.00
2349 v_mov_b32_e32 v70, 0 0 0.00
2350 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00
2351 s_cbranch_execz _L83 0 0.00
2352 BBF0_85:
2353 v_max_f32_e64 v81, |v75|, |v77| 0 0.00
2354 s_mov_b32 s30, 0x3caaae5f 0 0.00
2355 v_min_f32_e32 v85, v75, v77 0 0.00
2356 v_cmp_gt_f32_e64 vcc_lo, |v77|, |v75| 0 0.00
2357 v_mul_f32_e32 v104, v24, v50 0 0.00
2358 v_rcp_f32_e32 v82, v81 0 0.00
2359 v_min_f32_e64 v81, |v75|, |v77| 0 0.00
2360 v_mul_f32_e32 v82, v81, v82 0 0.00
2361 v_mul_f32_e32 v81, v82, v82 0 0.00
2362 v_madak_f32 v83, s30, v81, 0xbdae5a36 0 0.00
2363 v_cmp_gt_f32_e64 s30, -v85, v85 0 0.00
2364 v_madak_f32 v83, v81, v83, 0x3e3876e2 0 0.00
2365 v_madak_f32 v83, v81, v83, 0xbea91d04 0 0.00
2366 v_madak_f32 v81, v81, v83, 0x3f7ff738 0 0.00
2367 v_mul_f32_e32 v83, v82, v81 0 0.00
2368 v_madak_f32 v83, -2.0, v83, 0x3fc90fdb 0 0.00
2369 v_cndmask_b32_e32 v84, 0, v83, vcc_lo 0 0.00
2370 v_max_f32_e32 v83, v75, v77 0 0.00
2371 v_cmp_gt_f32_e64 vcc_lo, -v75, v75 0 0.00
2372 v_cndmask_b32_e64 v75, 0, 0xc0490fdb, vcc_lo 0 0.00
2373 v_mac_f32_e32 v84, v82, v81 0 0.00
2374 v_cmp_ge_f32_e64 vcc_lo, v83, -v83 0 0.00
2375 v_add_f32_e32 v75, v84, v75 0 0.00
2376 s_and_b32 vcc_lo, s30, vcc_lo 0 0.00
2377 v_cndmask_b32_e64 v77, 0, 0x80000000, vcc_lo 0 0.00
2378 v_xor_b32_e32 v70, v75, v77 0 0.00
2379 _L83:
2380 s_mov_b32 exec_lo, s29 0 0.00
2381 _L81:
2382 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00
2383 v_cndmask_b32_e64 v72, 0, -1, s24 0 0.00
2384 v_cndmask_b32_e64 v75, 0, -1, s26 0 0.00
2385 v_cndmask_b32_e64 v46, v79, 0x3eaaaaab, s27 0 0.00
2386 v_cndmask_b32_e64 v104, v104, 0x3eaaaaab, s27 0 0.00
2387 v_cndmask_b32_e64 v71, v72, 0, s27 0 0.00
2388 v_cndmask_b32_e64 v70, v75, 0, s27 0 0.00
2389 s_mov_b32 exec_lo, s28 0 0.00
2390 v_mul_f32_e32 v79, 0.15915494, v71 0 0.00
2391 v_mul_f32_e32 v81, 0.15915494, v70 0 0.00
2392 v_cos_f32_e32 v82, v79 0 0.00
2393 v_cos_f32_e32 v79, v81 0 0.00
2394 v_mul_f32_e32 v81, v82, v79 0 0.00
2395 v_cmp_lt_f32_e64 s24, v81, 0 0 0.00
2396 s_andn2_b32 exec_lo, s28, s24 0 0.00
2397 s_cbranch_execz _L84 0 0.00
2398 BBF0_86:
2399 v_add_f32_e32 v77, 1.0, v82 0 0.00
2400 v_add_f32_e32 v81, 1.0, v79 0 0.00
2401 v_mul_f32_e32 v83, 0.15915494, v71 0 0.00
2402 v_mul_f32_e32 v84, 0.15915494, v70 0 0.00
2403 v_mul_f32_e32 v86, v104, v46 0 0.00
2404 v_max_f32_e32 v77, 0x3089705f, v77 0 0.00
2405 v_max_f32_e32 v81, 0x3089705f, v81 0 0.00
2406 v_sin_f32_e32 v85, v83 0 0.00
2407 v_add_f32_e32 v88, v70, v71 0 0.00
2408 v_rcp_f32_e32 v77, v77 0 0.00
2409 v_sin_f32_e32 v84, v84 0 0.00
2410 v_mul_f32_e32 v79, v79, v85 0 0.00
2411 v_mul_f32_e32 v83, 0x3f2aaaab, v77 0 0.00
2412 v_madmk_f32 v87, v77, 0xbf2aaaab, v46 0 0.00
2413 v_rcp_f32_e32 v77, v81 0 0.00
2414 v_mul_f32_e64 v81, v85, v46 mul:2 0 0.00
2415 v_mac_f32_e32 v79, v82, v84 0 0.00
2416 v_mul_f32_e32 v90, v83, v85 0 0.00
2417 v_mul_f32_e32 v24, v87, v87 0 0.00
2418 v_mac_f32_e32 v81, v84, v104 0 0.00
2419 v_mac_f32_e32 v90, v85, v83 0 0.00
2420 v_madmk_f32 v85, v77, 0xbf2aaaab, v104 0 0.00
2421 v_mul_f32_e32 v77, 0x3f2aaaab, v77 0 0.00
2422 v_mac_f32_e32 v81, v84, v104 0 0.00
2423 v_mul_f32_e32 v104, v88, v88 0 0.00
2424 v_mac_f32_e32 v24, v85, v85 0 0.00
2425 v_mac_f32_e32 v90, v84, v77 0 0.00
2426 v_mul_f32_e32 v74, v83, v77 0 0.00
2427 v_mad_f32 v81, -v86, v79, v81 0 0.00
2428 v_mul_f32_e64 v83, |v88|, v104 0 0.00
2429 v_sqrt_f32_e32 v24, v24 0 0.00
2430 v_mac_f32_e32 v90, v84, v77 0 0.00
2431 v_mad_f32 v90, -v74, v79, v90 0 0.00
2432 v_sub_f32_e32 v74, v71, v70 0 0.00
2433 v_mul_f32_e32 v79, 0x3e19999a, v90 0 0.00
2434 v_mul_f32_e64 v82, |v74|, 0x3d8f5c29 0 0.00
2435 v_mul_f32_e64 v74, |v74|, 0x3bf5c28f 0 0.00
2436 v_mad_f32 v79, v81, 0x3e19999a, -v79 0 0.00
2437 v_mad_f32 v82, |v88|, 0x3ba3d70a, v82 0 0.00
2438 v_madmk_f32 v83, v83, 0x369b3073, v74 0 0.00
2439 v_mul_f32_e64 v74, |v79|, 0x3fc66666 0 0.00
2440 v_mac_f32_e32 v74, v83, v104 0 0.00
2441 v_mad_f32 v77, v82, v24, v74 0 0.00
2442 _L84:
2443 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00
2444 v_cndmask_b32_e64 v77, v50, 2.0, s24 0 0.00
2445 s_mov_b32 exec_lo, s28 0 0.00
2446 v_mul_f32_e32 v74, v78, v77 0 0.00
2447 v_mul_f32_e32 v74, v2, v74 0 0.00
2448 v_cmp_le_f32_e64 s24, v74, 0x3e800000 0 0.00
2449 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v40 0 0.00
2450 s_or_b32 vcc_lo, s24, vcc_lo 0 0.00
2451 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00
2452 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
2453 s_andn2_b32 s23, s23, exec_lo 0 0.00
2454 s_cbranch_scc0 _L85 0 0.00
2455 BBF0_87:
2456 s_and_b32 exec_lo, s24, s23 0 0.00
2457 _L80:
2458 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00
2459 s_and_b32 exec_lo, s25, s23 0 0.00
2460 s_ff1_i32_b32 s25, exec_lo 0 0.00
2461 s_mov_b32 s24, exec_lo 0 0.00
2462 s_lshl_b32 s26, 1, s25 0 0.00
2463 v_lshlrev_b32_e32 v41, 1, v41 0 0.00
2464 s_and_b32 s26, s26, exec_lo 0 0.00
2465 v_ldexp_f32 v40, v40, -1 0 0.00
2466 s_and_saveexec_b32 s26, s26 0 0.00
2467 s_cbranch_execz _L86 0 0.00
2468 BBF0_88:
2469 s_bcnt1_i32_b32 s27, s24 0 0.00
2470 v_mov_b32_e32 v66, s27 0 0.00
2471 s_waitcnt_depctr 0xffe3 0 0.00
2472 buffer_atomic_add v66, off, s[8:11], 0 offset:32 glc 0 0.00
2473 _L86:
2474 s_waitcnt_depctr 0xffe3 0 0.00
2475 s_mov_b32 exec_lo, s26 0 0.00
2476 v_mbcnt_lo_u32_b32 v69, s24, 0 0 0.00
2477 s_waitcnt vmcnt(0) 0 0.00
2478 v_readlane_b32 s24, v66, s25 0 0.00
2479 v_cvt_f32_u32_e32 v72, v41 0 0.00
2480 v_mov_b32_e32 v71, v40 0 0.00
2481 v_mov_b32_e32 v70, v72 0 0.00
2482 v_add_nc_i32 v66, s24, v69 0 0.00
2483 v_mov_b32_e32 v69, v59 0 0.00
2484 v_mul_lo_u32 v66, v66, 12 0 0.00
2485 s_waitcnt_depctr 0xffe3 0 0.00
2486 buffer_store_dwordx3 v[69:71], v66, s[16:19], 0 offen glc 0 0.00
2487 s_branch _L87 0 0.00
2488 _L85:
2489 s_mov_b32 exec_lo, s22 0 0.00
2490 v_add_nc_u32_e32 v34, 1, v41 0 0.00
2491 s_mov_b32 s21, exec_lo 0 0.00
2492 s_ff1_i32_b32 s22, exec_lo 0 0.00
2493 v_ffbl_b32_e32 v24, v34 0 0.00
2494 s_lshl_b32 s23, 1, s22 0 0.00
2495 s_and_b32 s23, s23, exec_lo 0 0.00
2496 v_min_u32_e32 v24, 32, v24 0 0.00
2497 v_lshlrev_b32_e64 v50, v24, 1 0 0.00
2498 v_cvt_f32_u32_e32 v67, v50 0 0.00
2499 v_lshrrev_b32_e32 v41, v24, v34 0 0.00
2500 v_mul_f32_e32 v40, v40, v67 0 0.00
2501 s_and_saveexec_b32 s23, s23 0 0.00
2502 s_cbranch_execz _L88 0 0.00
2503 BBF0_89:
2504 s_bcnt1_i32_b32 s24, s21 0 0.00
2505 v_mov_b32_e32 v46, s24 0 0.00
2506 s_waitcnt_depctr 0xffe3 0 0.00
2507 buffer_atomic_add v46, off, s[8:11], 0 offset:32 glc 0 0.00
2508 _L88:
2509 s_waitcnt_depctr 0xffe3 0 0.00
2510 s_mov_b32 exec_lo, s23 0 0.00
2511 v_sub_f32_e32 v65, v70, v71 0 0.00
2512 s_waitcnt vmcnt(0) 0 0.00
2513 v_readlane_b32 s22, v46, s22 0 0.00
2514 v_mbcnt_lo_u32_b32 v46, s21, 0 0 0.00
2515 v_mov_b32_e32 v84, v61 0 0.00
2516 v_mov_b32_e32 v86, v40 0 0.00
2517 v_mul_f32_e32 v67, v65, v65 0 0.00
2518 v_add_f32_e32 v77, v71, v70 0 0.00
2519 v_mul_f32_e32 v82, v67, v67 0 0.00
2520 v_mad_f32 v83, v67, 0xbccccccd, 1.0 0 0.00
2521 v_mul_f32_e32 v81, v77, v77 0 0.00
2522 s_mov_b32 s21, 0xbc6a0ea1 0 0.00
2523 s_mov_b32 s23, 0x3979a934 0 0.00
2524 s_mov_b32 s24, 0x388fa325 0 0.00
2525 s_mov_b32 s25, 0x3b21e3b8 0 0.00
2526 s_mov_b32 s26, 0xb84c68e7 0 0.00
2527 v_madmk_f32 v83, v82, 0x39b3719e, v83 0 0.00
2528 v_madak_f32 v68, s21, v67, 0x40c00000 0 0.00
2529 v_madak_f32 v50, s26, v67, 0x3a088889 0 0.00
2530 v_madak_f32 v72, s25, v67, 0xbd2aaaab 0 0.00
2531 v_madak_f32 v74, s24, v67, 0xba3b3ee7 0 0.00
2532 v_madak_f32 v79, s23, v67, 0xbdcccccd 0 0.00
2533 v_madmk_f32 v85, v82, 0xb8c28a7f, v68 0 0.00
2534 v_mul_f32_e32 v68, v67, v82 0 0.00
2535 v_cvt_f32_u32_e32 v67, v41 0 0.00
2536 v_add_nc_i32 v46, s22, v46 0 0.00
2537 v_madmk_f32 v79, v82, 0x378e44a1, v79 0 0.00
2538 v_madmk_f32 v72, v82, 0xb81c6fca, v72 0 0.00
2539 v_madmk_f32 v82, v68, 0x3494ab4c, v85 0 0.00
2540 v_mov_b32_e32 v85, v67 0 0.00
2541 v_mul_lo_u32 v46, v46, 12 0 0.00
2542 v_madmk_f32 v50, v81, 0xb6500cec, v50 0 0.00
2543 v_madmk_f32 v74, v81, 0xb70526e7, v74 0 0.00
2544 s_waitcnt_depctr 0xffe3 0 0.00
2545 buffer_store_dwordx3 v[84:86], v46, s[16:19], 0 offen glc 0 0.00
2546 v_mac_f32_e32 v79, v74, v81 0 0.00
2547 v_mac_f32_e32 v72, v50, v81 0 0.00
2548 v_madmk_f32 v46, v68, 0xb601da25, v83 0 0.00
2549 v_rcp_f32_e32 v50, v78 0 0.00
2550 v_mac_f32_e32 v82, v79, v81 0 0.00
2551 v_mac_f32_e32 v46, v72, v81 0 0.00
2552 v_mul_f32_e32 v74, v65, v82 0 0.00
2553 v_mul_f32_e64 v65, -v20, v46 0 0.00
2554 v_cmp_gt_f32_e64 vcc_lo, 0x3a83126f, |v74| 0 0.00
2555 v_mul_f32_e32 v67, v65, v50 0 0.00
2556 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00
2557 s_cbranch_execz _L89 0 0.00
2558 BBF0_90:
2559 v_mad_f32 v65, v74, -0.5, v77 0 0.00
2560 v_cmp_gt_f32_e64 s22, 0x3a83126f, |v67| 0 0.00
2561 s_andn1_saveexec_b32 s23, s22 0 0.00
2562 s_cbranch_execz _L90 0 0.00
2563 BBF0_91:
2564 v_mad_f32 v50, -v67, v65, -1.0 0 0.00
2565 v_mul_f32_e32 v68, v74, v67 0 0.00
2566 v_mad_f32 v50, -v65, v67, v50 0 0.00
2567 v_ldexp_f32 v24, -v68, 1 0 0.00
2568 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v50| 0 0.00
2569 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00
2570 s_cbranch_execz _L91 0 0.00
2571 BBF0_92:
2572 v_add_f32_e64 v79, |v50|, -1.0 0 0.00
2573 v_mov_b32_e32 v81, 0xbf4f5c29 0 0.00
2574 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v50| 0 0.00
2575 v_cndmask_b32_e64 v83, 0.5, 0x3f23fe5d, vcc_lo 0 0.00
2576 v_mov_b32_e32 v82, 0x3f6a311b 0 0.00
2577 v_sqrt_f32_e64 v84, |v79| 0 0.00
2578 v_cndmask_b32_e32 v81, 0xbe1fbe77, v81, vcc_lo 0 0.00
2579 s_mov_b32 s25, 0x3f715bef 0 0.00
2580 v_cndmask_b32_e32 v85, 0x3e255531, v82, vcc_lo 0 0.00
2581 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v50| 0 0.00
2582 v_mad_f32 v81, v83, |v50|, v81 0 0.00
2583 v_mul_f32_e32 v79, v79, v84 0 0.00
2584 v_mad_f32 v85, v81, |v50|, v85 0 0.00
2585 v_madak_f32 v82, s25, v79, 0x3f490fdb 0 0.00
2586 v_cndmask_b32_e32 v81, v85, v82, vcc_lo 0 0.00
2587 _L91:
2588 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
2589 v_mul_f32_e64 v79, |v50|, 0x3e32e5ab 0 0.00
2590 v_sin_f32_e32 v79, v79 0 0.00
2591 v_mul_f32_e32 v81, 0x3f693710, v79 0 0.00
2592 s_mov_b32 exec_lo, s24 0 0.00
2593 v_cmp_gt_f32_e32 vcc_lo, 0, v50 0 0.00
2594 v_cndmask_b32_e64 v79, 0, -1, vcc_lo 0 0.00
2595 v_cmp_lt_f32_e32 vcc_lo, 0, v50 0 0.00
2596 v_mad_f32 v68, v68, -2.0, v50 0 0.00
2597 v_add_co_ci_u32_e64 v79, vcc_lo, v79, 0, vcc_lo 0 0.00
2598 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v68| 0 0.00
2599 v_cvt_f32_i32_e32 v79, v79 0 0.00
2600 v_mul_f32_e32 v34, v81, v79 0 0.00
2601 s_andn2_b32 exec_lo, s24, vcc_lo 0 0.00
2602 s_cbranch_execz _L92 0 0.00
2603 BBF0_93:
2604 v_add_f32_e64 v79, |v68|, -1.0 0 0.00
2605 v_mov_b32_e32 v81, 0xbf4f5c29 0 0.00
2606 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v68| 0 0.00
2607 v_cndmask_b32_e64 v83, 0.5, 0x3f23fe5d, vcc_lo 0 0.00
2608 s_mov_b32 s25, 0x3f715bef 0 0.00
2609 v_sqrt_f32_e64 v84, |v79| 0 0.00
2610 v_cndmask_b32_e32 v86, 0xbe1fbe77, v81, vcc_lo 0 0.00
2611 v_mov_b32_e32 v81, 0x3f6a311b 0 0.00
2612 v_mad_f32 v86, v83, |v68|, v86 0 0.00
2613 v_cndmask_b32_e32 v87, 0x3e255531, v81, vcc_lo 0 0.00
2614 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v68| 0 0.00
2615 v_mul_f32_e32 v79, v79, v84 0 0.00
2616 v_mad_f32 v87, v86, |v68|, v87 0 0.00
2617 v_madak_f32 v81, s25, v79, 0x3f490fdb 0 0.00
2618 v_cndmask_b32_e32 v79, v87, v81, vcc_lo 0 0.00
2619 _L92:
2620 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00
2621 v_mul_f32_e64 v79, |v68|, 0x3e32e5ab 0 0.00
2622 v_sin_f32_e32 v79, v79 0 0.00
2623 v_mul_f32_e32 v79, 0x3f693710, v79 0 0.00
2624 s_mov_b32 exec_lo, s24 0 0.00
2625 v_rcp_f32_e32 v83, v24 0 0.00
2626 v_mul_f32_e32 v84, v74, v50 0 0.00
2627 v_mad_f32 v65, -v84, v83, v65 0 0.00
2628 v_cmp_gt_f32_e32 vcc_lo, 0, v68 0 0.00
2629 v_cndmask_b32_e64 v81, 0, -1, vcc_lo 0 0.00
2630 v_cmp_lt_f32_e32 vcc_lo, 0, v68 0 0.00
2631 v_mad_f32 v67, v65, v67, 1.0 0 0.00
2632 v_add_co_ci_u32_e64 v68, vcc_lo, v81, 0, vcc_lo 0 0.00
2633 v_mul_f32_e32 v65, v65, v67 0 0.00
2634 v_cvt_f32_i32_e32 v68, v68 0 0.00
2635 v_sqrt_f32_e64 v67, |v65| 0 0.00
2636 v_mad_f32 v70, v79, v68, -v34 0 0.00
2637 v_mul_f32_e32 v68, v70, v67 0 0.00
2638 v_mul_f32_e32 v67, v68, v83 0 0.00
2639 _L90:
2640 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00
2641 s_cbranch_execz _L93 0 0.00
2642 BBF0_94:
2643 v_sqrt_f32_e64 v50, |v65| 0 0.00
2644 v_add_f32_e32 v68, v74, v65 0 0.00
2645 v_mov_b32_e32 v24, v74 0 0.00
2646 v_sqrt_f32_e64 v82, |v68| 0 0.00
2647 v_mul_f32_e32 v34, v65, v50 0 0.00
2648 v_mov_b32_e32 v50, v65 0 0.00
2649 v_mad_f32 v70, v68, v82, -v34 0 0.00
2650 v_rcp_f32_e32 v68, v74 0 0.00
2651 v_mul_f32_e32 v79, 0x3f2aaaab, v70 0 0.00
2652 v_mul_f32_e32 v67, v79, v68 0 0.00
2653 _L93:
2654 s_mov_b32 exec_lo, s23 0 0.00
2655 v_cndmask_b32_e64 v68, 0, 2, s22 0 0.00
2656 _L89:
2657 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00
2658 s_cbranch_execz _L94 0 0.00
2659 BBF0_95:
2660 v_mad_f32 v50, v77, v67, 1.0 0 0.00
2661 v_mov_b32_e32 v24, 0 0 0.00
2662 v_mov_b32_e32 v34, 0 0 0.00
2663 v_mov_b32_e32 v68, 1 0 0.00
2664 v_mov_b32_e32 v70, 0 0 0.00
2665 v_mul_f32_e32 v50, v77, v50 0 0.00
2666 v_sqrt_f32_e64 v67, |v50| 0 0.00
2667 v_mov_b32_e32 v50, 0 0 0.00
2668 _L94:
2669 s_mov_b32 exec_lo, s21 0 0.00
2670 v_ldexp_f32 v72, v46, -2 0 0.00
2671 v_mul_f32_e32 v83, v60, v78 0 0.00
2672 v_mov_b32_e32 v32, 0 0 0.00
2673 s_movk_i32 s26, 0xffff 0 0.00
2674 v_ldexp_f32 v82, v74, -1 0 0.00
2675 v_rcp_f32_e32 v72, v72 0 0.00
2676 s_mov_b32 s24, exec_lo 0 0.00
2677 s_mov_b32 s25, exec_lo 0 0.00
2678 v_rcp_f32_e32 v65, v24 0 0.00
2679 v_mul_f32_e32 v72, v83, v72 0 0.00
2680 v_cmp_eq_f32_e64 s21, v66, 1.0 0 0.00
2681 v_cmp_ne_i32_e64 s22, v68, 1 0 0.00
2682 v_cmp_ne_i32_e64 s23, v68, 2 0 0.00
2683 v_rcp_f32_e32 v68, v46 0 0.00
2684 v_sqrt_f32_e32 v72, v72 0 0.00
2685 v_mul_f32_e32 v67, v72, v67 0 0.00
2686 v_max_f32_e32 v72, 0x358637bd, v78 0 0.00
2687 v_ceil_f32_e32 v67, v67 0 0.00
2688 v_rcp_f32_e32 v78, v72 0 0.00
2689 v_max_f32_e32 v104, 1.0, v67 0 0.00
2690 v_cvt_u32_f32_e32 v83, v104 0 0.00
2691 v_mul_f32_e64 v78, -v20, v78 0 0.00
2692 v_rcp_f32_e32 v72, v104 0 0.00
2693 s_nop 0 0 0.00
2694 s_nop 0 0 0.00
2695 s_nop 0 0 0.00
2696 s_nop 0 0 0.00
2697 s_nop 0 0 0.00
2698 s_nop 0 0 0.00
2699 s_nop 0 0 0.00
2700 _L105:
2701 v_cmp_eq_i32_e64 s26, s26, 0 0 0.00
2702 v_add_co_ci_u32_e64 v84, vcc_lo, v32, 0, s26 0 0.00
2703 v_cmp_gt_u32_e32 vcc_lo, v83, v84 0 0.00
2704 s_and_saveexec_b32 s27, vcc_lo 0 0.00
2705 s_andn2_b32 exec_lo, s27, exec_lo 0 0.00
2706 s_andn2_b32 s25, s25, exec_lo 0 0.00
2707 s_cbranch_scc0 _L95 0 0.00
2708 BBF0_96:
2709 s_and_b32 exec_lo, s27, s25 0 0.00
2710 v_add_co_ci_u32_e64 v32, vcc_lo, v32, 1, s26 0 0.00
2711 v_cmp_eq_i32_e32 vcc_lo, v83, v32 0 0.00
2712 s_and_b32 vcc_lo, vcc_lo, s21 0 0.00
2713 s_andn1_saveexec_b32 s26, vcc_lo 0 0.00
2714 s_cbranch_execz _L96 0 0.00
2715 BBF0_97:
2716 v_cvt_f32_u32_e32 v85, v32 0 0.00
2717 v_mul_f32_e32 v85, v85, v72 0 0.00
2718 s_and_saveexec_b32 s27, s22 0 0.00
2719 s_cbranch_execz _L97 0 0.00
2720 BBF0_98:
2721 v_mad_f32 v85, v70, v85, v34 0 0.00
2722 s_and_saveexec_b32 s28, s23 0 0.00
2723 s_cbranch_execz _L98 0 0.00
2724 BBF0_99:
2725 v_cmp_gt_f32_e64 vcc_lo, 0x3f337960, |v85| 0 0.00
2726 s_andn1_saveexec_b32 s29, vcc_lo 0 0.00
2727 s_cbranch_execz _L99 0 0.00
2728 BBF0_100:
2729 v_cmp_gt_f32_e64 vcc_lo, 0x3f673b59, |v85| 0 0.00
2730 s_andn1_saveexec_b32 s30, vcc_lo 0 0.00
2731 s_cbranch_execz _L100 0 0.00
2732 BBF0_101:
2733 v_mov_b32_e32 v32, 0xbf83a110 0 0.00
2734 v_cmp_gt_f32_e64 vcc_lo, 0x40027ca5, |v85| 0 0.00
2735 v_cndmask_b32_e64 v86, 2.0, 0x3fc7d00b, vcc_lo 0 0.00
2736 v_cndmask_b32_e32 v32, 0xbe98df6c, v32, vcc_lo 0 0.00
2737 v_mad_f32 v32, v86, |v85|, v32 0 0.00
2738 v_mov_b32_e32 v86, 0x3f21d928 0 0.00
2739 v_sqrt_f32_e32 v32, v32 0 0.00
2740 v_cndmask_b32_e32 v87, 0x3e1fbe77, v86, vcc_lo 0 0.00
2741 v_add_f32_e32 v32, v87, v32 0 0.00
2742 _L100:
2743 s_andn2_b32 exec_lo, s30, exec_lo 0 0.00
2744 s_cbranch_execz _L101 0 0.00
2745 BBF0_102:
2746 v_add_f32_e64 v32, |v85|, 0xbf490fdb 0 0.00
2747 v_log_f32_e64 v86, |v32| 0 0.00
2748 v_cmp_gt_f32_e32 vcc_lo, 0, v32 0 0.00
2749 v_cndmask_b32_e64 v87, 0, -1, vcc_lo 0 0.00
2750 v_cmp_lt_f32_e32 vcc_lo, 0, v32 0 0.00
2751 v_mul_f32_e32 v32, 0x3f2aaaab, v86 0 0.00
2752 v_add_co_ci_u32_e64 v86, vcc_lo, v87, 0, vcc_lo 0 0.00
2753 v_exp_f32_e32 v32, v32 0 0.00
2754 v_cvt_f32_i32_e32 v87, v86 0 0.00
2755 v_mul_f32_e32 v32, v32, v87 0 0.00
2756 v_mad_f32 v32, v32, 0x3f852018, 1.0 0 0.00
2757 _L101:
2758 s_mov_b32 exec_lo, s30 0 0.00
2759 _L99:
2760 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00
2761 s_cbranch_execz _L102 0 0.00
2762 BBF0_103:
2763 s_mov_b32 s30, 0xbca86ba3 0 0.00
2764 v_mul_f32_e64 v32, |v85|, 0x3f8c8168 0 0.00
2765 v_mad_f32 v86, |v85|, 0xbf8c8168, 1.0 0 0.00
2766 v_mad_f32 v87, |v85|, s30, 0x3d981627 0 0.00
2767 v_sqrt_f32_e32 v86, v86 0 0.00
2768 v_madak_f32 v87, v87, v32, 0xbe593484 0 0.00
2769 v_madak_f32 v32, v87, v32, 0x3fc90da4 0 0.00
2770 v_mad_f32 v32, -v32, v86, 0x3fc90fdb 0 0.00
2771 v_mul_f32_e32 v32, 0x3f693710, v32 0 0.00
2772 _L102:
2773 s_mov_b32 exec_lo, s29 0 0.00
2774 v_cmp_gt_f32_e32 vcc_lo, 0, v85 0 0.00
2775 v_cndmask_b32_e64 v86, 0, -1, vcc_lo 0 0.00
2776 v_cmp_lt_f32_e32 vcc_lo, 0, v85 0 0.00
2777 v_add_co_ci_u32_e64 v85, vcc_lo, v86, 0, vcc_lo 0 0.00
2778 v_cvt_f32_i32_e32 v85, v85 0 0.00
2779 v_mul_f32_e32 v32, v32, v85 0 0.00
2780 _L98:
2781 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00
2782 s_cbranch_execz _L103 0 0.00
2783 BBF0_104:
2784 v_log_f32_e64 v32, |v85| 0 0.00
2785 v_cmp_gt_f32_e32 vcc_lo, 0, v85 0 0.00
2786 v_cndmask_b32_e64 v86, 0, -1, vcc_lo 0 0.00
2787 v_cmp_lt_f32_e32 vcc_lo, 0, v85 0 0.00
2788 v_mul_f32_e32 v32, 0x3f2aaaab, v32 0 0.00
2789 v_add_co_ci_u32_e64 v85, vcc_lo, v86, 0, vcc_lo 0 0.00
2790 v_exp_f32_e32 v32, v32 0 0.00
2791 v_cvt_f32_i32_e32 v85, v85 0 0.00
2792 v_mul_f32_e32 v32, v85, v32 0 0.00
2793 _L103:
2794 s_mov_b32 exec_lo, s28 0 0.00
2795 v_sub_f32_e32 v32, v32, v50 0 0.00
2796 v_mul_f32_e32 v85, v32, v65 0 0.00
2797 _L97:
2798 s_mov_b32 exec_lo, s27 0 0.00
2799 v_add_f32_e64 v87, v85, -1.0 div:2 0 0.00
2800 v_mul_f32_e32 v32, v85, v85 0 0.00
2801 v_add_f32_e64 v91, v85, -2.0 div:2 0 0.00
2802 v_ldexp_f32 v98, v85, -1 0 0.00
2803 v_mad_f32 v87, v74, v87, v77 0 0.00
2804 v_mul_f32_e32 v86, v74, v32 0 0.00
2805 v_mad_f32 v96, v82, v91, v77 0 0.00
2806 v_mul_f32_e32 v87, v85, v87 0 0.00
2807 v_ldexp_f32 v32, v86, -1 0 0.00
2808 v_mad_f32 v96, v96, v98, -v71 0 0.00
2809 v_mul_f32_e32 v88, v87, v87 0 0.00
2810 v_mul_f32_e64 v89, v86, v32 div:2 0 0.00
2811 v_mul_f32_e64 v90, v86, v87 div:2 0 0.00
2812 v_mul_f32_e32 v93, v88, v88 0 0.00
2813 v_mul_f32_e32 v94, v88, v89 0 0.00
2814 v_mac_f32_e32 v90, v87, v32 0 0.00
2815 v_mul_f32_e32 v91, v89, v89 0 0.00
2816 v_mad_f32 v98, v89, 0xbbcccccd, 1.0 0 0.00
2817 v_mac_f32_e32 v94, v88, v89 0 0.00
2818 v_mul_f32_e64 v97, v90, v88 mul:2 0 0.00
2819 v_mul_f32_e32 v92, v90, v89 0 0.00
2820 v_mac_f32_e32 v94, v90, v90 0 0.00
2821 v_mul_f32_e32 v99, v87, v97 0 0.00
2822 v_mul_f32_e32 v101, v87, v92 0 0.00
2823 v_mul_f32_e32 v95, 0x38c30c31, v94 0 0.00
2824 v_mac_f32_e32 v99, v93, v32 0 0.00
2825 v_mac_f32_e32 v101, v87, v92 0 0.00
2826 v_mul_f32_e32 v92, 0.15915494, v96 0 0.00
2827 v_madmk_f32 v96, v88, 0xbd2aaaab, v98 0 0.00
2828 v_madmk_f32 v100, v93, 0x3a088889, v95 0 0.00
2829 v_mul_f32_e32 v95, v90, v97 0 0.00
2830 v_mac_f32_e32 v101, v94, v32 0 0.00
2831 v_mad_f32 v98, v32, v87, v90 0 0.00
2832 v_mac_f32_e32 v97, v90, v88 0 0.00
2833 v_madmk_f32 v91, v91, 0x3797b426, v100 0 0.00
2834 v_mac_f32_e32 v95, v93, v89 0 0.00
2835 v_mul_f32_e64 v89, v86, v89 div:2 0 0.00
2836 v_mac_f32_e32 v95, v94, v88 0 0.00
2837 v_add_f32_e32 v94, v96, v91 0 0.00
2838 v_mul_f32_e32 v91, v98, v87 0 0.00
2839 v_mul_f32_e32 v98, 0x39c30c31, v89 0 0.00
2840 v_mul_f32_e32 v96, 0x3672b9d6, v101 0 0.00
2841 v_mul_f32_e32 v87, v87, v97 0 0.00
2842 v_madmk_f32 v89, v95, 0xb521d13a, v94 0 0.00
2843 v_add_f32_e32 v95, -1.0, v85 0 0.00
2844 v_mul_f32_e32 v94, v88, v93 0 0.00
2845 v_madmk_f32 v91, v91, 0x3b088889, v98 0 0.00
2846 v_madmk_f32 v90, v99, 0x379c09c1, v96 0 0.00
2847 v_mac_f32_e32 v87, v32, v93 0 0.00
2848 v_mad_f32 v100, v82, v95, v77 0 0.00
2849 v_madmk_f32 v89, v94, 0xb6500d01, v89 0 0.00
2850 v_sin_f32_e32 v95, v92 0 0.00
2851 v_mad_f32 v91, v86, 0x3d2aaaab, -v91 0 0.00
2852 v_mul_f32_e32 v86, v93, v93 0 0.00
2853 v_mad_f32 v94, v100, v85, -v71 0 0.00
2854 v_mul_f32_e32 v87, v87, v88 0 0.00
2855 v_add_f32_e32 v90, v91, v90 0 0.00
2856 v_madmk_f32 v86, v86, 0x3238ef1d, v89 0 0.00
2857 v_mul_f32_e32 v32, 0.15915494, v94 0 0.00
2858 v_mul_f32_e32 v94, v85, v68 0 0.00
2859 v_cos_f32_e32 v85, v92 0 0.00
2860 v_madmk_f32 v90, v87, 0xb3b8ef1d, v90 0 0.00
2861 v_mul_f32_e32 v88, v94, v95 0 0.00
2862 v_cos_f32_e32 v91, v32 0 0.00
2863 v_sin_f32_e32 v89, v32 0 0.00
2864 v_mul_f32_e32 v87, v94, v85 0 0.00
2865 v_mul_f32_e32 v85, v86, v88 0 0.00
2866 v_mul_f32_e32 v32, v90, v88 0 0.00
2867 v_mad_f32 v85, -v90, v87, -v85 0 0.00
2868 v_mad_f32 v32, v86, v87, -v32 0 0.00
2869 v_mac_f32_e32 v85, v78, v91 0 0.00
2870 v_mac_f32_e32 v32, v78, v89 0 0.00
2871 v_mul_f32_e32 v86, v80, v85 0 0.00
2872 v_mul_f32_e32 v85, v76, v85 0 0.00
2873 v_mad_f32 v86, v76, v32, -v86 0 0.00
2874 v_mac_f32_e32 v85, v80, v32 0 0.00
2875 v_add_f32_e32 v32, v28, v86 0 0.00
2876 v_add_f32_e32 v85, v36, v85 0 0.00
2877 _L96:
2878 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00
2879 v_mov_b32_e32 v85, v44 0 0.00
2880 v_mov_b32_e32 v32, v45 0 0.00
2881 s_mov_b32 exec_lo, s26 0 0.00
2882 s_ff1_i32_b32 s27, exec_lo 0 0.00
2883 s_mov_b32 s26, exec_lo 0 0.00
2884 s_lshl_b32 s28, 1, s27 0 0.00
2885 s_and_b32 s28, s28, exec_lo 0 0.00
2886 s_and_saveexec_b32 s28, s28 0 0.00
2887 s_cbranch_execz _L104 0 0.00
2888 BBF0_105:
2889 s_bcnt1_i32_b32 s29, s26 0 0.00
2890 v_mov_b32_e32 v86, s29 0 0.00
2891 s_waitcnt_depctr 0xffe3 0 0.00
2892 buffer_atomic_add v86, off, s[8:11], 0 offset:28 glc 0 0.00
2893 _L104:
2894 s_waitcnt_depctr 0xffe3 0 0.00
2895 s_mov_b32 exec_lo, s28 0 0.00
2896 s_waitcnt vmcnt(0) 0 0.00
2897 v_readlane_b32 s27, v86, s27 0 0.00
2898 v_cndmask_b32_e64 v87, v63, v85, s3 0 0.00
2899 v_cndmask_b32_e64 v51, v85, v63, s3 0 0.00
2900 v_cndmask_b32_e64 v88, v64, v32, s3 0 0.00
2901 v_cndmask_b32_e64 v46, v32, v64, s3 0 0.00
2902 v_mbcnt_lo_u32_b32 v86, s26, 0 0 0.00
2903 v_mul_f32_e32 v90, v25, v87 0 0.00
2904 v_mul_f32_e32 v89, v25, v51 0 0.00
2905 v_mul_f32_e32 v51, v8, v51 0 0.00
2906 v_mul_f32_e32 v87, v8, v87 0 0.00
2907 v_mov_b32_e32 v63, v85 0 0.00
2908 v_mac_f32_e32 v90, v31, v88 0 0.00
2909 v_mac_f32_e32 v89, v31, v46 0 0.00
2910 v_mac_f32_e32 v51, v29, v46 0 0.00
2911 v_mac_f32_e32 v87, v29, v88 0 0.00
2912 v_mov_b32_e32 v64, v32 0 0.00
2913 v_add_nc_i32 v86, s27, v86 0 0.00
2914 v_add_f32_e32 v88, v23, v89 0 0.00
2915 v_add_f32_e32 v89, v16, v51 0 0.00
2916 v_add_f32_e32 v90, v23, v90 0 0.00
2917 v_add_f32_e32 v91, v16, v87 0 0.00
2918 v_mul_lo_u32 v86, v86, 24 0 0.00
2919 s_movk_i32 s26, 0x0 0 0.00
2920 v_mov_b32_e32 v32, v84 0 0.00
2921 s_waitcnt_depctr 0xffe3 0 0.00
2922 s_clause 0x1 0 0.00
2923 buffer_store_dword v7, v86, s[12:15], 0 offen glc 0 0.00
2924 buffer_store_dwordx4 v[88:91], v86, s[12:15], 0 offen offset:8 glc 0 0.00
2925 v_min3_f32 v48, v88, v90, v48 0 0.00
2926 v_min3_f32 v42, v89, v91, v42 0 0.00
2927 v_max3_f32 v107, v88, v90, v107 0 0.00
2928 v_max3_f32 v106, v89, v91, v106 0 0.00
2929 s_branch _L105 0 0.00
2930 _L95:
2931 s_mov_b32 exec_lo, s24 0 0.00
2932 v_mov_b32_e32 v28, v1 0 0.00
2933 v_mov_b32_e32 v34, v47 0 0.00
2934 v_mov_b32_e32 v36, v69 0 0.00
2935 v_mov_b32_e32 v65, v66 0 0.00
2936 v_mov_b32_e32 v32, v105 0 0.00
2937 s_branch _L106 0 0.00
2938 _L77:
2939 s_mov_b32 exec_lo, s6 0 0.00
2940 _L75:
2941 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
2942 v_mov_b32_e32 v106, v67 0 0.00
2943 v_mov_b32_e32 v107, v68 0 0.00
2944 v_mov_b32_e32 v42, v71 0 0.00
2945 v_mov_b32_e32 v48, v72 0 0.00
2946 s_mov_b32 exec_lo, s2 0 0.00
2947 v_cmp_eq_i32_e64 s2, v17, 1 0 0.00
2948 v_cmp_eq_i32_e32 vcc_lo, 0, v10 0 0.00
2949 s_or_b32 vcc_lo, s2, vcc_lo 0 0.00
2950 s_andn1_saveexec_b32 s2, vcc_lo 0 0.00
2951 s_cbranch_execz _L107 0 0.00
2952 BBF0_106:
2953 v_and_b32_e32 v5, 0x3000000, v6 0 0.00
2954 v_cmp_eq_i32_e64 s3, v5, 0x2000000 0 0.00
2955 s_and_saveexec_b32 s6, s3 0 0.00
2956 s_cbranch_execz _L108 0 0.00
2957 BBF0_107:
2958 s_waitcnt lgkmcnt(0) 0 0.00
2959 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
2960 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00
2961 v_mul_f32_e32 v6, v13, v37 0 0.00
2962 v_mul_f32_e32 v16, v13, v15 0 0.00
2963 v_mul_f32_e32 v9, v14, v37 0 0.00
2964 v_mul_f32_e32 v17, v14, v15 0 0.00
2965 s_mov_b32 s16, 0xbc996e30 0 0.00
2966 v_mac_f32_e32 v6, v11, v39 0 0.00
2967 v_mac_f32_e32 v16, v11, v111 0 0.00
2968 v_mac_f32_e32 v9, v12, v39 0 0.00
2969 v_mac_f32_e32 v17, v12, v111 0 0.00
2970 v_add_f32_e32 v44, v18, v6 0 0.00
2971 v_add_f32_e32 v16, v18, v16 0 0.00
2972 v_add_f32_e32 v45, v19, v9 0 0.00
2973 v_add_f32_e32 v10, v19, v17 0 0.00
2974 v_sub_f32_e32 v17, v44, v16 0 0.00
2975 v_sub_f32_e32 v23, v45, v10 0 0.00
2976 v_mul_f32_e32 v16, v17, v17 0 0.00
2977 v_mac_f32_e32 v16, v23, v23 0 0.00
2978 v_sqrt_f32_e32 v10, v16 0 0.00
2979 v_max_f32_e32 v10, 0x3e800000, v10 0 0.00
2980 v_rcp_f32_e32 v10, v10 0 0.00
2981 v_mad_f32 v10, 0xbe800000, v10, 1.0 0 0.00
2982 v_sub_f32_e32 v16, 1.0, v10 0 0.00
2983 v_madak_f32 v17, s16, v10, 0x3d981627 0 0.00
2984 v_sqrt_f32_e32 v22, v16 0 0.00
2985 v_madak_f32 v17, v17, v10, 0xbe593484 0 0.00
2986 v_madak_f32 v16, v17, v10, 0x3fc90da4 0 0.00
2987 v_mul_f32_e64 v10, v16, v22 mul:2 0 0.00
2988 v_max_f32_e32 v10, 0x38d1b717, v10 0 0.00
2989 v_rcp_f32_e32 v16, v10 0 0.00
2990 v_mul_f32_e32 v10, 0.15915494, v10 0 0.00
2991 v_sin_f32_e32 v23, v10 0 0.00
2992 v_mul_f32_e32 v16, 0x40490fdb, v16 0 0.00
2993 v_cos_f32_e32 v24, v10 0 0.00
2994 v_ceil_f32_e32 v16, v16 0 0.00
2995 v_cvt_u32_f32_e32 v16, v16 0 0.00
2996 v_mov_b32_e32 v10, v16 0 0.00
2997 s_waitcnt lgkmcnt(0) 0 0.00
2998 s_waitcnt_depctr 0xffe3 0 0.00
2999 buffer_atomic_add v10, off, s[8:11], 0 offset:28 glc 0 0.00
3000 s_waitcnt_depctr 0xffe3 0 0.00
3001 s_movk_i32 s10, 0xffff 0 0.00
3002 s_mov_b32 s8, exec_lo 0 0.00
3003 s_mov_b32 s9, exec_lo 0 0.00
3004 v_mov_b32_e32 v1, 0 0 0.00
3005 v_add_nc_u32_e32 v17, -1, v16 0 0.00
3006 s_nop 0 0 0.00
3007 s_nop 0 0 0.00
3008 s_nop 0 0 0.00
3009 _L110:
3010 v_cmp_eq_i32_e64 s10, s10, 0 0 0.00
3011 v_add_co_ci_u32_e64 v28, vcc_lo, v1, 0, s10 0 0.00
3012 v_cmp_gt_u32_e32 vcc_lo, v17, v28 0 0.00
3013 s_and_saveexec_b32 s11, vcc_lo 0 0.00
3014 s_andn2_b32 exec_lo, s11, exec_lo 0 0.00
3015 s_andn2_b32 s9, s9, exec_lo 0 0.00
3016 s_cbranch_scc0 _L109 0 0.00
3017 BBF0_108:
3018 s_and_b32 exec_lo, s11, s9 0 0.00
3019 v_mul_f32_e64 v31, -v23, v0 0 0.00
3020 v_mul_f32_e32 v6, v24, v0 0 0.00
3021 s_waitcnt vmcnt(0) 0 0.00
3022 v_add_co_ci_u32_e64 v1, vcc_lo, v1, v10, s10 0 0.00
3023 s_movk_i32 s10, 0x0 0 0.00
3024 v_mac_f32_e32 v31, v21, v24 0 0.00
3025 v_mad_f32 v0, v21, v23, v6 0 0.00
3026 v_mul_lo_u32 v1, v1, 24 0 0.00
3027 v_add_f32_e32 v9, v15, v31 0 0.00
3028 v_add_f32_e32 v32, v111, v0 0 0.00
3029 v_mov_b32_e32 v21, v31 0 0.00
3030 v_mul_f32_e32 v34, v13, v9 0 0.00
3031 v_mul_f32_e32 v9, v14, v9 0 0.00
3032 v_mac_f32_e32 v34, v11, v32 0 0.00
3033 v_mac_f32_e32 v9, v12, v32 0 0.00
3034 v_add_f32_e32 v46, v18, v34 0 0.00
3035 v_add_f32_e32 v47, v19, v9 0 0.00
3036 s_waitcnt_depctr 0xffe3 0 0.00
3037 s_clause 0x1 0 0.00
3038 buffer_store_dword v7, v1, s[12:15], 0 offen glc 0 0.00
3039 buffer_store_dwordx4 v[44:47], v1, s[12:15], 0 offen offset:8 glc 0 0.00
3040 v_mov_b32_e32 v1, v28 0 0.00
3041 v_min3_f32 v48, v44, v46, v48 0 0.00
3042 v_min3_f32 v42, v45, v47, v42 0 0.00
3043 v_max3_f32 v107, v44, v46, v107 0 0.00
3044 v_max3_f32 v106, v45, v47, v106 0 0.00
3045 v_mov_b32_e32 v45, v47 0 0.00
3046 v_mov_b32_e32 v44, v46 0 0.00
3047 s_branch _L110 0 0.00
3048 _L109:
3049 s_mov_b32 exec_lo, s8 0 0.00
3050 v_mul_f32_e32 v1, v13, v43 0 0.00
3051 v_mul_f32_e32 v9, v14, v43 0 0.00
3052 s_waitcnt vmcnt(0) 0 0.00
3053 v_add_nc_u32_e32 v6, v16, v10 0 0.00
3054 v_mac_f32_e32 v1, v11, v38 0 0.00
3055 v_mac_f32_e32 v9, v12, v38 0 0.00
3056 v_mul_lo_u32 v6, v6, 24 0 0.00
3057 v_add_f32_e32 v46, v18, v1 0 0.00
3058 v_add_f32_e32 v47, v19, v9 0 0.00
3059 v_add_nc_u32_e32 v16, 0xffffffe8, v6 0 0.00
3060 v_min3_f32 v48, v44, v46, v48 0 0.00
3061 v_min3_f32 v42, v45, v47, v42 0 0.00
3062 v_max3_f32 v107, v44, v46, v107 0 0.00
3063 v_max3_f32 v106, v45, v47, v106 0 0.00
3064 v_add_nc_u32_e32 v6, -16, v6 0 0.00
3065 s_waitcnt_depctr 0xffe3 0 0.00
3066 buffer_store_dword v7, v16, s[12:15], 0 offen glc 0 0.00
3067 buffer_store_dwordx4 v[44:47], v6, s[12:15], 0 offen glc 0 0.00
3068 _L108:
3069 s_waitcnt_depctr 0xffe3 0 0.00
3070 s_andn2_b32 exec_lo, s6, s3 0 0.00
3071 s_cbranch_execz _L111 0 0.00
3072 BBF0_109:
3073 s_waitcnt lgkmcnt(0) 0 0.00
3074 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
3075 v_cmp_eq_i32_e32 vcc_lo, 0x1000000, v5 0 0.00
3076 v_cndmask_b32_e64 v4, 1, 3, vcc_lo 0 0.00
3077 s_waitcnt lgkmcnt(0) 0 0.00
3078 s_waitcnt_depctr 0xffe3 0 0.00
3079 buffer_atomic_add v4, off, s[8:11], 0 offset:28 glc 0 0.00
3080 s_waitcnt_depctr 0xffe3 0 0.00
3081 s_and_saveexec_b32 s3, vcc_lo 0 0.00
3082 s_cbranch_execz _L112 0 0.00
3083 BBF0_110:
3084 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3085 v_mul_f32_e32 v10, v13, v37 0 0.00
3086 v_mad_f32 v36, v20, v27, v37 0 0.00
3087 v_mad_f32 v25, v20, v27, v43 0 0.00
3088 v_mad_f32 v40, v20, v30, v39 0 0.00
3089 v_mul_f32_e32 v9, v14, v37 0 0.00
3090 v_mac_f32_e32 v10, v11, v39 0 0.00
3091 v_mul_f32_e32 v22, v13, v36 0 0.00
3092 v_mul_f32_e32 v15, v14, v36 0 0.00
3093 v_mad_f32 v17, v20, v30, v38 0 0.00
3094 v_mul_f32_e32 v20, v13, v25 0 0.00
3095 v_add_f32_e32 v28, v18, v10 0 0.00
3096 v_mul_f32_e32 v10, v14, v25 0 0.00
3097 v_mul_f32_e32 v21, v13, v43 0 0.00
3098 v_mul_f32_e32 v23, v14, v43 0 0.00
3099 v_mac_f32_e32 v15, v12, v40 0 0.00
3100 v_mac_f32_e32 v22, v11, v40 0 0.00
3101 v_mac_f32_e32 v9, v12, v39 0 0.00
3102 v_mac_f32_e32 v20, v11, v17 0 0.00
3103 v_mac_f32_e32 v10, v12, v17 0 0.00
3104 v_mac_f32_e32 v21, v11, v38 0 0.00
3105 v_mac_f32_e32 v23, v12, v38 0 0.00
3106 v_add_f32_e32 v31, v19, v15 0 0.00
3107 v_add_f32_e32 v30, v18, v22 0 0.00
3108 v_add_f32_e32 v29, v19, v9 0 0.00
3109 s_waitcnt vmcnt(0) 0 0.00
3110 v_mul_lo_u32 v16, v4, 24 0 0.00
3111 v_add_f32_e32 v32, v18, v20 0 0.00
3112 v_add_f32_e32 v33, v19, v10 0 0.00
3113 v_add_f32_e32 v34, v18, v21 0 0.00
3114 v_add_f32_e32 v35, v19, v23 0 0.00
3115 v_min3_f32 v3, v28, v30, v48 0 0.00
3116 v_min3_f32 v2, v29, v31, v42 0 0.00
3117 v_max3_f32 v27, v28, v30, v107 0 0.00
3118 v_max3_f32 v1, v29, v31, v106 0 0.00
3119 s_waitcnt lgkmcnt(0) 0 0.00
3120 s_clause 0x3 0 0.00
3121 buffer_store_dword v7, v16, s[8:11], 0 offen offset:24 glc 0 0.00
3122 buffer_store_dwordx4 v[28:31], v16, s[8:11], 0 offen offset:32 glc 0 0.00
3123 buffer_store_dword v7, v16, s[8:11], 0 offen offset:48 glc 0 0.00
3124 buffer_store_dwordx4 v[32:35], v16, s[8:11], 0 offen offset:56 glc 0 0.00
3125 v_min3_f32 v48, v34, v32, v3 0 0.00
3126 v_max3_f32 v106, v35, v33, v1 0 0.00
3127 v_min3_f32 v42, v35, v33, v2 0 0.00
3128 v_max3_f32 v107, v34, v32, v27 0 0.00
3129 s_nop 0 0 0.00
3130 _L112:
3131 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
3132 v_mov_b32_e32 v25, v43 0 0.00
3133 v_mov_b32_e32 v36, v37 0 0.00
3134 v_mov_b32_e32 v17, v38 0 0.00
3135 v_mov_b32_e32 v40, v39 0 0.00
3136 s_mov_b32 exec_lo, s3 0 0.00
3137 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3138 s_waitcnt vmcnt(0) 0 0.00
3139 v_mul_lo_u32 v10, v4, 24 0 0.00
3140 v_mul_f32_e32 v4, v13, v36 0 0.00
3141 v_mul_f32_e32 v1, v14, v36 0 0.00
3142 v_mul_f32_e32 v13, v13, v25 0 0.00
3143 v_mul_f32_e32 v15, v14, v25 0 0.00
3144 v_mac_f32_e32 v4, v11, v40 0 0.00
3145 v_mac_f32_e32 v1, v12, v40 0 0.00
3146 v_mac_f32_e32 v13, v11, v17 0 0.00
3147 v_mac_f32_e32 v15, v12, v17 0 0.00
3148 v_add_f32_e32 v11, v18, v4 0 0.00
3149 v_add_f32_e32 v12, v19, v1 0 0.00
3150 v_add_f32_e32 v13, v18, v13 0 0.00
3151 v_add_f32_e32 v14, v19, v15 0 0.00
3152 s_waitcnt lgkmcnt(0) 0 0.00
3153 s_waitcnt_depctr 0xffe3 0 0.00
3154 s_clause 0x1 0 0.00
3155 buffer_store_dword v7, v10, s[8:11], 0 offen glc 0 0.00
3156 buffer_store_dwordx4 v[11:14], v10, s[8:11], 0 offen offset:8 glc 0 0.00
3157 v_min3_f32 v48, v11, v13, v48 0 0.00
3158 v_min3_f32 v42, v12, v14, v42 0 0.00
3159 v_max3_f32 v107, v11, v13, v107 0 0.00
3160 v_max3_f32 v106, v12, v14, v106 0 0.00
3161 _L111:
3162 s_mov_b32 exec_lo, s6 0 0.00
3163 _L107:
3164 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
3165 s_cbranch_execz _L113 0 0.00
3166 BBF0_111:
3167 v_mul_f32_e32 v10, v26, v26 0 0.00
3168 v_mac_f32_e32 v10, v9, v9 0 0.00
3169 v_rsq_f32_e32 v0, v10 0 0.00
3170 v_and_b32_e32 v10, 0x30000000, v6 0 0.00
3171 v_cmp_ne_i32_e32 vcc_lo, 0, v10 0 0.00
3172 v_mul_f32_e32 v23, v0, v5 0 0.00
3173 v_mul_f32_e64 v0, v23, v9 div:2 0 0.00
3174 v_mul_f32_e64 v5, v23, v26 div:2 0 0.00
3175 v_subrev_f32_e32 v23, v0, v111 0 0.00
3176 v_add_f32_e32 v22, v5, v15 0 0.00
3177 v_add_f32_e32 v0, v0, v111 0 0.00
3178 v_subrev_f32_e32 v8, v5, v15 0 0.00
3179 s_and_saveexec_b32 s3, vcc_lo 0 0.00
3180 s_cbranch_execz _L114 0 0.00
3181 BBF0_112:
3182 v_mul_f32_e32 v5, v33, v26 0 0.00
3183 v_mul_f32_e32 v30, v35, v26 0 0.00
3184 v_cmp_ne_i32_e32 vcc_lo, 0x10000000, v10 0 0.00
3185 v_mad_f32 v5, v35, v9, -v5 0 0.00
3186 v_mac_f32_e32 v30, v33, v9 0 0.00
3187 s_and_saveexec_b32 s6, vcc_lo 0 0.00
3188 s_cbranch_execz _L115 0 0.00
3189 BBF0_113:
3190 v_cmp_eq_i32_e32 vcc_lo, 0x20000000, v10 0 0.00
3191 s_waitcnt lgkmcnt(0) 0 0.00
3192 s_and_saveexec_b32 s8, vcc_lo 0 0.00
3193 s_cbranch_execz _L116 0 0.00
3194 BBF0_114:
3195 s_load_dwordx4 s[12:15], s[0:1], 0x80 0 0.00
3196 s_load_dwordx4 s[16:19], s[0:1], 0xa0 0 0.00
3197 v_cmp_lt_f32_e32 vcc_lo, 0, v5 0 0.00
3198 v_cndmask_b32_e32 v9, v37, v8, vcc_lo 0 0.00
3199 v_cndmask_b32_e32 v20, v39, v0, vcc_lo 0 0.00
3200 v_cndmask_b32_e32 v10, v22, v43, vcc_lo 0 0.00
3201 v_cndmask_b32_e32 v21, v43, v22, vcc_lo 0 0.00
3202 v_mul_f32_e32 v24, v13, v15 0 0.00
3203 v_mul_f32_e32 v22, v13, v9 0 0.00
3204 v_cndmask_b32_e32 v16, v23, v38, vcc_lo 0 0.00
3205 v_cndmask_b32_e32 v17, v38, v23, vcc_lo 0 0.00
3206 v_mul_f32_e32 v23, v14, v9 0 0.00
3207 v_mac_f32_e32 v24, v11, v111 0 0.00
3208 v_mac_f32_e32 v22, v11, v20 0 0.00
3209 v_mul_f32_e32 v25, v14, v15 0 0.00
3210 v_min_f32_e64 v27, |v30|, |v5| 0 0.00
3211 v_mac_f32_e32 v23, v12, v20 0 0.00
3212 v_add_f32_e32 v24, v18, v24 0 0.00
3213 v_add_f32_e32 v33, v18, v22 0 0.00
3214 v_mac_f32_e32 v25, v12, v111 0 0.00
3215 s_mov_b32 s9, 0x3caaae5f 0 0.00
3216 v_add_f32_e32 v34, v19, v23 0 0.00
3217 v_cndmask_b32_e32 v6, v8, v37, vcc_lo 0 0.00
3218 v_cndmask_b32_e32 v0, v0, v39, vcc_lo 0 0.00
3219 v_add_f32_e32 v26, v19, v25 0 0.00
3220 v_subrev_f32_e32 v25, v24, v33 0 0.00
3221 v_max_f32_e64 v24, |v30|, |v5| 0 0.00
3222 v_cmp_gt_f32_e64 vcc_lo, |v5|, |v30| 0 0.00
3223 v_subrev_f32_e32 v26, v26, v34 0 0.00
3224 v_mul_f32_e32 v25, v25, v25 0 0.00
3225 v_rcp_f32_e32 v24, v24 0 0.00
3226 v_mac_f32_e32 v25, v26, v26 0 0.00
3227 v_sqrt_f32_e32 v25, v25 0 0.00
3228 v_mul_f32_e32 v29, v27, v24 0 0.00
3229 v_mul_f32_e32 v26, v29, v29 0 0.00
3230 v_max_f32_e32 v24, 0x3e800000, v25 0 0.00
3231 v_madak_f32 v25, s9, v26, 0xbdae5a36 0 0.00
3232 s_mov_b32 s9, 0xbc996e30 0 0.00
3233 v_rcp_f32_e32 v24, v24 0 0.00
3234 v_madak_f32 v25, v26, v25, 0x3e3876e2 0 0.00
3235 v_madak_f32 v25, v26, v25, 0xbea91d04 0 0.00
3236 v_mad_f32 v24, 0xbe800000, v24, 1.0 0 0.00
3237 v_madak_f32 v26, v26, v25, 0x3f7ff738 0 0.00
3238 v_sub_f32_e32 v25, 1.0, v24 0 0.00
3239 v_madak_f32 v27, s9, v24, 0x3d981627 0 0.00
3240 v_sqrt_f32_e32 v28, v25 0 0.00
3241 v_mul_f32_e32 v25, v29, v26 0 0.00
3242 v_madak_f32 v27, v27, v24, 0xbe593484 0 0.00
3243 v_madak_f32 v25, -2.0, v25, 0x3fc90fdb 0 0.00
3244 v_madak_f32 v27, v27, v24, 0x3fc90da4 0 0.00
3245 v_cndmask_b32_e32 v31, 0, v25, vcc_lo 0 0.00
3246 v_min_f32_e32 v25, v5, v30 0 0.00
3247 v_mul_f32_e64 v24, v27, v28 mul:2 0 0.00
3248 v_max_f32_e32 v5, v5, v30 0 0.00
3249 v_cmp_gt_f32_e64 vcc_lo, -v30, v30 0 0.00
3250 v_mac_f32_e32 v31, v29, v26 0 0.00
3251 v_cmp_gt_f32_e64 s9, -v25, v25 0 0.00
3252 v_cndmask_b32_e64 v26, 0, 0xc0490fdb, vcc_lo 0 0.00
3253 v_max_f32_e32 v24, 0x38d1b717, v24 0 0.00
3254 v_cmp_ge_f32_e64 vcc_lo, v5, -v5 0 0.00
3255 v_add_f32_e32 v5, v31, v26 0 0.00
3256 v_rcp_f32_e32 v25, v24 0 0.00
3257 s_and_b32 vcc_lo, s9, vcc_lo 0 0.00
3258 v_cndmask_b32_e64 v26, 0, 0x80000000, vcc_lo 0 0.00
3259 v_xor_b32_e32 v26, v5, v26 0 0.00
3260 v_mul_f32_e64 v5, |v26|, v25 0 0.00
3261 v_ceil_f32_e32 v5, v5 0 0.00
3262 v_cvt_u32_f32_e32 v5, v5 0 0.00
3263 v_max_u32_e32 v26, 1, v5 0 0.00
3264 v_mul_f32_e32 v5, 0.15915494, v24 0 0.00
3265 v_sin_f32_e32 v24, v5 0 0.00
3266 v_cos_f32_e32 v28, v5 0 0.00
3267 v_mov_b32_e32 v5, v26 0 0.00
3268 s_waitcnt lgkmcnt(0) 0 0.00
3269 s_waitcnt_depctr 0xffe3 0 0.00
3270 buffer_atomic_add v5, off, s[12:15], 0 offset:28 glc 0 0.00
3271 s_movk_i32 s11, 0xffff 0 0.00
3272 s_mov_b32 s9, exec_lo 0 0.00
3273 s_mov_b32 s10, exec_lo 0 0.00
3274 v_sub_f32_e32 v4, v20, v111 0 0.00
3275 v_mov_b32_e32 v20, 0 0 0.00
3276 v_sub_f32_e32 v2, v9, v15 0 0.00
3277 v_add_nc_u32_e32 v25, -1, v26 0 0.00
3278 s_nop 0 0 0.00
3279 s_nop 0 0 0.00
3280 s_nop 0 0 0.00
3281 s_nop 0 0 0.00
3282 _L118:
3283 v_cmp_eq_i32_e64 s11, s11, 0 0 0.00
3284 v_add_co_ci_u32_e64 v31, vcc_lo, v20, 0, s11 0 0.00
3285 v_cmp_gt_u32_e32 vcc_lo, v25, v31 0 0.00
3286 s_and_saveexec_b32 s20, vcc_lo 0 0.00
3287 s_andn2_b32 exec_lo, s20, exec_lo 0 0.00
3288 s_andn2_b32 s10, s10, exec_lo 0 0.00
3289 s_cbranch_scc0 _L117 0 0.00
3290 BBF0_115:
3291 s_and_b32 exec_lo, s20, s10 0 0.00
3292 v_mul_f32_e64 v29, -v24, v4 0 0.00
3293 v_mul_f32_e32 v9, v28, v4 0 0.00
3294 s_waitcnt vmcnt(0) 0 0.00
3295 v_add_co_ci_u32_e64 v20, vcc_lo, v20, v5, s11 0 0.00
3296 s_movk_i32 s11, 0x0 0 0.00
3297 v_mac_f32_e32 v29, v2, v28 0 0.00
3298 v_mad_f32 v4, v2, v24, v9 0 0.00
3299 v_mul_lo_u32 v20, v20, 24 0 0.00
3300 v_add_f32_e32 v23, v15, v29 0 0.00
3301 v_add_f32_e32 v30, v111, v4 0 0.00
3302 v_mov_b32_e32 v2, v29 0 0.00
3303 v_mul_f32_e32 v32, v13, v23 0 0.00
3304 v_mul_f32_e32 v23, v14, v23 0 0.00
3305 v_mac_f32_e32 v32, v11, v30 0 0.00
3306 v_mac_f32_e32 v23, v12, v30 0 0.00
3307 v_add_f32_e32 v35, v18, v32 0 0.00
3308 v_add_f32_e32 v36, v19, v23 0 0.00
3309 s_waitcnt_depctr 0xffe3 0 0.00
3310 s_clause 0x1 0 0.00
3311 buffer_store_dword v7, v20, s[16:19], 0 offen glc 0 0.00
3312 buffer_store_dwordx4 v[33:36], v20, s[16:19], 0 offen offset:8 glc 0 0.00
3313 v_mov_b32_e32 v20, v31 0 0.00
3314 v_min3_f32 v48, v33, v35, v48 0 0.00
3315 v_min3_f32 v42, v34, v36, v42 0 0.00
3316 v_max3_f32 v107, v33, v35, v107 0 0.00
3317 v_max3_f32 v106, v34, v36, v106 0 0.00
3318 v_mov_b32_e32 v34, v36 0 0.00
3319 v_mov_b32_e32 v33, v35 0 0.00
3320 s_branch _L118 0 0.00
3321 _L117:
3322 s_mov_b32 exec_lo, s9 0 0.00
3323 s_waitcnt vmcnt(0) 0 0.00
3324 v_add_nc_u32_e32 v2, v26, v5 0 0.00
3325 v_mul_f32_e32 v26, v13, v10 0 0.00
3326 v_mul_f32_e32 v5, v14, v10 0 0.00
3327 s_ff1_i32_b32 s10, exec_lo 0 0.00
3328 s_mov_b32 s9, exec_lo 0 0.00
3329 v_mul_lo_u32 v2, v2, 24 0 0.00
3330 s_lshl_b32 s11, 1, s10 0 0.00
3331 s_and_b32 s11, s11, exec_lo 0 0.00
3332 v_mac_f32_e32 v26, v11, v16 0 0.00
3333 v_mac_f32_e32 v5, v12, v16 0 0.00
3334 v_add_nc_u32_e32 v9, 0xffffffe8, v2 0 0.00
3335 v_add_f32_e32 v35, v18, v26 0 0.00
3336 v_add_f32_e32 v36, v19, v5 0 0.00
3337 v_add_nc_u32_e32 v2, -16, v2 0 0.00
3338 s_waitcnt_depctr 0xffe3 0 0.00
3339 s_clause 0x1 0 0.00
3340 buffer_store_dword v7, v9, s[16:19], 0 offen glc 0 0.00
3341 buffer_store_dwordx4 v[33:36], v2, s[16:19], 0 offen glc 0 0.00
3342 v_min3_f32 v5, v33, v35, v48 0 0.00
3343 v_min3_f32 v8, v34, v36, v42 0 0.00
3344 v_max3_f32 v1, v33, v35, v107 0 0.00
3345 v_max3_f32 v4, v34, v36, v106 0 0.00
3346 v_mbcnt_lo_u32_b32 v2, s9, 0 0 0.00
3347 s_and_saveexec_b32 s11, s11 0 0.00
3348 s_cbranch_execz _L119 0 0.00
3349 BBF0_116:
3350 s_bcnt1_i32_b32 s9, s9 0 0.00
3351 v_mov_b32_e32 v3, s9 0 0.00
3352 buffer_atomic_add v3, off, s[12:15], 0 offset:28 glc 0 0.00
3353 _L119:
3354 s_waitcnt_depctr 0xffe3 0 0.00
3355 s_mov_b32 exec_lo, s11 0 0.00
3356 s_waitcnt vmcnt(0) 0 0.00
3357 v_readlane_b32 s9, v3, s10 0 0.00
3358 v_mul_f32_e32 v10, v13, v6 0 0.00
3359 v_mul_f32_e32 v3, v14, v6 0 0.00
3360 v_mul_f32_e32 v6, v13, v21 0 0.00
3361 v_mac_f32_e32 v10, v11, v0 0 0.00
3362 v_mac_f32_e32 v3, v12, v0 0 0.00
3363 v_mac_f32_e32 v6, v11, v17 0 0.00
3364 v_add_f32_e32 v9, v18, v10 0 0.00
3365 v_add_f32_e32 v10, v19, v3 0 0.00
3366 v_mul_f32_e32 v3, v14, v21 0 0.00
3367 v_add_f32_e32 v11, v18, v6 0 0.00
3368 v_add_nc_i32 v2, s9, v2 0 0.00
3369 v_mac_f32_e32 v3, v12, v17 0 0.00
3370 v_min3_f32 v48, v9, v11, v5 0 0.00
3371 v_max3_f32 v107, v9, v11, v1 0 0.00
3372 v_mul_lo_u32 v2, v2, 24 0 0.00
3373 v_add_f32_e32 v12, v19, v3 0 0.00
3374 v_min3_f32 v42, v10, v12, v8 0 0.00
3375 v_max3_f32 v106, v10, v12, v4 0 0.00
3376 s_waitcnt_depctr 0xffe3 0 0.00
3377 buffer_store_dword v7, v2, s[16:19], 0 offen glc 0 0.00
3378 buffer_store_dwordx4 v[9:12], v2, s[16:19], 0 offen offset:8 glc 0 0.00
3379 _L116:
3380 s_waitcnt_depctr 0xffe3 0 0.00
3381 s_mov_b32 exec_lo, s8 0 0.00
3382 _L115:
3383 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00
3384 s_cbranch_execz _L120 0 0.00
3385 BBF0_117:
3386 v_mul_f32_e32 v15, v5, v5 0 0.00
3387 s_waitcnt lgkmcnt(0) 0 0.00
3388 v_cmp_neq_f32_e64 s8, v5, 0 0 0.00
3389 v_fma_mix_f32 v2, v6, v6, 0 op_sel_hi:[1, 1, 0] 0 0.00
3390 v_mac_f32_e32 v15, v30, v30 0 0.00
3391 v_sqrt_f32_e32 v15, v15 0 0.00
3392 v_add_f32_e32 v17, v30, v15 0 0.00
3393 v_ldexp_f32 v6, v15, 1 0 0.00
3394 v_mul_f32_e32 v15, v17, v2 0 0.00
3395 v_cmp_gt_f32_e32 vcc_lo, v15, v6 0 0.00
3396 s_and_b32 vcc_lo, vcc_lo, s8 0 0.00
3397 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
3398 s_andn1_saveexec_b32 s12, vcc_lo 0 0.00
3399 s_cbranch_execz _L121 0 0.00
3400 BBF0_118:
3401 s_ff1_i32_b32 s14, exec_lo 0 0.00
3402 s_mov_b32 s13, exec_lo 0 0.00
3403 s_lshl_b32 s15, 1, s14 0 0.00
3404 s_and_b32 s15, s15, exec_lo 0 0.00
3405 s_and_saveexec_b32 s15, s15 0 0.00
3406 s_cbranch_execz _L122 0 0.00
3407 BBF0_119:
3408 s_bcnt1_i32_b32 s16, s13 0 0.00
3409 s_mulk_i32 s16, 0x2 0 0.00
3410 v_mov_b32_e32 v2, s16 0 0.00
3411 s_waitcnt lgkmcnt(0) 0 0.00
3412 s_waitcnt_depctr 0xffe3 0 0.00
3413 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00
3414 _L122:
3415 s_waitcnt_depctr 0xffe3 0 0.00
3416 s_mov_b32 exec_lo, s15 0 0.00
3417 s_waitcnt vmcnt(0) 0 0.00
3418 v_readlane_b32 s14, v2, s14 0 0.00
3419 v_mbcnt_lo_u32_b32 v2, s13, 0 0 0.00
3420 v_mul_lo_u32 v2, v2, 2 0 0.00
3421 v_add_nc_i32 v6, s14, v2 0 0.00
3422 _L121:
3423 s_andn2_b32 exec_lo, s12, exec_lo 0 0.00
3424 s_cbranch_execz _L123 0 0.00
3425 BBF0_120:
3426 s_ff1_i32_b32 s14, exec_lo 0 0.00
3427 s_mov_b32 s13, exec_lo 0 0.00
3428 s_lshl_b32 s15, 1, s14 0 0.00
3429 s_and_b32 s15, s15, exec_lo 0 0.00
3430 s_and_saveexec_b32 s15, s15 0 0.00
3431 s_cbranch_execz _L124 0 0.00
3432 BBF0_121:
3433 s_bcnt1_i32_b32 s16, s13 0 0.00
3434 s_mulk_i32 s16, 0x3 0 0.00
3435 v_mov_b32_e32 v2, s16 0 0.00
3436 s_waitcnt lgkmcnt(0) 0 0.00
3437 s_waitcnt_depctr 0xffe3 0 0.00
3438 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00
3439 _L124:
3440 s_waitcnt_depctr 0xffe3 0 0.00
3441 s_mov_b32 exec_lo, s15 0 0.00
3442 s_waitcnt lgkmcnt(0) 0 0.00
3443 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3444 v_cmp_lt_f32_e32 vcc_lo, 0, v5 0 0.00
3445 v_cndmask_b32_e32 v6, v39, v38, vcc_lo 0 0.00
3446 v_cndmask_b32_e32 v17, v23, v0, vcc_lo 0 0.00
3447 v_cndmask_b32_e32 v15, v37, v43, vcc_lo 0 0.00
3448 v_cndmask_b32_e32 v10, v22, v8, vcc_lo 0 0.00
3449 v_rcp_f32_e32 v5, v5 0 0.00
3450 v_cndmask_b32_e32 v21, v37, v8, vcc_lo 0 0.00
3451 v_sub_f32_e32 v6, v17, v6 0 0.00
3452 v_cndmask_b32_e32 v20, v39, v0, vcc_lo 0 0.00
3453 v_sub_f32_e32 v25, v10, v15 0 0.00
3454 s_waitcnt vmcnt(0) 0 0.00
3455 v_readlane_b32 s14, v2, s14 0 0.00
3456 v_mul_f32_e32 v15, v14, v21 0 0.00
3457 v_mul_f32_e32 v24, v33, v6 0 0.00
3458 v_mbcnt_lo_u32_b32 v2, s13, 0 0 0.00
3459 v_mul_f32_e32 v6, v13, v21 0 0.00
3460 v_mac_f32_e32 v15, v12, v20 0 0.00
3461 v_mad_f32 v24, v35, v25, -v24 0 0.00
3462 v_mul_lo_u32 v2, v2, 3 0 0.00
3463 v_mul_f32_e32 v27, v24, v5 0 0.00
3464 v_add_f32_e32 v29, v19, v15 0 0.00
3465 v_mac_f32_e32 v6, v11, v20 0 0.00
3466 v_add_nc_i32 v2, s14, v2 0 0.00
3467 v_mad_f32 v10, -v9, v27, v10 0 0.00
3468 v_mad_f32 v17, -v26, v27, v17 0 0.00
3469 v_add_f32_e32 v28, v18, v6 0 0.00
3470 v_mul_lo_u32 v6, v2, 24 0 0.00
3471 v_mul_f32_e32 v24, v13, v10 0 0.00
3472 v_mul_f32_e32 v15, v14, v10 0 0.00
3473 v_cndmask_b32_e32 v8, v8, v10, vcc_lo 0 0.00
3474 v_cndmask_b32_e32 v39, v17, v39, vcc_lo 0 0.00
3475 v_cndmask_b32_e32 v37, v10, v37, vcc_lo 0 0.00
3476 v_mac_f32_e32 v24, v11, v17 0 0.00
3477 v_mac_f32_e32 v15, v12, v17 0 0.00
3478 v_cndmask_b32_e32 v0, v0, v17, vcc_lo 0 0.00
3479 v_add_f32_e32 v30, v18, v24 0 0.00
3480 v_add_f32_e32 v31, v19, v15 0 0.00
3481 s_waitcnt lgkmcnt(0) 0 0.00
3482 s_waitcnt_depctr 0xffe3 0 0.00
3483 s_clause 0x1 0 0.00
3484 buffer_store_dword v7, v6, s[8:11], 0 offen glc 0 0.00
3485 buffer_store_dwordx4 v[28:31], v6, s[8:11], 0 offen offset:8 glc 0 0.00
3486 v_min3_f32 v48, v28, v30, v48 0 0.00
3487 v_min3_f32 v42, v29, v31, v42 0 0.00
3488 v_max3_f32 v107, v28, v30, v107 0 0.00
3489 v_max3_f32 v106, v29, v31, v106 0 0.00
3490 v_add_nc_u32_e32 v6, 1, v2 0 0.00
3491 _L123:
3492 s_mov_b32 exec_lo, s12 0 0.00
3493 s_waitcnt lgkmcnt(0) 0 0.00
3494 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3495 v_mul_f32_e32 v17, v14, v22 0 0.00
3496 v_mul_f32_e32 v10, v13, v37 0 0.00
3497 v_mul_f32_e32 v1, v14, v37 0 0.00
3498 v_mul_f32_e32 v16, v13, v22 0 0.00
3499 v_mul_f32_e32 v22, v13, v8 0 0.00
3500 v_mac_f32_e32 v17, v12, v23 0 0.00
3501 v_mac_f32_e32 v10, v11, v39 0 0.00
3502 v_mac_f32_e32 v1, v12, v39 0 0.00
3503 v_mac_f32_e32 v16, v11, v23 0 0.00
3504 v_mac_f32_e32 v22, v11, v0 0 0.00
3505 v_add_f32_e32 v26, v19, v17 0 0.00
3506 v_mul_f32_e32 v17, v14, v8 0 0.00
3507 v_mul_f32_e32 v8, v13, v43 0 0.00
3508 v_mul_f32_e32 v13, v14, v43 0 0.00
3509 v_add_f32_e32 v25, v18, v16 0 0.00
3510 v_add_f32_e32 v24, v19, v1 0 0.00
3511 v_mac_f32_e32 v17, v12, v0 0 0.00
3512 v_mac_f32_e32 v8, v11, v38 0 0.00
3513 v_mac_f32_e32 v13, v12, v38 0 0.00
3514 v_add_f32_e32 v23, v18, v10 0 0.00
3515 v_mul_lo_u32 v6, v6, 24 0 0.00
3516 v_add_f32_e32 v10, v18, v22 0 0.00
3517 v_add_f32_e32 v11, v19, v17 0 0.00
3518 v_add_f32_e32 v12, v18, v8 0 0.00
3519 v_add_f32_e32 v13, v19, v13 0 0.00
3520 v_min3_f32 v9, v23, v25, v48 0 0.00
3521 v_min3_f32 v4, v24, v26, v42 0 0.00
3522 v_max3_f32 v5, v23, v25, v107 0 0.00
3523 v_max3_f32 v8, v24, v26, v106 0 0.00
3524 s_waitcnt lgkmcnt(0) 0 0.00
3525 s_waitcnt_depctr 0xffe3 0 0.00
3526 s_clause 0x3 0 0.00
3527 buffer_store_dword v7, v6, s[8:11], 0 offen glc 0 0.00
3528 buffer_store_dwordx4 v[23:26], v6, s[8:11], 0 offen offset:8 glc 0 0.00
3529 buffer_store_dword v7, v6, s[8:11], 0 offen offset:24 glc 0 0.00
3530 buffer_store_dwordx4 v[10:13], v6, s[8:11], 0 offen offset:32 glc 0 0.00
3531 v_min3_f32 v48, v12, v10, v9 0 0.00
3532 v_max3_f32 v106, v13, v11, v8 0 0.00
3533 v_min3_f32 v42, v13, v11, v4 0 0.00
3534 v_max3_f32 v107, v12, v10, v5 0 0.00
3535 _L120:
3536 s_mov_b32 exec_lo, s6 0 0.00
3537 _L114:
3538 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
3539 s_cbranch_execz _L113 0 0.00
3540 BBF0_122:
3541 s_waitcnt lgkmcnt(0) 0 0.00
3542 s_ff1_i32_b32 s8, exec_lo 0 0.00
3543 s_mov_b32 s6, exec_lo 0 0.00
3544 s_lshl_b32 s9, 1, s8 0 0.00
3545 s_and_b32 s9, s9, exec_lo 0 0.00
3546 s_and_saveexec_b32 s9, s9 0 0.00
3547 s_cbranch_execz _L125 0 0.00
3548 BBF0_123:
3549 s_load_dwordx4 s[12:15], s[0:1], 0x80 0 0.00
3550 s_bcnt1_i32_b32 s10, s6 0 0.00
3551 s_mulk_i32 s10, 0x2 0 0.00
3552 v_mov_b32_e32 v2, s10 0 0.00
3553 s_waitcnt lgkmcnt(0) 0 0.00
3554 s_waitcnt_depctr 0xffe3 0 0.00
3555 buffer_atomic_add v2, off, s[12:15], 0 offset:28 glc 0 0.00
3556 _L125:
3557 s_waitcnt_depctr 0xffe3 0 0.00
3558 s_mov_b32 exec_lo, s9 0 0.00
3559 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00
3560 s_waitcnt vmcnt(0) 0 0.00
3561 v_readlane_b32 s8, v2, s8 0 0.00
3562 v_mul_f32_e32 v6, v13, v37 0 0.00
3563 v_mbcnt_lo_u32_b32 v2, s6, 0 0 0.00
3564 v_mul_f32_e32 v10, v13, v22 0 0.00
3565 v_mul_f32_e32 v9, v14, v22 0 0.00
3566 v_mul_f32_e32 v5, v14, v37 0 0.00
3567 v_mac_f32_e32 v6, v11, v39 0 0.00
3568 v_mul_lo_u32 v2, v2, 2 0 0.00
3569 v_mul_f32_e32 v22, v13, v8 0 0.00
3570 v_mac_f32_e32 v9, v12, v23 0 0.00
3571 v_mac_f32_e32 v10, v11, v23 0 0.00
3572 v_add_f32_e32 v23, v18, v6 0 0.00
3573 v_mul_f32_e32 v17, v14, v8 0 0.00
3574 v_mul_f32_e32 v6, v13, v43 0 0.00
3575 v_mul_f32_e32 v13, v14, v43 0 0.00
3576 v_mac_f32_e32 v5, v12, v39 0 0.00
3577 v_add_nc_i32 v2, s8, v2 0 0.00
3578 v_mac_f32_e32 v22, v11, v0 0 0.00
3579 v_mac_f32_e32 v17, v12, v0 0 0.00
3580 v_mac_f32_e32 v6, v11, v38 0 0.00
3581 v_mac_f32_e32 v13, v12, v38 0 0.00
3582 v_add_f32_e32 v26, v19, v9 0 0.00
3583 v_add_f32_e32 v25, v18, v10 0 0.00
3584 v_add_f32_e32 v24, v19, v5 0 0.00
3585 v_mul_lo_u32 v2, v2, 24 0 0.00
3586 v_add_f32_e32 v9, v18, v22 0 0.00
3587 v_add_f32_e32 v10, v19, v17 0 0.00
3588 v_add_f32_e32 v11, v18, v6 0 0.00
3589 v_add_f32_e32 v12, v19, v13 0 0.00
3590 v_min3_f32 v3, v23, v25, v48 0 0.00
3591 v_min3_f32 v13, v24, v26, v42 0 0.00
3592 v_max3_f32 v8, v23, v25, v107 0 0.00
3593 v_max3_f32 v1, v24, v26, v106 0 0.00
3594 s_waitcnt lgkmcnt(0) 0 0.00
3595 s_waitcnt_depctr 0xffe3 0 0.00
3596 s_clause 0x3 0 0.00
3597 buffer_store_dword v7, v2, s[12:15], 0 offen glc 0 0.00
3598 buffer_store_dwordx4 v[23:26], v2, s[12:15], 0 offen offset:8 glc 0 0.00
3599 buffer_store_dword v7, v2, s[12:15], 0 offen offset:24 glc 0 0.00
3600 buffer_store_dwordx4 v[9:12], v2, s[12:15], 0 offen offset:32 glc 0 0.00
3601 v_min3_f32 v48, v11, v9, v3 0 0.00
3602 v_max3_f32 v106, v12, v10, v1 0 0.00
3603 v_min3_f32 v42, v12, v10, v13 0 0.00
3604 v_max3_f32 v107, v11, v9, v8 0 0.00
3605 _L113:
3606 s_mov_b32 exec_lo, s2 0 0.00
3607 _L32:
3608 s_andn2_b32 exec_lo, s4, exec_lo 0 0.00
3609 s_cbranch_execz _L31 0 0.00
3610 BBF0_124:
3611 v_cmp_eq_i32_e32 vcc_lo, 1, v9 0 0.00
3612 s_and_saveexec_b32 s2, vcc_lo 0 0.00
3613 v_mov_b32_e32 v106, 0xf2fc6f7c 0 0.00
3614 v_mov_b32_e32 v107, 0xf2fc6f7c 0 0.00
3615 v_mov_b32_e32 v42, 0x72fc6f7c 0 0.00
3616 v_mov_b32_e32 v48, 0x72fc6f7c 0 0.00
3617 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00
3618 s_cbranch_execz _L31 0 0.00
3619 BBF0_125:
3620 v_subrev_f32_e32 v9, v23, v109 0 0.00
3621 v_subrev_f32_e32 v3, v22, v110 0 0.00
3622 v_subrev_f32_e32 v10, v23, v4 0 0.00
3623 v_subrev_f32_e32 v1, v22, v108 0 0.00
3624 v_subrev_f32_e32 v2, v23, v111 0 0.00
3625 v_mul_f32_e32 v17, v9, v9 0 0.00
3626 v_mul_f32_e32 v0, v10, v10 0 0.00
3627 v_mac_f32_e32 v17, v3, v3 0 0.00
3628 v_mac_f32_e32 v0, v1, v1 0 0.00
3629 v_cmp_gt_f32_e64 s3, v17, 0x2b8cbccc 0 0.00
3630 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v0 0 0.00
3631 v_cndmask_b32_e64 v9, v2, v9, s3 0 0.00
3632 v_subrev_f32_e32 v2, v22, v15 0 0.00
3633 v_cndmask_b32_e64 v0, v2, v3, s3 0 0.00
3634 v_cndmask_b32_e32 v2, v9, v10, vcc_lo 0 0.00
3635 v_cndmask_b32_e32 v10, v0, v1, vcc_lo 0 0.00
3636 v_mul_f32_e32 v0, v2, v2 0 0.00
3637 v_mac_f32_e32 v0, v10, v10 0 0.00
3638 v_rsq_f32_e32 v1, v0 0 0.00
3639 v_lshrrev_b32_e32 v0, 2, v6 0 0.00
3640 v_and_b32_e32 v0, 0x3000000, v0 0 0.00
3641 v_mul_f32_e32 v2, v2, v1 0 0.00
3642 v_mul_f32_e32 v3, v10, v1 0 0.00
3643 v_cmp_eq_i32_e32 vcc_lo, 0x2000000, v0 0 0.00
3644 v_mul_f32_e64 v1, v5, v2 div:2 0 0.00
3645 v_mul_f32_e64 v32, v5, v3 div:2 0 0.00
3646 v_subrev_f32_e32 v5, v1, v22 0 0.00
3647 v_add_f32_e32 v15, v32, v23 0 0.00
3648 v_subrev_f32_e32 v27, v32, v23 0 0.00
3649 v_add_f32_e32 v6, v1, v22 0 0.00
3650 s_andn1_saveexec_b32 s3, vcc_lo 0 0.00
3651 s_cbranch_execz _L126 0 0.00
3652 BBF0_126:
3653 s_waitcnt lgkmcnt(0) 0 0.00
3654 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
3655 v_cmp_eq_i32_e32 vcc_lo, 0x1000000, v0 0 0.00
3656 v_cndmask_b32_e64 v0, 1, 3, vcc_lo 0 0.00
3657 s_waitcnt lgkmcnt(0) 0 0.00
3658 s_waitcnt_depctr 0xffe3 0 0.00
3659 buffer_atomic_add v0, off, s[8:11], 0 offset:28 glc 0 0.00
3660 s_waitcnt_depctr 0xffe3 0 0.00
3661 s_and_saveexec_b32 s6, vcc_lo 0 0.00
3662 s_cbranch_execz _L127 0 0.00
3663 BBF0_127:
3664 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3665 v_mad_f32 v1, -v20, v3, v5 0 0.00
3666 v_mad_f32 v4, -v20, v2, v15 0 0.00
3667 v_mul_f32_e32 v8, v13, v5 0 0.00
3668 v_mad_f32 v3, -v20, v3, v6 0 0.00
3669 v_mul_f32_e32 v5, v14, v5 0 0.00
3670 v_mul_f32_e32 v9, v14, v1 0 0.00
3671 v_mul_f32_e32 v10, v13, v1 0 0.00
3672 v_mac_f32_e32 v8, v11, v15 0 0.00
3673 v_mad_f32 v17, -v20, v2, v27 0 0.00
3674 v_mul_f32_e32 v2, v13, v3 0 0.00
3675 v_mac_f32_e32 v9, v12, v4 0 0.00
3676 v_mul_f32_e32 v24, v13, v6 0 0.00
3677 v_add_f32_e32 v33, v18, v8 0 0.00
3678 v_mul_f32_e32 v8, v14, v3 0 0.00
3679 v_mac_f32_e32 v10, v11, v4 0 0.00
3680 v_add_f32_e32 v36, v19, v9 0 0.00
3681 v_mul_f32_e32 v9, v14, v6 0 0.00
3682 v_mac_f32_e32 v5, v12, v15 0 0.00
3683 v_mac_f32_e32 v2, v11, v17 0 0.00
3684 v_mac_f32_e32 v8, v12, v17 0 0.00
3685 v_mac_f32_e32 v24, v11, v27 0 0.00
3686 v_mac_f32_e32 v9, v12, v27 0 0.00
3687 v_add_f32_e32 v35, v18, v10 0 0.00
3688 v_add_f32_e32 v34, v19, v5 0 0.00
3689 s_waitcnt vmcnt(0) 0 0.00
3690 v_mul_lo_u32 v16, v0, 24 0 0.00
3691 v_add_f32_e32 v29, v18, v2 0 0.00
3692 v_add_f32_e32 v30, v19, v8 0 0.00
3693 v_add_f32_e32 v31, v18, v24 0 0.00
3694 v_add_f32_e32 v32, v19, v9 0 0.00
3695 v_min3_f32 v9, v33, v35, 0x72fc6f7c 0 0.00
3696 v_min3_f32 v6, v34, v36, 0x72fc6f7c 0 0.00
3697 v_max3_f32 v24, v33, v35, 0xf2fc6f7c 0 0.00
3698 v_max3_f32 v23, v34, v36, 0xf2fc6f7c 0 0.00
3699 s_waitcnt lgkmcnt(0) 0 0.00
3700 s_clause 0x3 0 0.00
3701 buffer_store_dword v7, v16, s[8:11], 0 offen offset:24 glc 0 0.00
3702 buffer_store_dwordx4 v[33:36], v16, s[8:11], 0 offen offset:32 glc 0 0.00
3703 buffer_store_dword v7, v16, s[8:11], 0 offen offset:48 glc 0 0.00
3704 buffer_store_dwordx4 v[29:32], v16, s[8:11], 0 offen offset:56 glc 0 0.00
3705 v_min3_f32 v22, v31, v29, v9 0 0.00
3706 v_max3_f32 v20, v32, v30, v23 0 0.00
3707 v_min3_f32 v21, v32, v30, v6 0 0.00
3708 v_max3_f32 v2, v31, v29, v24 0 0.00
3709 _L127:
3710 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00
3711 s_cbranch_execz _L128 0 0.00
3712 BBF0_128:
3713 v_mov_b32_e32 v20, 0xf2fc6f7c 0 0.00
3714 v_mov_b32_e32 v2, 0xf2fc6f7c 0 0.00
3715 v_mov_b32_e32 v21, 0x72fc6f7c 0 0.00
3716 v_mov_b32_e32 v22, 0x72fc6f7c 0 0.00
3717 v_mov_b32_e32 v3, v6 0 0.00
3718 v_mov_b32_e32 v17, v27 0 0.00
3719 v_mov_b32_e32 v1, v5 0 0.00
3720 v_mov_b32_e32 v4, v15 0 0.00
3721 _L128:
3722 s_mov_b32 exec_lo, s6 0 0.00
3723 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00
3724 s_waitcnt vmcnt(0) 0 0.00
3725 v_mul_lo_u32 v9, v0, 24 0 0.00
3726 v_mul_f32_e32 v0, v13, v1 0 0.00
3727 v_mul_f32_e32 v15, v14, v1 0 0.00
3728 v_mul_f32_e32 v8, v13, v3 0 0.00
3729 v_mul_f32_e32 v5, v14, v3 0 0.00
3730 v_mac_f32_e32 v0, v11, v4 0 0.00
3731 v_mac_f32_e32 v15, v12, v4 0 0.00
3732 v_mac_f32_e32 v8, v11, v17 0 0.00
3733 v_mac_f32_e32 v5, v12, v17 0 0.00
3734 v_add_f32_e32 v14, v18, v0 0 0.00
3735 v_add_f32_e32 v15, v19, v15 0 0.00
3736 v_add_f32_e32 v16, v18, v8 0 0.00
3737 v_add_f32_e32 v17, v19, v5 0 0.00
3738 s_waitcnt lgkmcnt(0) 0 0.00
3739 s_waitcnt_depctr 0xffe3 0 0.00
3740 s_clause 0x1 0 0.00
3741 buffer_store_dword v7, v9, s[8:11], 0 offen glc 0 0.00
3742 buffer_store_dwordx4 v[14:17], v9, s[8:11], 0 offen offset:8 glc 0 0.00
3743 v_min3_f32 v48, v14, v16, v22 0 0.00
3744 v_min3_f32 v42, v15, v17, v21 0 0.00
3745 v_max3_f32 v107, v14, v16, v2 0 0.00
3746 v_max3_f32 v106, v15, v17, v20 0 0.00
3747 _L126:
3748 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00
3749 s_cbranch_execz _L31 0 0.00
3750 BBF0_129:
3751 s_waitcnt lgkmcnt(0) 0 0.00
3752 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00
3753 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00
3754 v_mul_f32_e32 v0, v13, v5 0 0.00
3755 v_mul_f32_e32 v2, v13, v22 0 0.00
3756 v_mul_f32_e32 v5, v14, v5 0 0.00
3757 v_mul_f32_e32 v9, v14, v22 0 0.00
3758 s_mov_b32 s6, 0xbc996e30 0 0.00
3759 v_mac_f32_e32 v0, v11, v15 0 0.00
3760 v_mac_f32_e32 v2, v11, v23 0 0.00
3761 v_mac_f32_e32 v5, v12, v15 0 0.00
3762 v_mac_f32_e32 v9, v12, v23 0 0.00
3763 v_add_f32_e32 v33, v18, v0 0 0.00
3764 v_add_f32_e32 v2, v18, v2 0 0.00
3765 v_add_f32_e32 v34, v19, v5 0 0.00
3766 v_add_f32_e32 v3, v19, v9 0 0.00
3767 v_subrev_f32_e32 v2, v2, v33 0 0.00
3768 v_subrev_f32_e32 v9, v3, v34 0 0.00
3769 v_mul_f32_e32 v2, v2, v2 0 0.00
3770 v_mac_f32_e32 v2, v9, v9 0 0.00
3771 v_sqrt_f32_e32 v2, v2 0 0.00
3772 v_max_f32_e32 v2, 0x3e800000, v2 0 0.00
3773 v_rcp_f32_e32 v2, v2 0 0.00
3774 v_mad_f32 v2, 0xbe800000, v2, 1.0 0 0.00
3775 v_sub_f32_e32 v3, 1.0, v2 0 0.00
3776 v_madak_f32 v8, s6, v2, 0x3d981627 0 0.00
3777 v_sqrt_f32_e32 v3, v3 0 0.00
3778 v_madak_f32 v8, v8, v2, 0xbe593484 0 0.00
3779 v_madak_f32 v2, v8, v2, 0x3fc90da4 0 0.00
3780 v_mul_f32_e64 v2, v2, v3 mul:2 0 0.00
3781 v_max_f32_e32 v2, 0x38d1b717, v2 0 0.00
3782 v_rcp_f32_e32 v3, v2 0 0.00
3783 v_mul_f32_e32 v2, 0.15915494, v2 0 0.00
3784 v_sin_f32_e32 v8, v2 0 0.00
3785 v_mul_f32_e32 v3, 0x40490fdb, v3 0 0.00
3786 v_cos_f32_e32 v16, v2 0 0.00
3787 v_ceil_f32_e32 v3, v3 0 0.00
3788 v_cvt_u32_f32_e32 v3, v3 0 0.00
3789 v_mov_b32_e32 v2, v3 0 0.00
3790 s_waitcnt lgkmcnt(0) 0 0.00
3791 s_waitcnt_depctr 0xffe3 0 0.00
3792 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00
3793 s_waitcnt_depctr 0xffe3 0 0.00
3794 s_movk_i32 s9, 0xffff 0 0.00
3795 s_mov_b32 s6, exec_lo 0 0.00
3796 s_mov_b32 s8, exec_lo 0 0.00
3797 v_mov_b32_e32 v0, 0 0 0.00
3798 v_mov_b32_e32 v17, 0xf2fc6f7c 0 0.00
3799 v_mov_b32_e32 v20, 0xf2fc6f7c 0 0.00
3800 v_mov_b32_e32 v21, 0x72fc6f7c 0 0.00
3801 v_mov_b32_e32 v24, 0x72fc6f7c 0 0.00
3802 v_add_nc_u32_e32 v9, -1, v3 0 0.00
3803 v_xor_b32_e32 v15, 0x80000000, v1 0 0.00
3804 s_nop 0 0 0.00
3805 s_nop 0 0 0.00
3806 s_nop 0 0 0.00
3807 s_nop 0 0 0.00
3808 s_nop 0 0 0.00
3809 s_nop 0 0 0.00
3810 s_nop 0 0 0.00
3811 _L130:
3812 v_cmp_eq_i32_e64 s9, s9, 0 0 0.00
3813 v_add_co_ci_u32_e64 v10, vcc_lo, v0, 0, s9 0 0.00
3814 v_cmp_gt_u32_e32 vcc_lo, v9, v10 0 0.00
3815 s_and_saveexec_b32 s10, vcc_lo 0 0.00
3816 s_andn2_b32 exec_lo, s10, exec_lo 0 0.00
3817 s_andn2_b32 s8, s8, exec_lo 0 0.00
3818 s_cbranch_scc0 _L129 0 0.00
3819 BBF0_130:
3820 s_and_b32 exec_lo, s10, s8 0 0.00
3821 v_mul_f32_e64 v25, -v8, v32 0 0.00
3822 v_mul_f32_e32 v1, v16, v32 0 0.00
3823 s_waitcnt vmcnt(0) 0 0.00
3824 v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s9 0 0.00
3825 s_movk_i32 s9, 0x0 0 0.00
3826 v_mac_f32_e32 v25, v15, v16 0 0.00
3827 v_mad_f32 v32, v15, v8, v1 0 0.00
3828 v_mul_lo_u32 v0, v0, 24 0 0.00
3829 v_add_f32_e32 v15, v22, v25 0 0.00
3830 v_add_f32_e32 v26, v23, v32 0 0.00
3831 v_mul_f32_e32 v28, v13, v15 0 0.00
3832 v_mul_f32_e32 v29, v14, v15 0 0.00
3833 v_mov_b32_e32 v15, v25 0 0.00
3834 v_mac_f32_e32 v28, v11, v26 0 0.00
3835 v_mac_f32_e32 v29, v12, v26 0 0.00
3836 v_add_f32_e32 v30, v18, v28 0 0.00
3837 v_add_f32_e32 v31, v19, v29 0 0.00
3838 v_mov_b32_e32 v28, v33 0 0.00
3839 v_mov_b32_e32 v29, v34 0 0.00
3840 s_waitcnt_depctr 0xffe3 0 0.00
3841 s_clause 0x1 0 0.00
3842 buffer_store_dword v7, v0, s[12:15], 0 offen glc 0 0.00
3843 buffer_store_dwordx4 v[28:31], v0, s[12:15], 0 offen offset:8 glc 0 0.00
3844 v_mov_b32_e32 v0, v10 0 0.00
3845 v_min3_f32 v24, v33, v30, v24 0 0.00
3846 v_min3_f32 v21, v34, v31, v21 0 0.00
3847 v_max3_f32 v20, v33, v30, v20 0 0.00
3848 v_max3_f32 v17, v34, v31, v17 0 0.00
3849 v_mov_b32_e32 v34, v31 0 0.00
3850 v_mov_b32_e32 v33, v30 0 0.00
3851 s_branch _L130 0 0.00
3852 _L129:
3853 s_mov_b32 exec_lo, s6 0 0.00
3854 v_mul_f32_e32 v0, v13, v6 0 0.00
3855 v_mul_f32_e32 v1, v14, v6 0 0.00
3856 v_mac_f32_e32 v0, v11, v27 0 0.00
3857 v_mac_f32_e32 v1, v12, v27 0 0.00
3858 v_add_f32_e32 v35, v18, v0 0 0.00
3859 s_waitcnt vmcnt(0) 0 0.00
3860 v_add_nc_u32_e32 v0, v3, v2 0 0.00
3861 v_add_f32_e32 v36, v19, v1 0 0.00
3862 v_min3_f32 v48, v33, v35, v24 0 0.00
3863 v_mul_lo_u32 v0, v0, 24 0 0.00
3864 v_min3_f32 v42, v34, v36, v21 0 0.00
3865 v_max3_f32 v107, v33, v35, v20 0 0.00
3866 v_max3_f32 v106, v34, v36, v17 0 0.00
3867 v_add_nc_u32_e32 v9, 0xffffffe8, v0 0 0.00
3868 v_add_nc_u32_e32 v10, -16, v0 0 0.00
3869 s_waitcnt_depctr 0xffe3 0 0.00
3870 buffer_store_dword v7, v9, s[12:15], 0 offen glc 0 0.00
3871 buffer_store_dwordx4 v[33:36], v10, s[12:15], 0 offen glc 0 0.00
3872 _L31:
3873 s_waitcnt_depctr 0xffe3 4 0.75 132
3874 s_mov_b32 exec_lo, s7 4 0.05 9
3875 v_cmp_gt_f32_sdwa s2, v107, v48 src0_sel:DWORD src1_sel:DWORD 4 0.01 1
3876 v_cmp_gt_f32_e32 vcc_lo, v106, v42 4 0.09 16
3877 s_or_b32 vcc_lo, s2, vcc_lo 4 0.01 2
3878 s_and_saveexec_b32 s2, vcc_lo 4 0.05 9
3879 s_cbranch_execz _L1 4 0.01 1
3880 BBF0_131:
3881 s_waitcnt lgkmcnt(0) 4 0.02 4
3882 s_load_dwordx4 s[8:11], s[0:1], 0x60 4 0.01 1
3883 v_floor_f32_e32 v3, v48 4 0.01 1
3884 v_floor_f32_e32 v2, v42 4 0.01 1
3885 v_mul_lo_u32 v4, v7, 24 4 0.02 4
3886 v_ceil_f32_e32 v1, v107 4 0.02 3
3887 v_cvt_i32_f32_e32 v2, v2 4 0.01 1
3888 v_cvt_i32_f32_e32 v3, v3 4 0.01 1
3889 v_ceil_f32_e32 v0, v106 4 0.01 1
3890 v_cvt_i32_f32_e32 v1, v1 4 0.01 1
3891 s_waitcnt lgkmcnt(0) 4 0.10 18
3892 s_waitcnt_depctr 0xffe3 4 0.01 2
3893 buffer_atomic_smin v3, v4, s[8:11], 0 offen 4 0.01 1
3894 buffer_atomic_smin v2, v4, s[8:11], 0 offen offset:4 4 0.01 1
3895 buffer_atomic_smax v1, v4, s[8:11], 0 offen offset:8 4 0.10 18
3896 v_cvt_i32_f32_e32 v1, v0 4 0.08 15
3897 buffer_atomic_smax v1, v4, s[8:11], 0 offen offset:12 4 0.01 1
3898 _L1:
3899 s_endpgm 4 38.24 6772
3900 s_code_end 0 0.00
3901 s_code_end 0 0.00
3902 s_code_end 0 0.00
3903 s_code_end 0 0.00
3904 s_code_end 0 0.00
3905 s_code_end 0 0.00
3906 s_code_end 0 0.00
3907 s_code_end 0 0.00
3908 s_code_end 0 0.00
3909 s_code_end 0 0.00
3910 s_code_end 0 0.00
3911 s_code_end 0 0.00
3912 s_code_end 0 0.00
3913 s_code_end 0 0.00
3914 s_code_end 0 0.00
3915 s_code_end 0 0.00
3916 s_code_end 0 0.00
3917 s_code_end 0 0.00
3918 s_code_end 0 0.00
3919 s_code_end 0 0.00
3920 s_code_end 0 0.00
3921 s_code_end 0 0.00
3922 s_code_end 0 0.00
3923 s_code_end 0 0.00
3924 s_code_end 0 0.00
3925 s_code_end 0 0.00
3926 s_code_end 0 0.00
3927 s_code_end 0 0.00
3928 s_code_end 0 0.00
3929 s_code_end 0 0.00
3930 s_code_end 0 0.00
3931 s_code_end 0 0.00
3932 s_code_end 0 0.00
3933 s_code_end 0 0.00
3934 s_code_end 0 0.00
3935 s_code_end 0 0.00
3936 s_code_end 0 0.00
3937 s_code_end 0 0.00
3938 s_code_end 0 0.00
3939 s_code_end 0 0.00
3940 s_code_end 0 0.00
3941 s_code_end 0 0.00
3942 s_code_end 0 0.00
3943 s_code_end 0 0.00
3944 s_code_end 0 0.00
3945 s_code_end 0 0.00
3946 s_code_end 0 0.00
3947 s_code_end 0 0.00
3948 s_code_end 0 0.00
3949 s_code_end 0 0.00
3950 s_code_end 0 0.00
3951 s_code_end 0 0.00
3952 s_code_end 0 0.00
3953 s_code_end 0 0.00
3954 s_code_end 0 0.00
3955 s_code_end 0 0.00
3956 s_code_end 0 0.00
3957 s_code_end 0 0.00
3958 s_code_end 0 0.00
3959 s_code_end 0 0.00
3960 s_code_end 0 0.00
3961 s_code_end 0 0.00
3962 s_code_end 0 0.00
3963 s_code_end 0 0.00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment