Last active
July 25, 2019 17:34
-
-
Save doitsujin/ddeefde765ad8c2b53de05dd30e6993c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Disassembly listing of one GPU shader program; BB0 is its entry block.
; The program interpolates attributes with v_interp_* and exports to mrt0/mrt1,
; so it is presumably a pixel shader — TODO confirm.
; Inputs read before being written in this listing: s[2:3], s3, s4 (scalar) and
; v0..v3 (vector).
; NOTE(review): several hex encodings in the trailing comments look left-shifted
; with leading zeros dropped (e.g. offset 0xab4 is shown as "ab400000"); treat
; the encoding column as unreliable — TODO confirm against the original tool output.
DISASM: | |
BB0: | |
; m0 = s4: parameter register consumed by the v_interp_* instructions below.
s_mov_b32 m0, s4 ; befc0004 | |
; Interpolate attr0.xyz into v4, v5, v6 (v0/v1 supply the interpolation weights).
; Presumably a position vector; it is later transformed and differenced against
; buffer-loaded points — verify against the shader source.
v_interp_p1_f32_e32 v4, v0, attr0.x ; d4100000 | |
v_interp_p2_f32_e32 v4, v1, attr0.x ; d4110001 | |
v_interp_p1_f32_e32 v5, v0, attr0.y ; d4140100 | |
v_interp_p2_f32_e32 v5, v1, attr0.y ; d4150101 | |
v_interp_p1_f32_e32 v6, v0, attr0.z ; d4180200 | |
v_interp_p2_f32_e32 v6, v1, attr0.z ; d4190201 | |
; Save the incoming exec mask in s[0:1], then switch to whole-quad mode.
s_mov_b64 s[0:1], exec ; be80017e | |
s_wqm_b64 exec, exec ; befe077e | |
; Build two 64-bit addresses from s3 (+32 and +64) with a zero high dword and
; load a 4-dword buffer descriptor from each: s[8:11] and s[16:19].
s_add_i32 s4, 32, s3 ; 810403a0 | |
s_add_i32 s6, 64, s3 ; 810603c0 | |
s_mov_b32 s7, 0 ; be870080 | |
s_load_dwordx4 s[8:11], s[6:7], 0x0 ; c00a0203 00000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; Constants from buffer s[8:11]: s[6:7] <- offset 0x20, s[12:15] <- offset 0x14.
s_buffer_load_dwordx2 s[6:7], s[8:11], 0x20 ; c0260184 20000000 | |
s_buffer_load_dwordx4 s[12:15], s[8:11], 0x14 ; c02a0304 14000000 | |
s_mov_b32 s5, 0 ; be850080 | |
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; c00a0402 00000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; s4 <- buffer s[16:19] offset 0xab4.
s_buffer_load_dword s4, s[16:19], 0xab4 ; c0220108 ab400000 | |
; Keep the original s3 in s5, then zero s3 so that s[2:3] is a 64-bit address
; with a zero high dword.
s_mov_b32 s5, s3 ; be850003 | |
s_mov_b32 s3, 0 ; be830080 | |
; Image resource (8 dwords at +0xc0) and sampler (4 dwords at +0x20) for the
; first texture sample.
s_load_dwordx8 s[20:27], s[2:3], 0xc0 ; c00e0501 c0000000 | |
s_load_dwordx4 s[28:31], s[2:3], 0x20 ; c00a0701 20000000 | |
; v7 = attr0.w, v8 = attr1.x: used below as the 2D sample coordinate.
v_interp_p1_f32_e32 v7, v0, attr0.w ; d41c0300 | |
v_interp_p2_f32_e32 v7, v1, attr0.w ; d41d0301 | |
v_interp_p1_f32_e32 v8, v0, attr1.x ; d4200400 | |
v_interp_p2_f32_e32 v8, v1, attr1.x ; d4210401 | |
; v9 = s4*s6 + s13*v7 ; v10 = s4*s7 + s13*v8
v_mov_b32_e32 v9, s6 ; 7e120206 | |
v_mov_b32_e32 v10, s7 ; 7e140207 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
v_mul_f32_e32 v9, s4, v9 ; a1212040 | |
v_mul_f32_e32 v10, s4, v10 ; a1414040 | |
v_mac_f32_e32 v9, s13, v7 ; 2c120e0d | |
v_mac_f32_e32 v10, s13, v8 ; 2c14100d | |
; v9 = sin(v9), v10 = cos(v10). v_sin/v_cos take their argument in revolutions,
; hence the scale by 0.15915494 (= 1/(2*pi)) followed by fract.
v_mul_f32_e32 v9, 0.15915494, v9 ; a1212f80 | |
v_fract_f32_e32 v9, v9 ; 7e123709 | |
v_sin_f32_e32 v9, v9 ; 7e125309 | |
v_mul_f32_e32 v10, 0.15915494, v10 ; a1414f80 | |
v_fract_f32_e32 v10, v10 ; 7e14370a | |
v_cos_f32_e32 v10, v10 ; 7e14550a | |
; v11 = s14 / s13 (reciprocal then multiply); offset the coordinate by
; (sin, cos) * v11.
v_rcp_f32_e32 v11, s13 ; 7e16440d | |
v_mul_f32_e32 v11, s14, v11 ; a16160e0 | |
v_mac_f32_e32 v7, v9, v11 ; 2c0e1709 | |
v_mac_f32_e32 v8, v10, v11 ; 2c10170a | |
; Sample all four channels into v[8:11] at the perturbed coordinate (address
; starts at v7).
image_sample v[8:11], v7, s[20:27], s[28:31] dmask:0xf ; f0800f00 e5080700 | |
; Third buffer descriptor from (original s3) + 48 into s[4:7]; then
; s[14:15] <- offset 0x0 and s[4:5] <- offset 0x8 of that buffer. The second
; load overwrites part of its own descriptor registers.
s_add_i32 s4, 48, s5 ; 810405b0 | |
s_mov_b32 s5, 0 ; be850080 | |
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; c00a0102 00000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
s_buffer_load_dwordx2 s[14:15], s[4:7], 0x0 ; c0260382 00000000 | |
s_nop 0 ; bf800000 | |
s_buffer_load_dwordx2 s[4:5], s[4:7], 0x8 ; c0260102 80000000 | |
; Interpolate attr1.yzw into v7, v12, v0. v0 (and then v1) are overwritten here,
; so no further interpolation is possible after this point.
; Presumably a normal vector (it is normalised next) — TODO confirm.
v_interp_p1_f32_e32 v7, v0, attr1.y ; d41c0500 | |
v_interp_p2_f32_e32 v7, v1, attr1.y ; d41d0501 | |
v_interp_p1_f32_e32 v12, v0, attr1.z ; d4300600 | |
v_interp_p2_f32_e32 v12, v1, attr1.z ; d4310601 | |
v_interp_p1_f32_e32 v0, v0, attr1.w ; d4000700 | |
v_interp_p2_f32_e32 v0, v1, attr1.w ; d4010701 | |
; s13 = 0x38d1b717 (~1e-4): used as an epsilon and, later, as a small scale.
s_mov_b32 s13, 0x38d1b717 ; be8d00ff 38d1b717 | |
; Normalise (v7, v12, v0): multiply each by rsq of the squared length.
v_mul_f32_e32 v1, v12, v12 ; a02190c0 | |
v_mac_f32_e32 v1, v7, v7 ; 2c020f07 | |
v_mac_f32_e32 v1, v0, v0 ; 2c020100 | |
v_rsq_f32_e32 v1, v1 ; 7e024901 | |
v_mul_f32_e32 v7, v1, v7 ; a0e0f010 | |
v_mul_f32_e32 v12, v1, v12 ; a1819010 | |
v_mul_f32_e32 v0, v1, v0 ; a0001010 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; vcc     = (s14 > 0x3f800054, i.e. ~1.00001)
; s[6:7]  = (s14 > 0)
; Each compare result is then collapsed to a scalar all-ones/zero mask via SCC:
; s14 = -(vcc != 0), s6 = -(s[6:7] != 0). vcc itself is kept for the first kill test.
v_mov_b32_e32 v1, s14 ; 7e02020e | |
v_cmp_lt_f32_e32 vcc, 0x3f800054, v1 ; 7c8202ff 3f800054 | |
v_cmp_lt_f32_e64 s[6:7], 0, s14 ; d0410006 1c800000 | |
s_cmp_lg_u64 0, vcc ; bf136a80 | |
s_mul_i32 s14, -1, scc ; 920efdc1 | |
s_cmp_lg_u64 0, s[6:7] ; bf130680 | |
s_mul_i32 s6, -1, scc ; 9206fdc1 | |
; v1 = s5, v13 = -s5 (sign bit flipped).
v_mov_b32_e32 v1, s5 ; 7e020205 | |
v_xor_b32_e32 v13, 0x80000000, v1 ; 2a1a02ff 80000000 | |
; Wait for the texture sample, then scale its xyz channels by s12.
s_waitcnt vmcnt(0) ; bf8ccf70 | |
v_mul_f32_e32 v8, s12, v8 ; a10100c0 | |
v_mul_f32_e32 v9, s12, v9 ; a12120c0 | |
v_mul_f32_e32 v10, s12, v10 ; a14140c0 | |
; v14 = v11 - s15 (sampled w channel minus a constant).
v_subrev_f32_e32 v14, s15, v11 ; 61c160f0 | |
; v1 = (|v14| < s13) ? s5 : -s5
v_cmp_gt_f32_e64 s[20:21], s13, |v14| ; d0440214 21c0d000 | |
v_cndmask_b32_e64 v1, v13, v1, s[20:21] ; d1000001 52030d00 | |
; First kill test: lanes where v1 < 0 and vcc is set, restricted to active lanes.
v_cmp_gt_f32_e64 s[20:21], 0, v1 ; d0440014 20280000 | |
s_and_b64 s[20:21], vcc, s[20:21] ; 8694146a | |
s_and_b64 s[20:21], s[20:21], exec ; 86947e14 | |
; Restore the pre-WQM exec mask and remove the killed lanes; SCC = any lane left.
s_mov_b64 exec, s[0:1] ; befe0100 | |
s_andn2_b64 exec, exec, s[20:21] ; 89fe147e | |
; If lanes remain, skip the next 3 dwords (the null export and s_endpgm).
s_cbranch_scc1 3 ; bf850003 | |
; Every lane was killed: issue a null export marked done and end the program.
exp null off, off, off, off done vm ; c4001890 80808080 | |
s_endpgm ; bf810000 | |
; Second kill test.
; s12 = s6 & ~s14: all ones only when 0 < (old s14) <= ~1.00001.
s_andn2_b32 s12, s6, s14 ; 890c0e06 | |
; Per-lane condition: s5 * v14 < -s4, turned into an all-ones/zero integer
; (0 - {0,1}) and masked with s12.
v_mul_f32_e32 v1, s5, v14 ; a021c050 | |
v_cmp_lt_f32_e64 vcc, v1, -s4 ; d041006a 40000901 | |
v_cndmask_b32_e64 v1, 0, 1, vcc ; d1000001 1a902800 | |
v_sub_u32_e32 v1, vcc, 0, v1 ; 34020280 | |
v_and_b32_e32 v1, s12, v1 ; 2602020c | |
v_cmp_ne_i32_e32 vcc, 0, v1 ; 7d8a0280 | |
; Remove the killed lanes from exec; same skip/terminate sequence as above.
s_and_b64 s[0:1], vcc, exec ; 86807e6a | |
s_andn2_b64 exec, exec, s[0:1] ; 89fe007e | |
s_cbranch_scc1 3 ; bf850003 | |
exp null off, off, off, off done vm ; c4001890 80808080 | |
s_endpgm ; bf810000 | |
; Surviving lanes continue. Load an image resource (+0x100), a sampler (+0x0)
; and a batch of constants from buffer s[16:19].
s_load_dwordx8 s[20:27], s[2:3], 0x100 ; c00e0501 10000000 | |
s_load_dwordx4 s[4:7], s[2:3], 0x0 ; c00a0101 00000000 | |
s_buffer_load_dwordx4 s[28:31], s[16:19], 0xc ; c02a0708 c0000000 | |
s_buffer_load_dwordx4 s[32:35], s[16:19], 0x1c ; c02a0808 1c000000 | |
s_buffer_load_dwordx2 s[0:1], s[16:19], 0x0 ; c0260008 00000000 | |
s_buffer_load_dwordx4 s[36:39], s[16:19], 0x2c ; c02a0908 2c000000 | |
s_buffer_load_dword s12, s[16:19], 0x3c ; c0220308 3c000000 | |
s_buffer_load_dwordx2 s[14:15], s[16:19], 0xa90 ; c0260388 a9000000 | |
s_buffer_load_dwordx2 s[40:41], s[16:19], 0xaa0 ; c0260a08 aa000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; Three dot products of (v4, v5, v6) with constant rows plus a constant term:
;   v1  = (s0*v4  + s29*v5 + s33*v6 + s37) / 2
;   v13 = (s1*v4  + s30*v5 + s34*v6 + s38) / 2
;   v14 =  s28*v4 + s32*v5 + s36*v6 + s12
; NOTE(review): looks like a matrix transform followed by a divide by the third
; result to obtain 0..1 coordinates — confirm against the shader source.
v_mul_f32_e32 v1, s29, v5 ; a020a1d0 | |
v_mul_f32_e32 v13, s30, v5 ; a1a0a1e0 | |
v_mul_f32_e32 v14, s32, v5 ; a1c0a200 | |
v_mac_f32_e32 v1, s0, v4 ; 2c020800 | |
v_mac_f32_e32 v13, s1, v4 ; 2c1a0801 | |
v_mac_f32_e32 v14, s28, v4 ; 2c1c081c | |
v_mac_f32_e32 v1, s33, v6 ; 2c020c21 | |
v_mac_f32_e32 v13, s34, v6 ; 2c1a0c22 | |
v_mac_f32_e32 v14, s36, v6 ; 2c1c0c24 | |
v_add_f32_e64 v1, s37, v1 div:2 ; d1010001 18020225 | |
v_add_f32_e64 v13, s38, v13 div:2 ; d101000d 18021a26 | |
v_add_f32_e32 v14, s12, v14 ; 21c1c0c0 | |
; v1 = v1 / v14 + 0.5 ; v13 = -v13 / v14 + 0.5 (second axis is flipped).
v_rcp_f32_e32 v14, v14 ; 7e1c450e | |
v_mad_f32 v1, v1, v14, 0.5 ; d1c10001 3c21d010 | |
v_mad_f32 v13, v13, -v14, 0.5 ; d1c1000d 43c21d0d | |
; v14/v15 = s14/s15 converted float -> int -> float (drops the fraction; the
; way back uses the unsigned conversion).
v_cvt_i32_f32_e32 v14, s14 ; 7e1c100e | |
v_cvt_i32_f32_e32 v15, s15 ; 7e1e100f | |
v_cvt_f32_u32_e32 v14, v14 ; 7e1c0d0e | |
v_cvt_f32_u32_e32 v15, v15 ; 7e1e0d0f | |
; Rescale the coordinates using reciprocals only:
;   v16 = 1 / ((1/v14) * (1/s40))   (algebraically v14 * s40), likewise v17
;   v1 *= v16 ; v13 *= v17
;   final coordinate (v16, v17) = (v1 / v14, v13 / v15)
v_rcp_f32_e32 v16, s40 ; 7e204428 | |
v_rcp_f32_e32 v17, s41 ; 7e224429 | |
v_rcp_f32_e32 v14, v14 ; 7e1c450e | |
v_rcp_f32_e32 v15, v15 ; 7e1e450f | |
v_mul_f32_e32 v16, v14, v16 ; a20210e0 | |
v_mul_f32_e32 v17, v15, v17 ; a22230f0 | |
v_rcp_f32_e32 v16, v16 ; 7e204510 | |
v_rcp_f32_e32 v17, v17 ; 7e224511 | |
v_mul_f32_e32 v1, v1, v16 ; a0221010 | |
v_mul_f32_e32 v13, v13, v17 ; a1a230d0 | |
v_mul_f32_e32 v16, v14, v1 ; a20030e0 | |
v_mul_f32_e32 v17, v15, v13 ; a221b0f0 | |
; Two level-zero samples at the same coordinate from two different images
; (+0x100 -> v[20:23], +0x40 -> v[16:19]); the second overwrites its own
; address registers.
image_sample_lz v[20:23], v16, s[20:27], s[4:7] dmask:0xf ; f09c0f00 25141000 | |
s_load_dwordx8 s[20:27], s[2:3], 0x40 ; c00e0501 40000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
image_sample_lz v[16:19], v16, s[20:27], s[4:7] dmask:0xf ; f09c0f00 25101000 | |
; Third image (+0x140), read with an integer image_load below.
s_load_dwordx8 s[20:27], s[2:3], 0x140 ; c00e0501 14000000 | |
s_buffer_load_dword s12, s[16:19], 0xa80 ; c0220308 a8000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; v2/v3 are read here without having been written in this listing, so they are
; shader inputs — presumably the pixel position; TODO confirm.
; Coordinate = int(v2 / float(s12)) >> 6 and int(v3 / float(s12)) >> 6.
v_cvt_f32_i32_e32 v1, s12 ; 7e020a0c | |
v_rcp_f32_e32 v1, v1 ; 7e024501 | |
v_mul_f32_e32 v2, v2, v1 ; a0403020 | |
v_mul_f32_e32 v1, v3, v1 ; a0203030 | |
v_cvt_i32_f32_e32 v2, v2 ; 7e041102 | |
v_cvt_i32_f32_e32 v1, v1 ; 7e021101 | |
v_ashrrev_i32_e32 v2, 6, v2 ; 22040486 | |
v_ashrrev_i32_e32 v3, 6, v1 ; 22060286 | |
; Fetch two channels into v[1:2]. The loop that follows uses v1 as a running
; index and v2 as the bound for the iteration counter.
image_load v[1:2], v2, s[20:27] dmask:0x3 unorm ; f0001300 50102000 | |
; Allow one vector memory op to stay in flight (the image_load just issued);
; both samples above have returned.
s_waitcnt vmcnt(1) ; bf8ccf71 | |
; v3 = s13 * v16 (s13 is still the ~1e-4 constant set earlier).
; v20..v22 = v20..v22 * 5.0 + v3 (0x40a00000 = 5.0) ; v3 = v23 + v3.
v_mul_f32_e32 v3, s13, v16 ; a06200d0 | |
v_madmk_f32 v20, v20, 0x40a00000, v3 ; 2e280714 40a00000 | |
v_madmk_f32 v21, v21, 0x40a00000, v3 ; 2e2a0715 40a00000 | |
v_madmk_f32 v22, v22, 0x40a00000, v3 ; 2e2c0716 40a00000 | |
v_add_f32_e32 v3, v23, v3 ; 20607170 | |
; Zero the six accumulators the loop adds into: v13..v15 and v23..v25.
v_mov_b32_e32 v13, 0 ; 7e1a0280 | |
v_mov_b32_e32 v14, 0 ; 7e1c0280 | |
v_mov_b32_e32 v15, 0 ; 7e1e0280 | |
v_mov_b32_e32 v23, 0 ; 7e2e0280 | |
v_mov_b32_e32 v24, 0 ; 7e300280 | |
v_mov_b32_e32 v25, 0 ; 7e320280 | |
; Re-enter whole-quad mode, remember that exec in s[0:1] for the loop exit,
; and start the iteration counter s4 at 0.
s_wqm_b64 exec, exec ; befe077e | |
s_mov_b64 s[0:1], exec ; be80017e | |
s_mov_b32 s4, 0 ; be840080 | |
; BB1/BB2: loop header. s4 is the scalar iteration counter (incremented in BB9);
; v2 holds the per-lane bound it is compared against.
BB1: | |
; Wait for all outstanding vector memory loads before reading v2.
s_waitcnt vmcnt(0) ; bf8ccf70 | |
; vcc = lanes where s4 >= v2 (unsigned), i.e. lanes that have finished.
v_cmp_ge_u32_e32 vcc, s4, v2 ; 7d9c0404 | |
; s[6:7] = previous exec; exec = exec & vcc (only the finished lanes).
s_and_saveexec_b64 s[6:7], vcc ; be86206a | |
; No lane finished on this iteration: go straight to the loop body.
s_cbranch_execz BB9 ; bf880002 | |
BB2: | |
; s[6:7] = lanes still iterating (previous exec minus the finished lanes);
; SCC = whether any such lane exists.
s_andn2_b64 s[6:7], s[6:7], exec ; 89867e06 | |
; Nothing left to iterate: leave the loop.
s_cbranch_scc0 BB10 ; bf84006e | |
; BB9: loop body, executed by the lanes recorded in s[6:7].
; Each iteration reads one entry index through v1, fetches two 16-byte records
; for that entry from buffer s[16:19], and accumulates into v23..v25 and v13..v15.
; NOTE(review): the arithmetic (direction to a point, clamped dot product with
; the normalised attr1.yzw vector, distance-based falloff, per-entry RGB) looks
; like a per-light diffuse accumulation — confirm against the shader source.
BB9: | |
; Restrict execution to the lanes that are still iterating.
s_mov_b64 exec, s[6:7] ; befe0106 | |
; Buffer descriptor for the index lookup (reloaded on every iteration).
s_load_dwordx4 s[12:15], s[2:3], 0x180 ; c00a0301 18000000 | |
; v26 = v1 + 1: the running index for the next iteration.
v_add_u32_e32 v26, vcc, 1, v1 ; 32340281 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; v1 = element number v1 of buffer s[12:15] (format load, indexed).
buffer_load_format_x v1, v1, s[12:15], 0 idxen ; e0002000 80030101 | |
s_waitcnt vmcnt(0) ; bf8ccf70 | |
; Record A at byte offset (v1 + 33) * 16: dword at +12 -> v28, first three
; dwords -> v[29:31].
v_add_u32_e32 v27, vcc, 33, v1 ; 323602a1 | |
v_lshlrev_b32_e32 v27, 4, v27 ; 24363684 | |
buffer_load_dword v28, v27, s[16:19], 0 offen offset:12 ; e050100c 80041c1b | |
buffer_load_dwordx3 v[29:31], v27, s[16:19], 0 offen ; e0581000 80041d1b | |
; Record B at byte offset (v1 + 0x61) * 16: dword at +12 -> v27, first three
; dwords -> v[32:34].
v_add_u32_e32 v1, vcc, 0x61, v1 ; 320202ff 61000000 | |
v_lshlrev_b32_e32 v1, 4, v1 ; 24020284 | |
buffer_load_dword v27, v1, s[16:19], 0 offen offset:12 ; e050100c 80041b01 | |
buffer_load_dwordx3 v[32:34], v1, s[16:19], 0 offen ; e0581000 80042001 | |
; s5 = 0x3ca3d70a (~0.02), used in the falloff term below.
s_mov_b32 s5, 0x3ca3d70a ; be8500ff 3ca3d70a | |
; Advance the iteration counter.
s_add_u32 s4, s4, 1 ; 80048104 | |
; At most three loads may remain outstanding, so v28 (the oldest) has arrived.
s_waitcnt vmcnt(3) ; bf8ccf73 | |
; Build a one-hot channel selector from v28:
;   v1 (integer mask) = all ones when v28 > -0.5, else 0
;   vcc      = v28 >= 2.5 (0x40200000)
;   s[6:7]   = v28 >= 1.5 (0x3fc00000)   (s[6:7] is reused; exec was already restored from it)
;   s[12:13] = v28 >= 0.5
;   s[14:15] = v28 <  0.5
; After the cndmask/and sequence exactly one of v28, v35, v36, v1 is 1.0 for
; v28 in [-0.5,0.5), [0.5,1.5), [1.5,2.5), [2.5,inf) respectively; all are 0
; when v28 <= -0.5.
v_cmp_lt_f32_e32 vcc, -0.5, v28 ; 7c8238f1 | |
v_cndmask_b32_e64 v1, 0, 1, vcc ; d1000001 1a902800 | |
v_sub_u32_e32 v1, vcc, 0, v1 ; 34020280 | |
v_cmp_le_f32_e32 vcc, 0x40200000, v28 ; 7c8638ff 40200000 | |
s_mov_b32 s6, 0x3fc00000 ; be8600ff 3fc00000 | |
v_cmp_le_f32_e64 s[6:7], s6, v28 ; d0430006 23806000 | |
v_cmp_le_f32_e64 s[12:13], 0.5, v28 ; d043000c 238f0000 | |
v_cmp_gt_f32_e64 s[14:15], 0.5, v28 ; d044000e 238f0000 | |
v_cndmask_b32_e64 v28, 0, 1.0, s[14:15] ; d100001c 39e48000 | |
v_cndmask_b32_e64 v35, 0, 1.0, s[12:13] ; d1000023 31e48000 | |
v_cndmask_b32_e64 v28, v28, 0, s[6:7] ; d100001c 19011c00 | |
v_cndmask_b32_e64 v35, v35, 0, s[6:7] ; d1000023 19012300 | |
v_cndmask_b32_e64 v36, 0, 1.0, s[6:7] ; d1000024 19e48000 | |
v_cndmask_b32_e64 v28, v28, 0, vcc ; d100001c 1a9011c0 | |
v_cndmask_b32_e64 v35, v35, 0, vcc ; d1000023 1a901230 | |
v_cndmask_b32_e64 v36, v36, 0, vcc ; d1000024 1a901240 | |
v_cndmask_b32_e64 v37, 0, 1.0, vcc ; d1000025 1a9e4800 | |
v_and_b32_e32 v28, v1, v28 ; 26383901 | |
v_and_b32_e32 v35, v1, v35 ; 26464701 | |
v_and_b32_e32 v36, v1, v36 ; 26484901 | |
v_and_b32_e32 v1, v1, v37 ; 26024b01 | |
; v35 = the selected channel of v[16:19]:
;   v16*sel0 + v17*sel1 + v18*sel2 + v19*sel3.
v_mul_f32_e32 v35, v17, v35 ; a4647110 | |
v_mac_f32_e32 v35, v16, v28 ; 2c463910 | |
v_mac_f32_e32 v35, v18, v36 ; 2c464912 | |
v_mac_f32_e32 v35, v19, v1 ; 2c460313 | |
; v[29:31] has arrived.
s_waitcnt vmcnt(2) ; bf8ccf72 | |
; Difference vector d = v[29:31] - (v4, v5, v6), held in (v1, v28, v29).
v_subrev_f32_e32 v1, v4, v29 ; 6023b040 | |
v_subrev_f32_e32 v28, v5, v30 ; 6383d050 | |
v_subrev_f32_e32 v29, v6, v31 ; 63a3f060 | |
; v30 = |d|^2 ; v31 = |d| ; v30 = 1/|d| ; then normalise d in place.
v_mul_f32_e32 v30, v28, v28 ; a3c391c0 | |
v_mac_f32_e32 v30, v1, v1 ; 2c3c0301 | |
v_mac_f32_e32 v30, v29, v29 ; 2c3c3b1d | |
v_sqrt_f32_e32 v31, v30 ; 7e3e4f1e | |
v_rsq_f32_e32 v30, v30 ; 7e3c491e | |
v_mul_f32_e32 v1, v1, v30 ; a023d010 | |
v_mul_f32_e32 v28, v28, v30 ; a383d1c0 | |
v_mul_f32_e32 v29, v29, v30 ; a3a3d1d0 | |
; v1 = clamp(dot(normalised d, (v7, v12, v0)), 0, 1).
v_mul_f32_e32 v28, v28, v12 ; a38191c0 | |
v_mac_f32_e32 v28, v1, v7 ; 2c380f01 | |
v_mad_f32 v1, v29, v0, v28 clamp ; d1c18001 472011d0 | |
; v27 (record B, dword at +12) has arrived.
s_waitcnt vmcnt(1) ; bf8ccf71 | |
; Falloff: t = clamp(|d| / v27) ;
;   v36 = (1 / (t*t + s5) + 0xbf7afafb) * 0x3e50e560
; where 0xbf7afafb ~ -0.9804 and 0x3e50e560 ~ 0.204.
v_rcp_f32_e32 v27, v27 ; 7e36451b | |
v_mul_f32_e64 v36, v31, v27 clamp ; d1058024 2371f000 | |
v_mad_f32 v36, v36, v36, s5 ; d1c10024 16492400 | |
v_rcp_f32_e32 v36, v36 ; 7e484524 | |
v_add_f32_e32 v36, 0xbf7afafb, v36 ; 24848ff0 bf7afafb | |
v_mul_f32_e32 v36, 0x3e50e560, v36 ; a4848ff0 3e50e560 | |
; v[32:34] has arrived.
s_waitcnt vmcnt(0) ; bf8ccf70 | |
; (v37, v38, v39) = v[32:34] * falloff * selected channel (v35).
v_mul_f32_e32 v37, v36, v32 ; a4a41240 | |
v_mul_f32_e32 v38, v36, v33 ; a4c43240 | |
v_mul_f32_e32 v36, v36, v34 ; a4845240 | |
v_mul_f32_e32 v37, v35, v37 ; a4a4b230 | |
v_mul_f32_e32 v38, v35, v38 ; a4c4d230 | |
v_mul_f32_e32 v39, v35, v36 ; a4e49230 | |
; The same terms additionally scaled by the clamped dot product.
v_mul_f32_e32 v27, v37, v1 ; a3603250 | |
v_mul_f32_e32 v28, v38, v1 ; a3803260 | |
v_mul_f32_e32 v1, v39, v1 ; a0203270 | |
; Accumulate the terms without the dot product into v25, v24, v23.
v_add_f32_e32 v25, v37, v25 ; 23233250 | |
v_add_f32_e32 v24, v38, v24 ; 23031260 | |
v_add_f32_e32 v23, v39, v23 ; 22e2f270 | |
; Accumulate the dot-scaled terms, multiplied by v8, v9, v10, into v15, v14, v13.
v_mac_f32_e32 v15, v27, v8 ; 2c1e111b | |
v_mac_f32_e32 v14, v28, v9 ; 2c1c131c | |
v_mac_f32_e32 v13, v1, v10 ; 2c1a1501 | |
; Move on to the next index and loop.
v_mov_b32_e32 v1, v26 ; 7e02031a | |
s_branch BB1 ; bf82ff8c | |
; BB10: loop exit. Adds a constant term to the accumulators and decides whether
; the BB11 path runs.
BB10: | |
; Restore the exec mask that was saved in s[0:1] before the loop.
s_mov_b64 exec, s[0:1] ; befe0100 | |
; s[4:7] <- buffer s[8:11] offset 0x0 ; s0 <- buffer s[16:19] offset 0xac4.
s_buffer_load_dwordx4 s[4:7], s[8:11], 0x0 ; c02a0104 00000000 | |
s_buffer_load_dword s0, s[16:19], 0xac4 ; c0220008 ac400000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; v15 += s4 * v8 ; v14 += s5 * v9 ; v13 += s6 * v10
; NOTE(review): presumably an ambient term times the sampled colour — confirm.
v_mac_f32_e32 v15, s4, v8 ; 2c1e1004 | |
v_mac_f32_e32 v14, s5, v9 ; 2c1c1205 | |
v_mac_f32_e32 v13, s6, v10 ; 2c1a1406 | |
; When s0 == 0 skip BB11 and take the zero fallback in BB12.
s_cmp_lg_i32 s0, 0 ; bf018000 | |
s_cbranch_scc0 BB12 ; bf84003d | |
; BB11: taken when the flag at buffer offset 0xac4 is non-zero.
; Rotates the vector (v7, v12, v0) by constants, samples a cube-style image with
; the result, and produces (v0, v1, v7).
; NOTE(review): presumably an environment/reflection lookup — confirm.
BB11: | |
; Image resource (+0x80) and sampler (+0x10). The sampler load overwrites
; s[2:3], the base address used by the preceding loads.
s_load_dwordx8 s[4:11], s[2:3], 0x80 ; c00e0101 80000000 | |
s_nop 0 ; bf800000 | |
s_load_dwordx4 s[0:3], s[2:3], 0x10 ; c00a0001 10000000 | |
s_buffer_load_dword s12, s[16:19], 0xabc ; c0220308 abc00000 | |
s_buffer_load_dword s13, s[16:19], 0x1c0 ; c0220348 1c000000 | |
s_buffer_load_dwordx4 s[20:23], s[16:19], 0x1cc ; c02a0508 1cc00000 | |
s_buffer_load_dwordx4 s[24:27], s[16:19], 0x1e0 ; c02a0608 1e000000 | |
s_buffer_load_dwordx4 s[28:31], s[16:19], 0x1f0 ; c02a0708 1f000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; Linear transform of (v7, v12, v0):
;   v1  = s21*v7 + s24*v12 + s28*v0
;   v2  = s22*v7 + s25*v12 + s29*v0
;   v12 = s23*v7 + s26*v12 + s30*v0
v_mul_f32_e32 v1, s24, v12 ; a0218180 | |
v_mul_f32_e32 v2, s25, v12 ; a0418190 | |
v_mul_f32_e32 v12, s26, v12 ; a18181a0 | |
v_mac_f32_e32 v1, s21, v7 ; 2c020e15 | |
v_mac_f32_e32 v2, s22, v7 ; 2c040e16 | |
v_mac_f32_e32 v12, s23, v7 ; 2c180e17 | |
v_mac_f32_e32 v1, s28, v0 ; 2c02001c | |
v_mac_f32_e32 v2, s29, v0 ; 2c04001d | |
v_mac_f32_e32 v12, s30, v0 ; 2c18001e | |
; Cube-map coordinate derivation from the direction (v1, v2, v12):
;   v0  = 1 / |cubema|
;   v16 = cubesc * v0 + 1.5   (0x3fc00000 = 1.5)
;   v17 = cubetc * v0 + 1.5
;   v18 = cubeid (face index)
v_cubema_f32 v0, v1, v2, v12 ; d1c70000 43205010 | |
v_rcp_f32_e64 v0, |v0| ; d1620100 10000000 | |
v_cubesc_f32 v7, v1, v2, v12 ; d1c50007 43205010 | |
v_madak_f32 v16, v7, v0, 0x3fc00000 ; 30200107 3fc00000 | |
v_cubetc_f32 v7, v1, v2, v12 ; d1c60007 43205010 | |
v_madak_f32 v17, v7, v0, 0x3fc00000 ; 30220107 3fc00000 | |
v_cubeid_f32 v18, v1, v2, v12 ; d1c40012 43205010 | |
; Sample three channels into v[0:2]; the address starts at v16 and the
; instruction carries the "da" modifier.
image_sample v[0:2], v16, s[4:11], s[0:3] dmask:0x7 da ; f0804700 10010000 | |
; While the sample is in flight:
;   v7  = s12*v25 + s13 + s20
;   v12 = s12*v24 + s13 + s20
;   v16 = s12*v23 + s13 + s20
v_mul_f32_e32 v7, s12, v25 ; a0e320c0 | |
v_mul_f32_e32 v12, s12, v24 ; a18300c0 | |
v_mul_f32_e32 v16, s12, v23 ; a202e0c0 | |
v_add_f32_e32 v7, s13, v7 ; 20e0e0d0 | |
v_add_f32_e32 v12, s13, v12 ; 218180d0 | |
v_add_f32_e32 v16, s13, v16 ; 220200d0 | |
v_add_f32_e32 v7, s20, v7 ; 20e0e140 | |
v_add_f32_e32 v12, s20, v12 ; 21818140 | |
v_add_f32_e32 v16, s20, v16 ; 22020140 | |
s_waitcnt vmcnt(0) ; bf8ccf70 | |
; Result = sample * the term above * (v8, v9, v10) * v3, written to v0, v1, v7.
v_mul_f32_e32 v0, v7, v0 ; a0001070 | |
v_mul_f32_e32 v1, v12, v1 ; a02030c0 | |
v_mul_f32_e32 v2, v16, v2 ; a0405100 | |
v_mul_f32_e32 v0, v8, v0 ; a0001080 | |
v_mul_f32_e32 v1, v9, v1 ; a0203090 | |
v_mul_f32_e32 v2, v10, v2 ; a04050a0 | |
v_mul_f32_e32 v0, v3, v0 ; a0001030 | |
v_mul_f32_e32 v1, v3, v1 ; a0203030 | |
v_mul_f32_e32 v7, v3, v2 ; a0e05030 | |
; Jump over the zero fallback in BB12.
s_branch BB13 ; bf820003 | |
; BB12: fallback when BB11 is skipped — the three values BB11 would have
; produced (v0, v1, v7) are set to zero.
BB12: | |
v_mov_b32_e32 v0, 0 ; 7e000280 | |
v_mov_b32_e32 v1, 0 ; 7e020280 | |
v_mov_b32_e32 v7, 0 ; 7e0e0280 | |
; BB13: combines the partial results into one three-component value held in
; (v0, v12, v1), then optionally remaps it with 1 - exp(-s0 * x).
BB13: | |
; s0 <- buffer s[16:19] offset 0xb24 (scale, also compared against 1.0 below).
s_buffer_load_dword s0, s[16:19], 0xb24 ; c0220008 b2400000 | |
; (v0, v1, v7) += (v20, v21, v22) * (v8, v9, v10)
v_mac_f32_e32 v0, v20, v8 ; 2c001114 | |
v_mac_f32_e32 v1, v21, v9 ; 2c021315 | |
v_mac_f32_e32 v7, v22, v10 ; 2c0e1516 | |
; s1 <- buffer offset 0xacc (flag tested at the end of this block).
s_buffer_load_dword s1, s[16:19], 0xacc ; c0220048 acc00000 | |
; Add the loop accumulators: v0 += v15 ; v12 = v1 + v14 ; v1 = v7 + v13.
v_add_f32_e32 v0, v0, v15 ; 2001f000 | |
v_add_f32_e32 v12, v1, v14 ; 2181d010 | |
v_add_f32_e32 v1, v7, v13 ; 2021b070 | |
; Pre-scale by 0x3fb8aa3b (~1.442695 = log2(e)) so that the base-2 v_exp below
; evaluates a natural exponential.
v_mul_f32_e32 v2, 0x3fb8aa3b, v0 ; a0400ff0 3fb8aa3b | |
v_mul_f32_e32 v3, 0x3fb8aa3b, v12 ; a0618ff0 3fb8aa3b | |
v_mul_f32_e32 v7, 0x3fb8aa3b, v1 ; a0e02ff0 3fb8aa3b | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; vcc = (s0 > 1.0): selects the remapped value below.
v_cmp_lt_f32_e64 vcc, 1.0, s0 ; d041006a f2000000 | |
; v2, v3, v7 = 1 - exp(-s0 * x) for x = v0, v12, v1.
v_mul_f32_e32 v2, s0, v2 ; a0404000 | |
v_mul_f32_e32 v3, s0, v3 ; a0606000 | |
v_mul_f32_e32 v7, s0, v7 ; a0e0e000 | |
v_exp_f32_e64 v2, -v2 ; d1600002 20000102 | |
v_exp_f32_e64 v3, -v3 ; d1600003 20000103 | |
v_exp_f32_e64 v7, -v7 ; d1600007 20000107 | |
v_sub_f32_e32 v2, 1.0, v2 ; 40404f20 | |
v_sub_f32_e32 v3, 1.0, v3 ; 40606f20 | |
v_sub_f32_e32 v7, 1.0, v7 ; 40e0ef20 | |
; Keep the remapped value where vcc is set, otherwise the unmodified one.
v_cndmask_b32_e32 v0, v0, v2, vcc ; 50000000 | |
v_cndmask_b32_e32 v12, v12, v3, vcc ; 18070c00 | |
v_cndmask_b32_e32 v1, v1, v7, vcc ; 20f01000 | |
; When s1 == 0 skip BB14..BB17 and go to BB18.
s_cmp_lg_i32 s1, 0 ; bf018001 | |
s_cbranch_scc0 BB18 ; bf84002f | |
; BB14: chooses how the reference value v2 is built for BB17.
; Tests the constant at buffer offset 0xae8: if it is > 0 fall through to BB15,
; otherwise branch to BB16.
BB14: | |
s_buffer_load_dword s0, s[16:19], 0xae8 ; c0220008 ae800000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; The operand is scalar, so the compare result is the same for every active lane;
; SCC = (vcc != 0) reduces it to a scalar branch condition.
v_cmp_lt_f32_e64 vcc, 0, s0 ; d041006a 80000000 | |
s_cmp_lg_u64 0, vcc ; bf136a80 | |
s_cbranch_scc0 BB16 ; bf840008 | |
; BB15: v2 = const[0xae0] + const[0x158], then continue at BB17.
BB15: | |
s_buffer_load_dword s0, s[16:19], 0xae0 ; c0220008 ae000000 | |
s_buffer_load_dword s1, s[16:19], 0x158 ; c0220048 15800000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
v_mov_b32_e32 v2, s0 ; 7e040200 | |
v_add_f32_e32 v2, s1, v2 ; 20404010 | |
s_branch BB17 ; bf820004 | |
; BB16: v2 = const[0xae0] with no offset added; falls through to BB17.
BB16: | |
s_buffer_load_dword s0, s[16:19], 0xae0 ; c0220008 ae000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
v_mov_b32_e32 v2, s0 ; 7e040200 | |
; BB17: computes a factor in v2 from the vector (v4, v5, v6), a reference point
; in s[0:2] and the value already in v2:
;   v2 = min(1, pow(clamp(clamp((v6 - v2) / |v6 - s2|) * length / s4), s5))
; where length = |(v4, v5, v6) - (s0, s1, s2)|.
; NOTE(review): presumably a height-based fog amount — confirm against the
; shader source.
BB17: | |
; s[0:3] <- offset 0x150 ; s4 <- offset 0xae4 ; s5 <- offset 0xaec.
s_buffer_load_dwordx4 s[0:3], s[16:19], 0x150 ; c02a0008 15000000 | |
s_buffer_load_dword s4, s[16:19], 0xae4 ; c0220108 ae400000 | |
s_buffer_load_dword s5, s[16:19], 0xaec ; c0220148 aec00000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; (v3, v7, v8) = (v4, v5, v6) - (s0, s1, s2). This overwrites v7 and v8.
v_subrev_f32_e32 v3, s0, v4 ; 60608000 | |
v_subrev_f32_e32 v7, s1, v5 ; 60e0a010 | |
v_subrev_f32_e32 v8, s2, v6 ; 6100c020 | |
; v2 = v6 - v2 (subrev computes src1 - src0).
v_subrev_f32_e32 v2, v2, v6 ; 6040d020 | |
; v2 = clamp(v2 / |v8|, 0, 1)
v_rcp_f32_e64 v9, |v8| ; d1620109 10800000 | |
v_mul_f32_e64 v2, v2, v9 clamp ; d1058002 21302000 | |
; v3 = length of the difference vector.
v_mul_f32_e32 v7, v7, v7 ; a0e0f070 | |
v_mac_f32_e32 v7, v3, v3 ; 2c0e0703 | |
v_mac_f32_e32 v7, v8, v8 ; 2c0e1108 | |
v_sqrt_f32_e32 v3, v7 ; 7e064f07 | |
; v2 = clamp(v2 * length / s4, 0, 1)
v_mul_f32_e32 v2, v2, v3 ; a0407020 | |
v_rcp_f32_e32 v3, s4 ; 7e064404 | |
v_mul_f32_e64 v2, v2, v3 clamp ; d1058002 20702000 | |
; v2 = v2 raised to the power s5, evaluated as exp2(s5 * log2(v2)).
v_log_f32_e32 v2, v2 ; 7e044302 | |
v_mul_f32_e32 v2, s5, v2 ; a0404050 | |
v_exp_f32_e32 v2, v2 ; 7e044102 | |
v_min_f32_e32 v2, 1.0, v2 ; 140404f2 | |
; Jump over the zero fallback in BB18.
s_branch BB19 ; bf820001 | |
; BB18: reached when the flag at buffer offset 0xacc is zero — the factor v2
; defaults to 0.
BB18: | |
v_mov_b32_e32 v2, 0 ; 7e040280 | |
; BB19: tests the flag at buffer offset 0xad0; when it is zero the second
; factor (BB20..BB23) is skipped and control goes straight to BB25.
BB19: | |
s_buffer_load_dword s0, s[16:19], 0xad0 ; c0220008 ad000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
s_cmp_lg_i32 s0, 0 ; bf018000 | |
s_cbranch_scc0 BB25 ; bf84002a | |
; BB20: chooses how the reference value v3 is built for BB23.
; Tests the constant at buffer offset 0xaf8: if it is > 0 fall through to BB21,
; otherwise branch to BB22.
BB20: | |
s_buffer_load_dword s0, s[16:19], 0xaf8 ; c0220008 af800000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; Scalar operand, so the compare is uniform across active lanes;
; SCC = (vcc != 0) makes it a scalar branch condition.
v_cmp_lt_f32_e64 vcc, 0, s0 ; d041006a 80000000 | |
s_cmp_lg_u64 0, vcc ; bf136a80 | |
s_cbranch_scc0 BB22 ; bf840008 | |
; BB21: v3 = const[0xaf0] + const[0x158], then continue at BB23.
BB21: | |
s_buffer_load_dword s0, s[16:19], 0xaf0 ; c0220008 af000000 | |
s_buffer_load_dword s1, s[16:19], 0x158 ; c0220048 15800000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
v_mov_b32_e32 v3, s0 ; 7e060200 | |
v_add_f32_e32 v3, s1, v3 ; 20606010 | |
s_branch BB23 ; bf820004 | |
; BB22: v3 = const[0xaf0] with no offset added; falls through to BB23.
BB22: | |
s_buffer_load_dword s0, s[16:19], 0xaf0 ; c0220008 af000000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
v_mov_b32_e32 v3, s0 ; 7e060200 | |
; BB23: second factor, computed like the one in BB17 but without the power step,
; then added to v2:
;   v3 = clamp(clamp((v6 - v3) / |v6 - s2|) * length / s4)
;   v2 = min(1, v2 + v3)
; where length = |(v4, v5, v6) - (s0, s1, s2)|. Falls through to BB25.
BB23: | |
; s[0:3] <- offset 0x150 ; s4 <- offset 0xaf4.
s_buffer_load_dwordx4 s[0:3], s[16:19], 0x150 ; c02a0008 15000000 | |
s_buffer_load_dword s4, s[16:19], 0xaf4 ; c0220108 af400000 | |
s_waitcnt lgkmcnt(0) ; bf8cc07f | |
; Difference vector, computed in place: v4 -= s0 ; v5 -= s1 ; v7 = v6 - s2.
v_subrev_f32_e32 v4, s0, v4 ; 60808000 | |
v_subrev_f32_e32 v5, s1, v5 ; 60a0a010 | |
v_subrev_f32_e32 v7, s2, v6 ; 60e0c020 | |
; v3 = v6 - v3 (subrev computes src1 - src0); v6 is overwritten right after.
v_subrev_f32_e32 v3, v3, v6 ; 6060d030 | |
; v3 = clamp(v3 / |v7|, 0, 1)
v_rcp_f32_e64 v6, |v7| ; d1620106 10700000 | |
v_mul_f32_e64 v3, v3, v6 clamp ; d1058003 20d03000 | |
; v4 = length of the difference vector.
v_mul_f32_e32 v5, v5, v5 ; a0a0b050 | |
v_mac_f32_e32 v5, v4, v4 ; 2c0a0904 | |
v_mac_f32_e32 v5, v7, v7 ; 2c0a0f07 | |
v_sqrt_f32_e32 v4, v5 ; 7e084f05 | |
; v3 = clamp(v3 * length / s4, 0, 1)
v_mul_f32_e32 v3, v3, v4 ; a0609030 | |
v_rcp_f32_e32 v4, s4 ; 7e084404 | |
v_mul_f32_e64 v3, v3, v4 clamp ; d1058003 20903000 | |
; Combine with the first factor and cap at 1.
v_add_f32_e32 v2, v3, v2 ; 20405030 | |
v_min_f32_e32 v2, 1.0, v2 ; 140404f2 | |
; BB25: final block. Applies the factor in v2, packs the results to half floats
; and exports them to two render targets, then ends the program.
BB25: | |
; Scale (v0, v12, v1) by (1 - v2).
v_sub_f32_e32 v2, 1.0, v2 ; 40404f20 | |
v_mul_f32_e32 v0, v0, v2 ; a0005000 | |
v_mul_f32_e32 v12, v12, v2 ; a18050c0 | |
v_mul_f32_e32 v1, v1, v2 ; a0205010 | |
; Second output: the accumulators (v15, v14, v13) scaled by 0x3e4ccccd (~0.2)
; and capped at 1.0.
v_mul_f32_e32 v2, 0x3e4ccccd, v15 ; a041eff0 3e4ccccd | |
v_mul_f32_e32 v3, 0x3e4ccccd, v14 ; a061cff0 3e4ccccd | |
v_mul_f32_e32 v4, 0x3e4ccccd, v13 ; a081aff0 3e4ccccd | |
v_min_f32_e32 v2, 1.0, v2 ; 140404f2 | |
v_min_f32_e32 v3, 1.0, v3 ; 140606f2 | |
v_min_f32_e32 v4, 1.0, v4 ; 140808f2 | |
; Pack pairs of floats into 16-bit halves; v11 is used as the fourth component
; of both outputs.
v_cvt_pkrtz_f16_f32 v0, v0, v12 ; d2960000 21900000 | |
v_cvt_pkrtz_f16_f32 v1, v1, v11 ; d2960001 21701000 | |
; Compressed export to render target 0.
exp mrt0 v0, off, v1, off compr ; c4000405 80800100 | |
v_cvt_pkrtz_f16_f32 v2, v2, v3 ; d2960002 20702000 | |
v_cvt_pkrtz_f16_f32 v3, v4, v11 ; d2960003 21704000 | |
; Compressed export to render target 1, flagged as the final export (done).
exp mrt1 v2, off, v3, off done compr vm ; c4001c15 80800302 | |
s_endpgm ; bf810000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment