Skip to content

Instantly share code, notes, and snippets.

@doitsujin
Last active July 25, 2019 17:34
Show Gist options
  • Save doitsujin/ddeefde765ad8c2b53de05dd30e6993c to your computer and use it in GitHub Desktop.
Save doitsujin/ddeefde765ad8c2b53de05dd30e6993c to your computer and use it in GitHub Desktop.
; AMD GCN shader disassembly; each line is "<instruction> ; <hex encoding>".
; NOTE(review): the hex dwords in the trailing comments do not look like
; well-formed encodings -- e.g. the literal offset 0x20 is printed as 20000000
; and 0xab4 as ab400000 -- so they appear to have been printed left-justified
; and zero-padded on the right. Treat the mnemonics as the reference.
DISASM:
; BB0: entry block. Interpolates the inputs, performs one texture sample, runs
; two lane-kill tests (each may end the program early), then prepares
; coordinates, two more samples, an image load and the accumulators that the
; loop starting at BB1 uses.
BB0:
; m0 is loaded from s4 before the v_interp_* instructions that follow.
s_mov_b32 m0, s4 ; befc0004
; attr0.xyz -> v4, v5, v6 (p1/p2 pairs; v0 and v1 are the two interpolation inputs).
v_interp_p1_f32_e32 v4, v0, attr0.x ; d4100000
v_interp_p2_f32_e32 v4, v1, attr0.x ; d4110001
v_interp_p1_f32_e32 v5, v0, attr0.y ; d4140100
v_interp_p2_f32_e32 v5, v1, attr0.y ; d4150101
v_interp_p1_f32_e32 v6, v0, attr0.z ; d4180200
v_interp_p2_f32_e32 v6, v1, attr0.z ; d4190201
; Save the incoming exec mask in s[0:1], then switch to whole-quad mode.
s_mov_b64 s[0:1], exec ; be80017e
s_wqm_b64 exec, exec ; befe077e
; Build two 64-bit addresses with a zero high dword: s[4:5] = s3 + 32 and
; s[6:7] = s3 + 64. Each is used to fetch a 4-dword descriptor.
s_add_i32 s4, 32, s3 ; 810403a0
s_add_i32 s6, 64, s3 ; 810603c0
s_mov_b32 s7, 0 ; be870080
s_load_dwordx4 s[8:11], s[6:7], 0x0 ; c00a0203 00000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Scalar buffer loads through descriptor s[8:11]; s[6:7] is reused as a destination.
s_buffer_load_dwordx2 s[6:7], s[8:11], 0x20 ; c0260184 20000000
s_buffer_load_dwordx4 s[12:15], s[8:11], 0x14 ; c02a0304 14000000
s_mov_b32 s5, 0 ; be850080
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; c00a0402 00000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; s[16:19] is the buffer descriptor used for most constant loads in the rest of the program.
s_buffer_load_dword s4, s[16:19], 0xab4 ; c0220108 ab400000
; Keep the original s3 in s5, then zero s3 so that s[2:3] forms a 64-bit address.
s_mov_b32 s5, s3 ; be850003
s_mov_b32 s3, 0 ; be830080
; Image resource (8 dwords) and sampler (4 dwords) for the first image_sample.
s_load_dwordx8 s[20:27], s[2:3], 0xc0 ; c00e0501 c0000000
s_load_dwordx4 s[28:31], s[2:3], 0x20 ; c00a0701 20000000
; attr0.w -> v7, attr1.x -> v8: these become the sample coordinates below.
v_interp_p1_f32_e32 v7, v0, attr0.w ; d41c0300
v_interp_p2_f32_e32 v7, v1, attr0.w ; d41d0301
v_interp_p1_f32_e32 v8, v0, attr1.x ; d4200400
v_interp_p2_f32_e32 v8, v1, attr1.x ; d4210401
v_mov_b32_e32 v9, s6 ; 7e120206
v_mov_b32_e32 v10, s7 ; 7e140207
s_waitcnt lgkmcnt(0) ; bf8cc07f
; v9  = s6 * s4 + s13 * v7
; v10 = s7 * s4 + s13 * v8
v_mul_f32_e32 v9, s4, v9 ; a1212040
v_mul_f32_e32 v10, s4, v10 ; a1414040
v_mac_f32_e32 v9, s13, v7 ; 2c120e0d
v_mac_f32_e32 v10, s13, v8 ; 2c14100d
; Scale by 0.15915494 (1 / (2*pi)) and take the fractional part before sin/cos.
v_mul_f32_e32 v9, 0.15915494, v9 ; a1212f80
v_fract_f32_e32 v9, v9 ; 7e123709
v_sin_f32_e32 v9, v9 ; 7e125309
v_mul_f32_e32 v10, 0.15915494, v10 ; a1414f80
v_fract_f32_e32 v10, v10 ; 7e14370a
v_cos_f32_e32 v10, v10 ; 7e14550a
; v11 = s14 / s13; the coordinates are offset by sin * v11 and cos * v11.
v_rcp_f32_e32 v11, s13 ; 7e16440d
v_mul_f32_e32 v11, s14, v11 ; a16160e0
v_mac_f32_e32 v7, v9, v11 ; 2c0e1709
v_mac_f32_e32 v8, v10, v11 ; 2c10170a
; Sample with address VGPRs starting at v7; the 4-component result overwrites v[8:11].
image_sample v[8:11], v7, s[20:27], s[28:31] dmask:0xf ; f0800f00 e5080700
; Fetch another 4-dword descriptor from (original s3) + 48 and read four dwords through it.
s_add_i32 s4, 48, s5 ; 810405b0
s_mov_b32 s5, 0 ; be850080
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; c00a0102 00000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
s_buffer_load_dwordx2 s[14:15], s[4:7], 0x0 ; c0260382 00000000
s_nop 0 ; bf800000
; This load overwrites s[4:5], which is part of its own descriptor s[4:7].
s_buffer_load_dwordx2 s[4:5], s[4:7], 0x8 ; c0260102 80000000
; attr1.yzw -> v7, v12, v0. v0 and v1 (the interpolation inputs) are overwritten
; from here on; no later v_interp_* appears in this listing.
v_interp_p1_f32_e32 v7, v0, attr1.y ; d41c0500
v_interp_p2_f32_e32 v7, v1, attr1.y ; d41d0501
v_interp_p1_f32_e32 v12, v0, attr1.z ; d4300600
v_interp_p2_f32_e32 v12, v1, attr1.z ; d4310601
v_interp_p1_f32_e32 v0, v0, attr1.w ; d4000700
v_interp_p2_f32_e32 v0, v1, attr1.w ; d4010701
; s13 now holds the float literal 0x38d1b717; it is the threshold compared with |v14| below.
s_mov_b32 s13, 0x38d1b717 ; be8d00ff 38d1b717
; Normalize (v7, v12, v0): multiply each component by rsq of the squared length.
v_mul_f32_e32 v1, v12, v12 ; a02190c0
v_mac_f32_e32 v1, v7, v7 ; 2c020f07
v_mac_f32_e32 v1, v0, v0 ; 2c020100
v_rsq_f32_e32 v1, v1 ; 7e024901
v_mul_f32_e32 v7, v1, v7 ; a0e0f010
v_mul_f32_e32 v12, v1, v12 ; a1819010
v_mul_f32_e32 v0, v1, v0 ; a0001010
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Two compares on the scalar s14 (loaded above), each folded through scc into a
; 0 / 0xffffffff word: s14 <- (0x3f800054 < s14), s6 <- (0 < s14).
; vcc keeps the first compare's lane mask and is reused by the s_and_b64 below.
v_mov_b32_e32 v1, s14 ; 7e02020e
v_cmp_lt_f32_e32 vcc, 0x3f800054, v1 ; 7c8202ff 3f800054
v_cmp_lt_f32_e64 s[6:7], 0, s14 ; d0410006 1c800000
s_cmp_lg_u64 0, vcc ; bf136a80
s_mul_i32 s14, -1, scc ; 920efdc1
s_cmp_lg_u64 0, s[6:7] ; bf130680
s_mul_i32 s6, -1, scc ; 9206fdc1
; v1 = s5, v13 = s5 with the sign bit flipped.
v_mov_b32_e32 v1, s5 ; 7e020205
v_xor_b32_e32 v13, 0x80000000, v1 ; 2a1a02ff 80000000
; Wait for the image_sample result in v[8:11].
s_waitcnt vmcnt(0) ; bf8ccf70
; Scale the first three sampled components by s12.
v_mul_f32_e32 v8, s12, v8 ; a10100c0
v_mul_f32_e32 v9, s12, v9 ; a12120c0
v_mul_f32_e32 v10, s12, v10 ; a14140c0
; v14 = v11 - s15 (subrev computes src1 - src0).
v_subrev_f32_e32 v14, s15, v11 ; 61c160f0
; v1 = (|v14| < s13) ? s5 : -s5
v_cmp_gt_f32_e64 s[20:21], s13, |v14| ; d0440214 21c0d000
v_cndmask_b32_e64 v1, v13, v1, s[20:21] ; d1000001 52030d00
; First kill mask: lanes where v1 < 0, the vcc compare above held, and the lane is active.
v_cmp_gt_f32_e64 s[20:21], 0, v1 ; d0440014 20280000
s_and_b64 s[20:21], vcc, s[20:21] ; 8694146a
s_and_b64 s[20:21], s[20:21], exec ; 86947e14
; Restore the exec mask saved at the top (leaving whole-quad mode), then remove the killed lanes.
s_mov_b64 exec, s[0:1] ; befe0100
s_andn2_b64 exec, exec, s[20:21] ; 89fe147e
; scc = (exec != 0). If any lane survives, skip 3 dwords: the two-dword exp and
; the s_endpgm below. Otherwise export null with 'done' and end the program.
s_cbranch_scc1 3 ; bf850003
exp null off, off, off, off done vm ; c4001890 80808080
s_endpgm ; bf810000
; Second kill test. s12 = s6 & ~s14, combining the two scalar flag words computed above.
s_andn2_b32 s12, s6, s14 ; 890c0e06
; vcc = (s5 * v14 < -s4), turned into a 0 / -1 integer in v1 and masked with s12.
v_mul_f32_e32 v1, s5, v14 ; a021c050
v_cmp_lt_f32_e64 vcc, v1, -s4 ; d041006a 40000901
v_cndmask_b32_e64 v1, 0, 1, vcc ; d1000001 1a902800
v_sub_u32_e32 v1, vcc, 0, v1 ; 34020280
v_and_b32_e32 v1, s12, v1 ; 2602020c
v_cmp_ne_i32_e32 vcc, 0, v1 ; 7d8a0280
s_and_b64 s[0:1], vcc, exec ; 86807e6a
s_andn2_b64 exec, exec, s[0:1] ; 89fe007e
; Same early-out pattern as above: skip the exp + s_endpgm if any lane remains.
s_cbranch_scc1 3 ; bf850003
exp null off, off, off, off done vm ; c4001890 80808080
s_endpgm ; bf810000
; Resource/sampler descriptors and constants for the coordinate computation below.
s_load_dwordx8 s[20:27], s[2:3], 0x100 ; c00e0501 10000000
s_load_dwordx4 s[4:7], s[2:3], 0x0 ; c00a0101 00000000
s_buffer_load_dwordx4 s[28:31], s[16:19], 0xc ; c02a0708 c0000000
s_buffer_load_dwordx4 s[32:35], s[16:19], 0x1c ; c02a0808 1c000000
s_buffer_load_dwordx2 s[0:1], s[16:19], 0x0 ; c0260008 00000000
s_buffer_load_dwordx4 s[36:39], s[16:19], 0x2c ; c02a0908 2c000000
s_buffer_load_dword s12, s[16:19], 0x3c ; c0220308 3c000000
s_buffer_load_dwordx2 s[14:15], s[16:19], 0xa90 ; c0260388 a9000000
s_buffer_load_dwordx2 s[40:41], s[16:19], 0xaa0 ; c0260a08 aa000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Three linear combinations of (v4, v5, v6) plus a constant term:
;   v1  = (s0*v4  + s29*v5 + s33*v6 + s37) / 2
;   v13 = (s1*v4  + s30*v5 + s34*v6 + s38) / 2
;   v14 =  s28*v4 + s32*v5 + s36*v6 + s12
; NOTE(review): looks like a matrix transform followed by a divide by the third
; row -- confirm against the shader source.
v_mul_f32_e32 v1, s29, v5 ; a020a1d0
v_mul_f32_e32 v13, s30, v5 ; a1a0a1e0
v_mul_f32_e32 v14, s32, v5 ; a1c0a200
v_mac_f32_e32 v1, s0, v4 ; 2c020800
v_mac_f32_e32 v13, s1, v4 ; 2c1a0801
v_mac_f32_e32 v14, s28, v4 ; 2c1c081c
v_mac_f32_e32 v1, s33, v6 ; 2c020c21
v_mac_f32_e32 v13, s34, v6 ; 2c1a0c22
v_mac_f32_e32 v14, s36, v6 ; 2c1c0c24
v_add_f32_e64 v1, s37, v1 div:2 ; d1010001 18020225
v_add_f32_e64 v13, s38, v13 div:2 ; d101000d 18021a26
v_add_f32_e32 v14, s12, v14 ; 21c1c0c0
; v1 = v1 / v14 + 0.5 ; v13 = -v13 / v14 + 0.5
v_rcp_f32_e32 v14, v14 ; 7e1c450e
v_mad_f32 v1, v1, v14, 0.5 ; d1c10001 3c21d010
v_mad_f32 v13, v13, -v14, 0.5 ; d1c1000d 43c21d0d
; s14/s15: float -> signed int, then that integer converted back to float as unsigned.
v_cvt_i32_f32_e32 v14, s14 ; 7e1c100e
v_cvt_i32_f32_e32 v15, s15 ; 7e1e100f
v_cvt_f32_u32_e32 v14, v14 ; 7e1c0d0e
v_cvt_f32_u32_e32 v15, v15 ; 7e1e0d0f
; Reciprocal chain: v14/v15 become 1/v14, 1/v15; v16/v17 become
; 1 / ((1/v14) * (1/s40)) and 1 / ((1/v15) * (1/s41)). v1/v13 are scaled by
; v16/v17, then the final coordinates are v16 = v14 * v1 and v17 = v15 * v13.
v_rcp_f32_e32 v16, s40 ; 7e204428
v_rcp_f32_e32 v17, s41 ; 7e224429
v_rcp_f32_e32 v14, v14 ; 7e1c450e
v_rcp_f32_e32 v15, v15 ; 7e1e450f
v_mul_f32_e32 v16, v14, v16 ; a20210e0
v_mul_f32_e32 v17, v15, v17 ; a22230f0
v_rcp_f32_e32 v16, v16 ; 7e204510
v_rcp_f32_e32 v17, v17 ; 7e224511
v_mul_f32_e32 v1, v1, v16 ; a0221010
v_mul_f32_e32 v13, v13, v17 ; a1a230d0
v_mul_f32_e32 v16, v14, v1 ; a20030e0
v_mul_f32_e32 v17, v15, v13 ; a221b0f0
; Two image_sample_lz fetches with the same address VGPRs (starting at v16) and
; sampler s[4:7], using the resources at s[2:3]+0x100 and s[2:3]+0x40.
; The second one writes v[16:19], overwriting its own address registers.
image_sample_lz v[20:23], v16, s[20:27], s[4:7] dmask:0xf ; f09c0f00 25141000
s_load_dwordx8 s[20:27], s[2:3], 0x40 ; c00e0501 40000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
image_sample_lz v[16:19], v16, s[20:27], s[4:7] dmask:0xf ; f09c0f00 25101000
s_load_dwordx8 s[20:27], s[2:3], 0x140 ; c00e0501 14000000
s_buffer_load_dword s12, s[16:19], 0xa80 ; c0220308 a8000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; v2 and v3 are read here without having been written earlier in this listing,
; so they are shader inputs. NOTE(review): presumably the fragment position -- confirm.
; They are divided by float(s12), converted to int and arithmetically shifted
; right by 6 to form the integer coordinates (v2, v3) of the image_load.
v_cvt_f32_i32_e32 v1, s12 ; 7e020a0c
v_rcp_f32_e32 v1, v1 ; 7e024501
v_mul_f32_e32 v2, v2, v1 ; a0403020
v_mul_f32_e32 v1, v3, v1 ; a0203030
v_cvt_i32_f32_e32 v2, v2 ; 7e041102
v_cvt_i32_f32_e32 v1, v1 ; 7e021101
v_ashrrev_i32_e32 v2, 6, v2 ; 22040486
v_ashrrev_i32_e32 v3, 6, v1 ; 22060286
; Two-component load into v[1:2]; the loop at BB1 uses v1 as an index and v2 as a bound.
image_load v[1:2], v2, s[20:27] dmask:0x3 unorm ; f0001300 50102000
; vmcnt(1): both image_sample_lz results are ready; the image_load may still be in flight.
s_waitcnt vmcnt(1) ; bf8ccf71
; v3 = s13 * v16 (s13 still holds the 0x38d1b717 literal set above).
v_mul_f32_e32 v3, s13, v16 ; a06200d0
; v20..v22 = v20..v22 * 5.0 + v3 (0x40a00000 is 5.0); v3 = v23 + v3.
v_madmk_f32 v20, v20, 0x40a00000, v3 ; 2e280714 40a00000
v_madmk_f32 v21, v21, 0x40a00000, v3 ; 2e2a0715 40a00000
v_madmk_f32 v22, v22, 0x40a00000, v3 ; 2e2c0716 40a00000
v_add_f32_e32 v3, v23, v3 ; 20607170
; Zero the six accumulators updated by the loop body (v13..v15 and v23..v25).
v_mov_b32_e32 v13, 0 ; 7e1a0280
v_mov_b32_e32 v14, 0 ; 7e1c0280
v_mov_b32_e32 v15, 0 ; 7e1e0280
v_mov_b32_e32 v23, 0 ; 7e2e0280
v_mov_b32_e32 v24, 0 ; 7e300280
v_mov_b32_e32 v25, 0 ; 7e320280
; Switch to whole-quad mode again, save that exec in s[0:1] (restored at BB10),
; and clear the loop counter s4.
s_wqm_b64 exec, exec ; befe077e
s_mov_b64 s[0:1], exec ; be80017e
s_mov_b32 s4, 0 ; be840080
; BB1: loop header. s4 is the iteration counter and v2 the per-lane bound.
BB1:
; Wait for outstanding vector memory results (the image_load into v[1:2] on the first pass).
s_waitcnt vmcnt(0) ; bf8ccf70
; vcc = lanes whose bound has been reached (unsigned s4 >= v2).
v_cmp_ge_u32_e32 vcc, s4, v2 ; 7d9c0404
; s[6:7] = old exec; exec = exec & vcc, i.e. only the finished lanes.
s_and_saveexec_b64 s[6:7], vcc ; be86206a
; No lane finished: go straight to the loop body.
s_cbranch_execz BB9 ; bf880002
; BB2: remove the finished lanes (current exec) from the saved mask s[6:7].
; scc is set when the result is non-zero; if no lane is left, leave the loop.
BB2:
s_andn2_b64 s[6:7], s[6:7], exec ; 89867e06
s_cbranch_scc0 BB10 ; bf84006e
; BB9: loop body, executed by the lanes in s[6:7]. Each pass fetches one index,
; reads two 4-dword records from the s[16:19] buffer at that index, computes a
; weighted contribution and adds it to the accumulators v13..v15 and v23..v25.
; NOTE(review): the math resembles a per-light diffuse term with distance
; falloff -- confirm against the shader source.
BB9:
s_mov_b64 exec, s[6:7] ; befe0106
; Buffer descriptor for the index fetch; reloaded on every iteration.
s_load_dwordx4 s[12:15], s[2:3], 0x180 ; c00a0301 18000000
; v26 = v1 + 1 is the next index (copied back into v1 at the end of the body).
v_add_u32_e32 v26, vcc, 1, v1 ; 32340281
s_waitcnt lgkmcnt(0) ; bf8cc07f
; v1 = element fetched at index v1.
buffer_load_format_x v1, v1, s[12:15], 0 idxen ; e0002000 80030101
s_waitcnt vmcnt(0) ; bf8ccf70
; First record at byte offset (v1 + 33) * 16: dword at +12 -> v28, dwords at +0..+8 -> v[29:31].
v_add_u32_e32 v27, vcc, 33, v1 ; 323602a1
v_lshlrev_b32_e32 v27, 4, v27 ; 24363684
buffer_load_dword v28, v27, s[16:19], 0 offen offset:12 ; e050100c 80041c1b
buffer_load_dwordx3 v[29:31], v27, s[16:19], 0 offen ; e0581000 80041d1b
; Second record at byte offset (v1 + 0x61) * 16: dword at +12 -> v27, dwords at +0..+8 -> v[32:34].
v_add_u32_e32 v1, vcc, 0x61, v1 ; 320202ff 61000000
v_lshlrev_b32_e32 v1, 4, v1 ; 24020284
buffer_load_dword v27, v1, s[16:19], 0 offen offset:12 ; e050100c 80041b01
buffer_load_dwordx3 v[32:34], v1, s[16:19], 0 offen ; e0581000 80042001
; s5 = 0x3ca3d70a (about 0.02), added in the falloff computation below.
s_mov_b32 s5, 0x3ca3d70a ; be8500ff 3ca3d70a
; Advance the loop counter.
s_add_u32 s4, s4, 1 ; 80048104
; vmcnt(3): of the four loads issued above only the first (v28) must be complete.
s_waitcnt vmcnt(3) ; bf8ccf73
; v1 = all-ones where v28 > -0.5, else zero; used as an AND mask below.
v_cmp_lt_f32_e32 vcc, -0.5, v28 ; 7c8238f1
v_cndmask_b32_e64 v1, 0, 1, vcc ; d1000001 1a902800
v_sub_u32_e32 v1, vcc, 0, v1 ; 34020280
; Range tests on v28: vcc = (v28 >= 2.5), s[6:7] = (v28 >= 1.5),
; s[12:13] = (v28 >= 0.5), s[14:15] = (v28 < 0.5).
; s[6:7] and s[12:15] are overwritten here, after their earlier uses in this iteration.
v_cmp_le_f32_e32 vcc, 0x40200000, v28 ; 7c8638ff 40200000
s_mov_b32 s6, 0x3fc00000 ; be8600ff 3fc00000
v_cmp_le_f32_e64 s[6:7], s6, v28 ; d0430006 23806000
v_cmp_le_f32_e64 s[12:13], 0.5, v28 ; d043000c 238f0000
v_cmp_gt_f32_e64 s[14:15], 0.5, v28 ; d044000e 238f0000
; Build four 0.0/1.0 weights (v28, v35, v36, v37), one per range; a higher
; range zeroes the weights of the lower ones.
v_cndmask_b32_e64 v28, 0, 1.0, s[14:15] ; d100001c 39e48000
v_cndmask_b32_e64 v35, 0, 1.0, s[12:13] ; d1000023 31e48000
v_cndmask_b32_e64 v28, v28, 0, s[6:7] ; d100001c 19011c00
v_cndmask_b32_e64 v35, v35, 0, s[6:7] ; d1000023 19012300
v_cndmask_b32_e64 v36, 0, 1.0, s[6:7] ; d1000024 19e48000
v_cndmask_b32_e64 v28, v28, 0, vcc ; d100001c 1a9011c0
v_cndmask_b32_e64 v35, v35, 0, vcc ; d1000023 1a901230
v_cndmask_b32_e64 v36, v36, 0, vcc ; d1000024 1a901240
v_cndmask_b32_e64 v37, 0, 1.0, vcc ; d1000025 1a9e4800
; Clear all weights when v28 <= -0.5.
v_and_b32_e32 v28, v1, v28 ; 26383901
v_and_b32_e32 v35, v1, v35 ; 26464701
v_and_b32_e32 v36, v1, v36 ; 26484901
v_and_b32_e32 v1, v1, v37 ; 26024b01
; v35 = weighted sum of v16..v19, i.e. the component selected by the range v28 falls in.
v_mul_f32_e32 v35, v17, v35 ; a4647110
v_mac_f32_e32 v35, v16, v28 ; 2c463910
v_mac_f32_e32 v35, v18, v36 ; 2c464912
v_mac_f32_e32 v35, v19, v1 ; 2c460313
; vmcnt(2): v[29:31] is ready.
s_waitcnt vmcnt(2) ; bf8ccf72
; Difference vector (v1, v28, v29) = v[29:31] - (v4, v5, v6).
v_subrev_f32_e32 v1, v4, v29 ; 6023b040
v_subrev_f32_e32 v28, v5, v30 ; 6383d050
v_subrev_f32_e32 v29, v6, v31 ; 63a3f060
; v30 = squared length; v31 = length; v30 = 1 / length.
v_mul_f32_e32 v30, v28, v28 ; a3c391c0
v_mac_f32_e32 v30, v1, v1 ; 2c3c0301
v_mac_f32_e32 v30, v29, v29 ; 2c3c3b1d
v_sqrt_f32_e32 v31, v30 ; 7e3e4f1e
v_rsq_f32_e32 v30, v30 ; 7e3c491e
; Normalize the difference vector.
v_mul_f32_e32 v1, v1, v30 ; a023d010
v_mul_f32_e32 v28, v28, v30 ; a383d1c0
v_mul_f32_e32 v29, v29, v30 ; a3a3d1d0
; v1 = clamp(dot(normalized difference, (v7, v12, v0))).
v_mul_f32_e32 v28, v28, v12 ; a38191c0
v_mac_f32_e32 v28, v1, v7 ; 2c380f01
v_mad_f32 v1, v29, v0, v28 clamp ; d1c18001 472011d0
; vmcnt(1): v27 is ready.
s_waitcnt vmcnt(1) ; bf8ccf71
; v36 = clamp(length / v27); v36 = 1 / (v36 * v36 + s5); then add the literal
; 0xbf7afafb and multiply by the literal 0x3e50e560.
v_rcp_f32_e32 v27, v27 ; 7e36451b
v_mul_f32_e64 v36, v31, v27 clamp ; d1058024 2371f000
v_mad_f32 v36, v36, v36, s5 ; d1c10024 16492400
v_rcp_f32_e32 v36, v36 ; 7e484524
v_add_f32_e32 v36, 0xbf7afafb, v36 ; 24848ff0 bf7afafb
v_mul_f32_e32 v36, 0x3e50e560, v36 ; a4848ff0 3e50e560
; vmcnt(0): v[32:34] is ready.
s_waitcnt vmcnt(0) ; bf8ccf70
; (v37, v38, v39) = v35 * v36 * v[32:34].
v_mul_f32_e32 v37, v36, v32 ; a4a41240
v_mul_f32_e32 v38, v36, v33 ; a4c43240
v_mul_f32_e32 v36, v36, v34 ; a4845240
v_mul_f32_e32 v37, v35, v37 ; a4a4b230
v_mul_f32_e32 v38, v35, v38 ; a4c4d230
v_mul_f32_e32 v39, v35, v36 ; a4e49230
; (v27, v28, v1) = the same values additionally scaled by the clamped dot product.
v_mul_f32_e32 v27, v37, v1 ; a3603250
v_mul_f32_e32 v28, v38, v1 ; a3803260
v_mul_f32_e32 v1, v39, v1 ; a0203270
; Accumulate: v25/v24/v23 take the unscaled terms; v15/v14/v13 take the
; dot-scaled terms multiplied by v8/v9/v10.
v_add_f32_e32 v25, v37, v25 ; 23233250
v_add_f32_e32 v24, v38, v24 ; 23031260
v_add_f32_e32 v23, v39, v23 ; 22e2f270
v_mac_f32_e32 v15, v27, v8 ; 2c1e111b
v_mac_f32_e32 v14, v28, v9 ; 2c1c131c
v_mac_f32_e32 v13, v1, v10 ; 2c1a1501
; Next index, then back to the loop header.
v_mov_b32_e32 v1, v26 ; 7e02031a
s_branch BB1 ; bf82ff8c
; BB10: loop exit. Restores exec from s[0:1], adds a constant term scaled by
; v8..v10 to the accumulators v15/v14/v13, then tests a flag dword.
BB10:
s_mov_b64 exec, s[0:1] ; befe0100
s_buffer_load_dwordx4 s[4:7], s[8:11], 0x0 ; c02a0104 00000000
s_buffer_load_dword s0, s[16:19], 0xac4 ; c0220008 ac400000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; v15 += s4 * v8 ; v14 += s5 * v9 ; v13 += s6 * v10
v_mac_f32_e32 v15, s4, v8 ; 2c1e1004
v_mac_f32_e32 v14, s5, v9 ; 2c1c1205
v_mac_f32_e32 v13, s6, v10 ; 2c1a1406
; scc = (s0 != 0). When the dword at 0xac4 is zero, go to BB12; otherwise fall into BB11.
s_cmp_lg_i32 s0, 0 ; bf018000
s_cbranch_scc0 BB12 ; bf84003d
; BB11: taken when the dword at 0xac4 is non-zero. Transforms (v7, v12, v0)
; with nine constants, derives cube-face coordinates from the result, samples,
; and combines the sample with the v25/v24/v23 accumulators.
; Results are left in v0, v1 and v7.
BB11:
s_load_dwordx8 s[4:11], s[2:3], 0x80 ; c00e0101 80000000
s_nop 0 ; bf800000
; This load overwrites s[2:3], the base address it reads from.
s_load_dwordx4 s[0:3], s[2:3], 0x10 ; c00a0001 10000000
s_buffer_load_dword s12, s[16:19], 0xabc ; c0220308 abc00000
s_buffer_load_dword s13, s[16:19], 0x1c0 ; c0220348 1c000000
s_buffer_load_dwordx4 s[20:23], s[16:19], 0x1cc ; c02a0508 1cc00000
s_buffer_load_dwordx4 s[24:27], s[16:19], 0x1e0 ; c02a0608 1e000000
s_buffer_load_dwordx4 s[28:31], s[16:19], 0x1f0 ; c02a0708 1f000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; v1  = s21*v7 + s24*v12 + s28*v0
; v2  = s22*v7 + s25*v12 + s29*v0
; v12 = s23*v7 + s26*v12 + s30*v0
v_mul_f32_e32 v1, s24, v12 ; a0218180
v_mul_f32_e32 v2, s25, v12 ; a0418190
v_mul_f32_e32 v12, s26, v12 ; a18181a0
v_mac_f32_e32 v1, s21, v7 ; 2c020e15
v_mac_f32_e32 v2, s22, v7 ; 2c040e16
v_mac_f32_e32 v12, s23, v7 ; 2c180e17
v_mac_f32_e32 v1, s28, v0 ; 2c02001c
v_mac_f32_e32 v2, s29, v0 ; 2c04001d
v_mac_f32_e32 v12, s30, v0 ; 2c18001e
; Cube coordinates: v0 = 1 / |major axis|; v16 = sc * v0 + 1.5; v17 = tc * v0 + 1.5;
; v18 = face id (0x3fc00000 is 1.5).
v_cubema_f32 v0, v1, v2, v12 ; d1c70000 43205010
v_rcp_f32_e64 v0, |v0| ; d1620100 10000000
v_cubesc_f32 v7, v1, v2, v12 ; d1c50007 43205010
v_madak_f32 v16, v7, v0, 0x3fc00000 ; 30200107 3fc00000
v_cubetc_f32 v7, v1, v2, v12 ; d1c60007 43205010
v_madak_f32 v17, v7, v0, 0x3fc00000 ; 30220107 3fc00000
v_cubeid_f32 v18, v1, v2, v12 ; d1c40012 43205010
; Three-component sample (dmask 0x7) with the 'da' flag; result in v[0:2].
image_sample v[0:2], v16, s[4:11], s[0:3] dmask:0x7 da ; f0804700 10010000
; (v7, v12, v16) = s12 * (v25, v24, v23) + s13 + s20; computed while the sample is in flight.
v_mul_f32_e32 v7, s12, v25 ; a0e320c0
v_mul_f32_e32 v12, s12, v24 ; a18300c0
v_mul_f32_e32 v16, s12, v23 ; a202e0c0
v_add_f32_e32 v7, s13, v7 ; 20e0e0d0
v_add_f32_e32 v12, s13, v12 ; 218180d0
v_add_f32_e32 v16, s13, v16 ; 220200d0
v_add_f32_e32 v7, s20, v7 ; 20e0e140
v_add_f32_e32 v12, s20, v12 ; 21818140
v_add_f32_e32 v16, s20, v16 ; 22020140
s_waitcnt vmcnt(0) ; bf8ccf70
; Multiply the sample by those terms, by v8/v9/v10 and by v3.
v_mul_f32_e32 v0, v7, v0 ; a0001070
v_mul_f32_e32 v1, v12, v1 ; a02030c0
v_mul_f32_e32 v2, v16, v2 ; a0405100
v_mul_f32_e32 v0, v8, v0 ; a0001080
v_mul_f32_e32 v1, v9, v1 ; a0203090
v_mul_f32_e32 v2, v10, v2 ; a04050a0
v_mul_f32_e32 v0, v3, v0 ; a0001030
v_mul_f32_e32 v1, v3, v1 ; a0203030
v_mul_f32_e32 v7, v3, v2 ; a0e05030
s_branch BB13 ; bf820003
; BB12: alternative to BB11 -- the three values BB11 would produce (v0, v1, v7) are zero.
BB12:
v_mov_b32_e32 v0, 0 ; 7e000280
v_mov_b32_e32 v1, 0 ; 7e020280
v_mov_b32_e32 v7, 0 ; 7e0e0280
; BB13: join of BB11/BB12. Sums the three contributions into (v0, v12, v1) and
; optionally replaces each with 1 - exp(-s0 * x), depending on the constant s0.
BB13:
s_buffer_load_dword s0, s[16:19], 0xb24 ; c0220008 b2400000
; (v0, v1, v7) += (v20, v21, v22) * (v8, v9, v10)
v_mac_f32_e32 v0, v20, v8 ; 2c001114
v_mac_f32_e32 v1, v21, v9 ; 2c021315
v_mac_f32_e32 v7, v22, v10 ; 2c0e1516
s_buffer_load_dword s1, s[16:19], 0xacc ; c0220048 acc00000
; Add the loop accumulators: v0 += v15 ; v12 = v1 + v14 ; v1 = v7 + v13.
v_add_f32_e32 v0, v0, v15 ; 2001f000
v_add_f32_e32 v12, v1, v14 ; 2181d010
v_add_f32_e32 v1, v7, v13 ; 2021b070
; 0x3fb8aa3b is log2(e); together with v_exp_f32 (base 2) below this yields exp(-s0 * x).
v_mul_f32_e32 v2, 0x3fb8aa3b, v0 ; a0400ff0 3fb8aa3b
v_mul_f32_e32 v3, 0x3fb8aa3b, v12 ; a0618ff0 3fb8aa3b
v_mul_f32_e32 v7, 0x3fb8aa3b, v1 ; a0e02ff0 3fb8aa3b
s_waitcnt lgkmcnt(0) ; bf8cc07f
; vcc = (1.0 < s0): selects the curved value in the v_cndmask instructions below.
v_cmp_lt_f32_e64 vcc, 1.0, s0 ; d041006a f2000000
v_mul_f32_e32 v2, s0, v2 ; a0404000
v_mul_f32_e32 v3, s0, v3 ; a0606000
v_mul_f32_e32 v7, s0, v7 ; a0e0e000
v_exp_f32_e64 v2, -v2 ; d1600002 20000102
v_exp_f32_e64 v3, -v3 ; d1600003 20000103
v_exp_f32_e64 v7, -v7 ; d1600007 20000107
v_sub_f32_e32 v2, 1.0, v2 ; 40404f20
v_sub_f32_e32 v3, 1.0, v3 ; 40606f20
v_sub_f32_e32 v7, 1.0, v7 ; 40e0ef20
; Keep the linear value unless vcc is set.
v_cndmask_b32_e32 v0, v0, v2, vcc ; 50000000
v_cndmask_b32_e32 v12, v12, v3, vcc ; 18070c00
v_cndmask_b32_e32 v1, v1, v7, vcc ; 20f01000
; When the dword at 0xacc is zero, skip to BB18; otherwise fall into BB14.
s_cmp_lg_i32 s1, 0 ; bf018001
s_cbranch_scc0 BB18 ; bf84002f
; BB14: tests the constant at 0xae8. If (0 < s0) is false, go to BB16;
; otherwise fall into BB15.
BB14:
s_buffer_load_dword s0, s[16:19], 0xae8 ; c0220008 ae800000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_cmp_lt_f32_e64 vcc, 0, s0 ; d041006a 80000000
; The compared operand is a scalar, so vcc != 0 means the comparison held.
s_cmp_lg_u64 0, vcc ; bf136a80
s_cbranch_scc0 BB16 ; bf840008
; BB15: v2 = constant at 0xae0 + constant at 0x158, then continue at BB17.
BB15:
s_buffer_load_dword s0, s[16:19], 0xae0 ; c0220008 ae000000
s_buffer_load_dword s1, s[16:19], 0x158 ; c0220048 15800000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_mov_b32_e32 v2, s0 ; 7e040200
v_add_f32_e32 v2, s1, v2 ; 20404010
s_branch BB17 ; bf820004
; BB16: v2 = constant at 0xae0 (without the 0x158 term that BB15 adds); falls into BB17.
BB16:
s_buffer_load_dword s0, s[16:19], 0xae0 ; c0220008 ae000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_mov_b32_e32 v2, s0 ; 7e040200
; BB17: computes a factor in v2 from (v4, v5, v6), the reference point
; s[0:2] and the threshold placed in v2 by BB15/BB16:
;   d  = (v4, v5, v6) - (s0, s1, s2)
;   v2 = clamp((v6 - v2) / |d.z|) * length(d)
;   v2 = min(1, pow(clamp(v2 / s4), s5))
; NOTE(review): looks like a height/distance fog term -- confirm against the shader source.
BB17:
s_buffer_load_dwordx4 s[0:3], s[16:19], 0x150 ; c02a0008 15000000
s_buffer_load_dword s4, s[16:19], 0xae4 ; c0220108 ae400000
s_buffer_load_dword s5, s[16:19], 0xaec ; c0220148 aec00000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Difference vector in v3, v7, v8 (subrev computes src1 - src0).
v_subrev_f32_e32 v3, s0, v4 ; 60608000
v_subrev_f32_e32 v7, s1, v5 ; 60e0a010
v_subrev_f32_e32 v8, s2, v6 ; 6100c020
v_subrev_f32_e32 v2, v2, v6 ; 6040d020
v_rcp_f32_e64 v9, |v8| ; d1620109 10800000
v_mul_f32_e64 v2, v2, v9 clamp ; d1058002 21302000
; v3 = length of the difference vector.
v_mul_f32_e32 v7, v7, v7 ; a0e0f070
v_mac_f32_e32 v7, v3, v3 ; 2c0e0703
v_mac_f32_e32 v7, v8, v8 ; 2c0e1108
v_sqrt_f32_e32 v3, v7 ; 7e064f07
v_mul_f32_e32 v2, v2, v3 ; a0407020
v_rcp_f32_e32 v3, s4 ; 7e064404
v_mul_f32_e64 v2, v2, v3 clamp ; d1058002 20702000
; pow(v2, s5) expressed as exp2(s5 * log2(v2)).
v_log_f32_e32 v2, v2 ; 7e044302
v_mul_f32_e32 v2, s5, v2 ; a0404050
v_exp_f32_e32 v2, v2 ; 7e044102
v_min_f32_e32 v2, 1.0, v2 ; 140404f2
s_branch BB19 ; bf820001
; BB18: reached from BB13 when the dword at 0xacc is zero; the factor v2 is zero.
BB18:
v_mov_b32_e32 v2, 0 ; 7e040280
; BB19: tests the flag dword at 0xad0. When it is zero, skip to BB25; otherwise fall into BB20.
BB19:
s_buffer_load_dword s0, s[16:19], 0xad0 ; c0220008 ad000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
s_cmp_lg_i32 s0, 0 ; bf018000
s_cbranch_scc0 BB25 ; bf84002a
; BB20: tests the constant at 0xaf8. If (0 < s0) is false, go to BB22;
; otherwise fall into BB21.
BB20:
s_buffer_load_dword s0, s[16:19], 0xaf8 ; c0220008 af800000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_cmp_lt_f32_e64 vcc, 0, s0 ; d041006a 80000000
; The compared operand is a scalar, so vcc != 0 means the comparison held.
s_cmp_lg_u64 0, vcc ; bf136a80
s_cbranch_scc0 BB22 ; bf840008
; BB21: v3 = constant at 0xaf0 + constant at 0x158, then continue at BB23.
BB21:
s_buffer_load_dword s0, s[16:19], 0xaf0 ; c0220008 af000000
s_buffer_load_dword s1, s[16:19], 0x158 ; c0220048 15800000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_mov_b32_e32 v3, s0 ; 7e060200
v_add_f32_e32 v3, s1, v3 ; 20606010
s_branch BB23 ; bf820004
; BB22: v3 = constant at 0xaf0 (without the 0x158 term that BB21 adds); falls into BB23.
BB22:
s_buffer_load_dword s0, s[16:19], 0xaf0 ; c0220008 af000000
s_waitcnt lgkmcnt(0) ; bf8cc07f
v_mov_b32_e32 v3, s0 ; 7e060200
; BB23: second factor, same shape as BB17 but without the pow step, added to v2:
;   d  = (v4, v5, v6) - (s0, s1, s2)
;   v3 = clamp(clamp((v6 - v3) / |d.z|) * length(d) / s4)
;   v2 = min(1, v2 + v3)
; v4..v7 are overwritten here. Falls through into BB25.
BB23:
s_buffer_load_dwordx4 s[0:3], s[16:19], 0x150 ; c02a0008 15000000
s_buffer_load_dword s4, s[16:19], 0xaf4 ; c0220108 af400000
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Difference vector in v4, v5, v7 (subrev computes src1 - src0).
v_subrev_f32_e32 v4, s0, v4 ; 60808000
v_subrev_f32_e32 v5, s1, v5 ; 60a0a010
v_subrev_f32_e32 v7, s2, v6 ; 60e0c020
v_subrev_f32_e32 v3, v3, v6 ; 6060d030
v_rcp_f32_e64 v6, |v7| ; d1620106 10700000
v_mul_f32_e64 v3, v3, v6 clamp ; d1058003 20d03000
; v4 = length of the difference vector.
v_mul_f32_e32 v5, v5, v5 ; a0a0b050
v_mac_f32_e32 v5, v4, v4 ; 2c0a0904
v_mac_f32_e32 v5, v7, v7 ; 2c0a0f07
v_sqrt_f32_e32 v4, v5 ; 7e084f05
v_mul_f32_e32 v3, v3, v4 ; a0609030
v_rcp_f32_e32 v4, s4 ; 7e084404
v_mul_f32_e64 v3, v3, v4 clamp ; d1058003 20903000
v_add_f32_e32 v2, v3, v2 ; 20405030
v_min_f32_e32 v2, 1.0, v2 ; 140404f2
; BB25: final block. Scales (v0, v12, v1) by (1 - v2), builds a second output
; from the accumulators v15/v14/v13, packs both to f16 and exports them to
; mrt0 and mrt1.
BB25:
v_sub_f32_e32 v2, 1.0, v2 ; 40404f20
v_mul_f32_e32 v0, v0, v2 ; a0005000
v_mul_f32_e32 v12, v12, v2 ; a18050c0
v_mul_f32_e32 v1, v1, v2 ; a0205010
; Second output: min(1, 0.2 * accumulator) per channel (0x3e4ccccd is 0.2).
v_mul_f32_e32 v2, 0x3e4ccccd, v15 ; a041eff0 3e4ccccd
v_mul_f32_e32 v3, 0x3e4ccccd, v14 ; a061cff0 3e4ccccd
v_mul_f32_e32 v4, 0x3e4ccccd, v13 ; a081aff0 3e4ccccd
v_min_f32_e32 v2, 1.0, v2 ; 140404f2
v_min_f32_e32 v3, 1.0, v3 ; 140606f2
v_min_f32_e32 v4, 1.0, v4 ; 140808f2
; Pack pairs of floats into f16x2. v11 (fourth component of the first
; image_sample in BB0) is the last component of both outputs.
v_cvt_pkrtz_f16_f32 v0, v0, v12 ; d2960000 21900000
v_cvt_pkrtz_f16_f32 v1, v1, v11 ; d2960001 21701000
exp mrt0 v0, off, v1, off compr ; c4000405 80800100
v_cvt_pkrtz_f16_f32 v2, v2, v3 ; d2960002 20702000
v_cvt_pkrtz_f16_f32 v3, v4, v11 ; d2960003 21704000
; Last export carries 'done'; the program ends afterwards.
exp mrt1 v2, off, v3, off done compr vm ; c4001c15 80800302
s_endpgm ; bf810000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment