Skip to content

Instantly share code, notes, and snippets.

@karolherbst
Created August 22, 2024 15:48
Show Gist options
  • Save karolherbst/4dddd5dc5686f06fe928bf3ccb3729dc to your computer and use it in GitHub Desktop.
Save karolherbst/4dddd5dc5686f06fe928bf3ccb3729dc to your computer and use it in GitHub Desktop.
Compute Shader:
Shader main disassembly:
; Single basic block: loads one dword from a computed global address,
; transforms it (shift right 9, unsigned high-multiply, shift right 9)
; and stores the result to a second computed global address.
; Values live on entry and read below: s2, s4, s5, v0.
; NOTE(review): presumably s2 is a descriptor base and s4/s5/v0 are
; dispatch/work-item ids - confirm against the calling convention.
BB0:
; Assemble s[0:3], used as the resource operand of the scalar buffer loads.
; s_bfm_b32 with width 1 and offset 15 yields the mask 0x8000.
s_mov_b32 s0, s2 ; be800302
s_bfm_b32 s1, 1, 15 ; 92018f81
s_movk_i32 s2, 0x600 ; b0020600
s_mov_b32 s3, 0x31016fac ; be8303ff 31016fac
; The three scalar buffer loads below are grouped as one clause. They read
; 4 dwords at offset 0, 8 dwords at 0x20 and 8 dwords at 0x40 through s[0:3].
s_clause 0x2 ; bfa10002
s_buffer_load_dwordx4 s[8:11], s[0:3], null ; f4280200 fa000000
s_buffer_load_dwordx8 s[16:23], s[0:3], 0x20 ; f42c0400 fa000020
s_buffer_load_dwordx8 s[24:31], s[0:3], 0x40 ; f42c0600 fa000040
; Keep only the low 10 bits of s4.
s_and_b32 s4, s4, 0x3ff ; 8704ff04 000003ff
; Wait for the scalar loads before using s[8:11], s[16:23], s[24:31].
s_waitcnt lgkmcnt(0) ; bf8cc07f
; Copy the high halves s9 and s17 into VGPRs for the carry adds further down.
v_mov_b32_e32 v1, s9 ; 7e020209
v_mov_b32_e32 v2, s17 ; 7e040211
; 64-bit add: s25:s5 = s25:s24 + s5 (s_addc_u32 consumes the carry).
s_add_u32 s5, s5, s24 ; 80051805
s_addc_u32 s25, 0, s25 ; 82191980
; Low 64 bits of (s25:s5) * s4, interleaved with adding v0:
;   s0 = high32(s5 * s4) + s25 * s4, s5 = low32(s5 * s4)
;   v0 = s5 + v0 with carry-out in s[2:3]; v3 = s0 + that carry.
; s0 and s[2:3] are reused here; the descriptor is no longer needed.
s_mul_hi_u32 s0, s5, s4 ; 9a800405
s_mul_i32 s5, s5, s4 ; 93050405
v_add_co_u32 v0, s[2:3], s5, v0 ; d70f0200 00020005
s_mul_i32 s25, s25, s4 ; 93190419
s_add_u32 s0, s0, s25 ; 80001900
; v[4:5] = s17:s16 + v3:v0 (v2 holds s17); the low add uses vcc for its
; carry while the pending high add of v3 uses s[2:3].
v_add_co_u32 v4, vcc, s16, v0 ; d70f6a04 00020010
v_add_co_ci_u32_e64 v3, s[2:3], 0, s0, s[2:3] ; d5280203 00080080
v_add_co_ci_u32_e32 v5, vcc, v3, v2, vcc ; 500a0503
; Scale the 64-bit index by 4 (dword-sized elements): v[2:3] = v[4:5] << 2.
v_lshlrev_b64 v[2:3], 2, v[4:5] ; d6ff0002 00020882
; Load address: v[4:5] = s9:s8 + v[2:3] (v1 holds s9).
v_add_co_u32 v4, vcc, s8, v2 ; d70f6a04 00020408
v_add_co_ci_u32_e32 v5, vcc, v1, v3, vcc ; 500a0701
global_load_dword v4, v[4:5], off ; dc308000 047d0004
; Store address, computed while the load is in flight:
; v[0:1] = s11:s10 + v[2:3] (v5 holds s11).
v_mov_b32_e32 v5, s11 ; 7e0a020b
v_add_co_u32 v0, vcc, s10, v2 ; d70f6a00 0002040a
v_add_co_ci_u32_e32 v1, vcc, v5, v3, vcc ; 50020705
; Wait for the loaded dword to arrive in v4.
s_waitcnt vmcnt(0) ; bf8c3f70
; v4 = (high32(unsigned (v4 >> 9) * 0xff803fe1)) >> 9
v_lshrrev_b32_e32 v4, 9, v4 ; 2c080889
v_mul_hi_u32 v4, v4, 0xff803fe1 ; d56a0004 0001ff04 ff803fe1
v_lshrrev_b32_e32 v4, 9, v4 ; 2c080889
; Write the result and end the program.
global_store_dword v[0:1], v4, off ; dc708000 007d0400
s_endpgm ; bf810000
Compute Shader:
Shader main disassembly:
; Single basic block: loads one dword from a computed global address,
; transforms it (shift right 9, signed 24-bit high-multiply, shift right 9)
; and stores the result to a second computed global address.
; Values live on entry and read below: s2, s4, s5, v0.
; NOTE(review): presumably s2 is a descriptor base and s4/s5/v0 are
; dispatch/work-item ids - confirm against the calling convention.
main:
BB0_0:
; Assemble s[0:3], used as the resource operand of the scalar buffer loads.
; s0 is copied from s2 before s2 is overwritten.
s_mov_b32 s0, s2 ; BE800302
s_mov_b32 s3, 0x31016fac ; BE8303FF 31016FAC
s_movk_i32 s2, 0x600 ; B0020600
s_mov_b32 s1, 0x8000 ; BE8103FF 00008000
; The three scalar buffer loads below are grouped as one clause. They read
; 2 dwords at 0x40, 2 dwords at 0x20 and 4 dwords at 0x0; the last load
; overwrites s[0:3] itself with the loaded data.
s_clause 0x2 ; BFA10002
s_buffer_load_dwordx2 s[6:7], s[0:3], 0x40 ; F4240180 FA000040
s_buffer_load_dwordx2 s[8:9], s[0:3], 0x20 ; F4240200 FA000020
s_buffer_load_dwordx4 s[0:3], s[0:3], 0x0 ; F4280000 FA000000
; Keep only the low 10 bits of s4.
s_and_b32 s4, s4, 0x3ff ; 8704FF04 000003FF
; Wait for the scalar loads before using s[0:3], s[6:7], s[8:9].
s_waitcnt lgkmcnt(0) ; BF8CC07F
; 64-bit add: s6:s5 = s7:s6 + s5 (s_addc_u32 consumes the carry).
s_add_u32 s5, s6, s5 ; 80050506
s_addc_u32 s6, s7, 0 ; 82068007
; Low 64 bits of (s6:s5) * s4:
;   s6 = s4 * s6 + high32(s5 * s4), s4 = low32(s4 * s5)
s_mul_hi_u32 s7, s5, s4 ; 9A870405
s_mul_i32 s6, s4, s6 ; 93060604
s_mul_i32 s4, s4, s5 ; 93040504
s_add_i32 s6, s6, s7 ; 81060706
; 64-bit add of the pair loaded from offset 0x20: s6:s4 = s9:s8 + s6:s4.
s_add_u32 s4, s8, s4 ; 80040408
s_addc_u32 s6, s9, s6 ; 82060609
; v[0:1] = s6:s4 + v0; the low add writes its carry to s[4:5] and the
; high add discards its carry-out (null destination).
v_add_co_u32 v0, s[4:5], s4, v0 ; D70F0400 00020004
v_add_co_ci_u32_e64 v1, null, s6, 0, s[4:5] ; D5287D01 00110006
; Scale the 64-bit index by 4 (dword-sized elements).
v_lshlrev_b64 v[0:1], 2, v[0:1] ; D6FF0000 00020082
; Load address: v[2:3] = s1:s0 + v[0:1].
v_add_co_u32 v2, vcc, s0, v0 ; D70F6A02 00020000
v_add_co_ci_u32_e32 v3, vcc, s1, v1, vcc ; 50060201
; Store address: v[0:1] = s3:s2 + v[0:1].
v_add_co_u32 v0, vcc, s2, v0 ; D70F6A00 00020002
v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc ; 50020203
global_load_dword v2, v[2:3], off ; DC308000 027D0002
; Wait for the loaded dword to arrive in v2.
s_waitcnt vmcnt(0) ; BF8C3F70
; v2 = (high32(signed24(0xff803fe1) * signed24(v2 >> 9))) >> 9
; NOTE(review): the other listing in this file (label BB0) uses v_mul_hi_u32
; with the same constant at this step. v_mul_hi_i32_i24 treats both operands
; as signed 24-bit values, so the two sequences do not compute the same
; result in general - confirm which one matches the shader source.
v_lshrrev_b32_e32 v2, 9, v2 ; 2C040489
v_mul_hi_i32_i24_e32 v2, 0xff803fe1, v2 ; 140404FF FF803FE1
v_lshrrev_b32_e32 v2, 9, v2 ; 2C040489
; Write the result and end the program.
global_store_dword v[0:1], v2, off ; DC708000 007D0200
s_endpgm ; BF810000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment