Last active
March 22, 2022 19:50
-
-
Save JustinTArthur/3edfa2237854878ff4ae56cbd2861ada to your computer and use it in GitHub Desktop.
Radeon R600 GPU Code for Signal Waiting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// HSAIL module &signal, version 1:0, declared with the $full profile,
// the $large machine model and the $default rounding mode.
module &signal:1:0:$full:$large:$default; | |
extension "amd:gcn"; | |
extension "IMAGE"; | |
// Sample Kernel From Michael Körber | |
//
// &__signal_test_kernel: each pass of @loop atomically adds 1 to the signed
// 64-bit value addressed by %val, then waits on the %handle_in signal for the
// value 1. The kernel returns once the value produced by the wait equals 1;
// otherwise it repeats the whole pass (including the atomic add).
//
// Kernel arguments:
//   %val        - u64, used as the global address of the s64 counter.
//   %handle_in  - sig64 signal handle that is waited on.
//   %handle_out - sig64 signal handle; loaded but never used (see below).
prog kernel &__signal_test_kernel( | |
kernarg_u64 %val, | |
kernarg_sig64 %handle_in, | |
kernarg_sig64 %handle_out) | |
{ | |
@__OpenCL_signal_test_kernel_entry: | |
// $d0 = %handle_in (the signal that the loop waits on).
ld_kernarg_align(8)_width(all)_sig64 $d0, [%handle_in]; | |
// $d1 = %handle_out. NOTE(review): $d1 is overwritten by the signal_wait
// result below before it is ever read, so this handle is effectively unused.
ld_kernarg_align(8)_width(all)_sig64 $d1, [%handle_out]; | |
// load value pointer to d2 | |
ld_kernarg_align(8)_width(all)_u64 $d2, [%val]; | |
@loop: | |
// Add 1 to the s64 at global address [$d2]; no result is returned
// (atomicnoret), ordering is scacq at wg scope.
atomicnoret_add_global_scacq_wg_s64 [$d2], 1; | |
// Relaxed wait on the signal in $d0 for the condition "== 1"; the value
// reported by the wait is written to $d1.
signal_wait_eq_rlx_s64_sig64 $d1, $d0, 1; | |
// $c1 = ($d1 != 1). The returned value is re-checked here, so a wait that
// comes back with something other than 1 sends control back to @loop.
cmp_ne_b1_s64 $c1, $d1, 1; | |
cbr_width(all)_b1 $c1, @loop; | |
ret; | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
asic(VI) | |
type(CS) | |
// | |
// amd_kernel_code_t for &__signal_test_kernel (000000000000 - 000000000100) | |
// | |
// | |
// &__signal_test_kernel: | |
// | |
//
// Listing overview: this version handles both loops per lane by editing the
// EXEC mask. Structure:
//   label_0047 : outer loop - 64-bit atomic add of 1, then start the wait.
//   label_0058 : inner wait loop - poll a 64-bit value at s[0:1] + 8 until it
//                equals 1 or the s_memrealtime delta reaches 0xffff.
//   label_0077 : after the wait - lanes whose polled value is 1 leave the
//                outer loop; the others go back to label_0047.
//   label_0081 : program end.
//
// Register roles as used below:
//   s[0:1]   two dwords loaded from s[4:5] + 0x08 (base of the polled object)
//   s[2:3]   two dwords loaded from s[4:5] + 0x00 (address of the atomic add)
//   s[4:5]   after the loads: mask of lanes still inside the outer loop
//   s[6:7]   s_memrealtime sample taken before the wait loop
//   s[8:9]   EXEC saved before the wait loop, restored at label_0077
//   s[10:11] mask of lanes still inside the wait loop
//   s[12:13] scratch: carry, time delta, then "lane finished waiting" mask
//   s[14:15] scratch: "value == 1" mask, then EXEC at the end of a poll
// NOTE(review): s[4:5] at entry is presumably the kernarg segment pointer, so
// offsets 0x00 / 0x08 would be %val / %handle_in of the HSAIL kernel - confirm
// against the amd_kernel_code_t header, which is not shown here.
//
// NOTE(review): M0 is not named by any later instruction in this listing;
// presumably it is set up for the flat memory instructions - confirm.
s_mov_b32 m0, 0x00010000 // 000000000100: BEFC00FF 00010000 | |
s_load_dwordx2 s[0:1], s[4:5], 0x08 // 000000000108: C0060002 00000008 | |
s_load_dwordx2 s[2:3], s[4:5], 0x00 // 000000000110: C0060082 00000000 | |
// s[4:5] is reused from here on: it now tracks the lanes in the outer loop.
s_mov_b64 s[4:5], exec // 000000000118: BE84017E | |
label_0047: | |
// Wait for the scalar loads above before s[2:3] is read.
s_waitcnt lgkmcnt(0) // 00000000011C: BF8C007F | |
// v[0:1] = address from s[2:3]; v[2:3] = 1 << 0, i.e. the 64-bit constant 1.
v_mov_b32 v0, s2 // 000000000120: 7E000202 | |
v_mov_b32 v1, s3 // 000000000124: 7E020203 | |
v_lshlrev_b64 v[2:3], 0, 1 // 000000000128: D28F0002 00010280 | |
// 64-bit atomic add of v[2:3] to the memory addressed by v[0:1].
flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] // 000000000130: DD880000 00000200 | |
// Start-of-wait timestamp, compared against on every poll below.
s_memrealtime s[6:7] // 000000000138: C0940180 00000000 | |
// Two copies of EXEC: s[8:9] is restored after the wait loop, s[10:11] is
// whittled down as lanes finish waiting.
s_mov_b64 s[8:9], exec // 000000000140: BE88017E | |
s_mov_b64 s[10:11], exec // 000000000144: BE8A017E | |
s_nop 0x0000 // 000000000148: BF800000 | |
s_nop 0x0000 // 00000000014C: BF800000 | |
s_nop 0x0000 // 000000000150: BF800000 | |
s_nop 0x0000 // 000000000154: BF800000 | |
s_nop 0x0000 // 000000000158: BF800000 | |
s_nop 0x0000 // 00000000015C: BF800000 | |
label_0058: | |
// v[0:1] = s[0:1] + 8 (32-bit add with the carry passed through s[12:13]).
v_add_u32 v0, s[12:13], s0, 8 // 000000000160: D1190C00 00011000 | |
v_mov_b32 v1, s1 // 000000000168: 7E020201 | |
v_addc_u32 v1, vcc, v1, 0, s[12:13] // 00000000016C: D11C6A01 00310101 | |
// Poll: load the 64-bit value at s[0:1] + 8 into v[2:3] (glc set).
// NOTE(review): presumably this is the signal's value field - confirm
// against the runtime's signal object layout.
flat_load_dwordx2 v[2:3], v[0:1] glc // 000000000174: DC550000 02000000 | |
// s[12:13] = current s_memrealtime sample - start sample in s[6:7].
s_memrealtime s[12:13] // 00000000017C: C0940300 00000000 | |
s_waitcnt lgkmcnt(0) // 000000000184: BF8C007F | |
s_sub_u32 s12, s12, s6 // 000000000188: 808C060C | |
s_subb_u32 s13, s13, s7 // 00000000018C: 828D070D | |
// s[12:13] = mask of lanes for which (delta >= 0x0000ffff), unsigned 64-bit.
// NOTE(review): 0xffff looks like the wait timeout in s_memrealtime ticks.
v_mov_b32 v0, 0x0000ffff // 000000000190: 7E0002FF 0000FFFF | |
v_mov_b32 v1, 0 // 000000000198: 7E020280 | |
v_cmp_ge_u64 s[12:13], s[12:13], v[0:1] // 00000000019C: D0EE000C 0002000C | |
// Wait for the polled value, then s[14:15] = mask of lanes with value == 1.
s_waitcnt vmcnt(0) // 0000000001A4: BF8C0F70 | |
v_cmp_eq_i64 s[14:15], v[2:3], 1 // 0000000001A8: D0E2000E 00010302 | |
// s[12:13] = lanes that are finished: value == 1 OR delta limit reached.
s_or_b64 s[12:13], s[14:15], s[12:13] // 0000000001B0: 878C0C0E | |
// s[14:15] = EXEC for this poll; EXEC = lanes that are NOT finished.
s_mov_b64 s[14:15], exec // 0000000001B4: BE8E017E | |
s_andn2_b64 exec, s[14:15], s[12:13] // 0000000001B8: 89FE0C0E | |
// Branch offset is 0 (BF880000): the target is the next instruction, so
// control reaches label_0070 whether or not the branch is taken.
s_cbranch_execz label_0070 // 0000000001BC: BF880000 | |
label_0070: | |
// EXEC = s[14:15] & ~EXEC, i.e. the lanes that ARE finished.
s_andn2_b64 exec, s[14:15], exec // 0000000001C0: 89FE7E0E | |
// No lane finished on this poll: keep s[10:11] as is and poll again.
s_cbranch_execz label_0074 // 0000000001C4: BF880002 | |
// Drop the finished lanes from the wait-loop mask; when the result is zero
// (SCC = 0) every lane has finished and the wait loop is left.
s_andn2_b64 s[10:11], s[10:11], exec // 0000000001C8: 898A7E0A | |
s_cbranch_scc0 label_0077 // 0000000001CC: BF840003 | |
label_0074: | |
// Continue polling with only the lanes still waiting, after a short sleep.
s_and_b64 exec, s[14:15], s[10:11] // 0000000001D0: 86FE0A0E | |
s_sleep 0x0001 // 0000000001D4: BF8E0001 | |
s_branch label_0058 // 0000000001D8: BF82FFE1 | |
label_0077: | |
// Wait loop done: restore the EXEC mask saved before it.
s_mov_b64 exec, s[8:9] // 0000000001DC: BEFE0108 | |
// s[6:7] = lanes whose polled value != 1 (these must repeat the outer loop).
v_cmp_ne_i64 s[6:7], v[2:3], 1 // 0000000001E0: D0E50006 00010302 | |
// EXEC = lanes whose polled value == 1 (s[8:9] keeps the full mask).
s_mov_b64 s[8:9], exec // 0000000001E8: BE88017E | |
s_andn2_b64 exec, s[8:9], s[6:7] // 0000000001EC: 89FE0608 | |
// No lane saw 1: nothing leaves the outer loop.
s_cbranch_execz label_007F // 0000000001F0: BF880002 | |
// Remove the lanes that saw 1 from the outer-loop mask; when the result is
// zero (SCC = 0) no lane is left and the program ends.
s_andn2_b64 s[4:5], s[4:5], exec // 0000000001F4: 89847E04 | |
s_cbranch_scc0 label_0081 // 0000000001F8: BF840002 | |
label_007F: | |
// Run the next outer iteration with the lanes still in the outer loop.
s_and_b64 exec, s[8:9], s[4:5] // 0000000001FC: 86FE0408 | |
s_branch label_0047 // 000000000200: BF82FFC6 | |
label_0081: | |
s_endpgm // 000000000204: BF810000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
asic(VI) | |
type(CS) | |
// | |
// amd_kernel_code_t for &__signal_test_kernel (000000000000 - 000000000100) | |
// | |
// | |
// &__signal_test_kernel: | |
// | |
//
// Listing overview: this version never modifies EXEC; both loops are driven
// by scalar branches on VCC. Structure:
//   label_0044 : outer loop - 64-bit atomic add of 1, then start the wait.
//   label_0050 : inner wait loop - poll a 64-bit value at s[2:3] + 8 until it
//                equals 1 or the s_memrealtime delta reaches 0xffff.
//   label_0067 : after the wait - repeat from label_0044 unless the polled
//                value is 1, otherwise end the program.
//
// Register roles as used below:
//   s[0:1]  first two dwords loaded from s[4:5] + 0x00 (atomic-add address)
//   s[2:3]  next two dwords of the same load (base of the polled object)
//   s[4:5]  after the load: s_memrealtime sample taken before the wait loop
//   s[6:7]  scratch: polled address, time delta, then "value == 1" mask
// NOTE(review): s[4:5] at entry is presumably the kernarg segment pointer, so
// s[0:1] / s[2:3] would be %val / %handle_in of the HSAIL kernel - confirm
// against the amd_kernel_code_t header, which is not shown here.
//
// NOTE(review): M0 is not named by any later instruction in this listing;
// presumably it is set up for the flat memory instructions - confirm.
s_mov_b32 m0, 0x00010000 // 000000000100: BEFC00FF 00010000 | |
s_load_dwordx4 s[0:3], s[4:5], 0x00 // 000000000108: C00A0002 00000000 | |
label_0044: | |
// v[0:1] = 1 << 0, i.e. the 64-bit constant 1 (the addend).
v_lshlrev_b64 v[0:1], 0, 1 // 000000000110: D28F0000 00010280 | |
// Wait for the scalar load above before s[0:1] is read.
s_waitcnt lgkmcnt(0) // 000000000118: BF8C007F | |
v_mov_b32 v2, s0 // 00000000011C: 7E040200 | |
v_mov_b32 v3, s1 // 000000000120: 7E060201 | |
// 64-bit atomic add of v[0:1] to the memory addressed by v[2:3].
flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] // 000000000124: DD880000 00000002 | |
// Start-of-wait timestamp; s[4:5] is reused, its entry value is gone.
s_memrealtime s[4:5] // 00000000012C: C0940100 00000000 | |
s_nop 0x0000 // 000000000134: BF800000 | |
s_nop 0x0000 // 000000000138: BF800000 | |
s_nop 0x0000 // 00000000013C: BF800000 | |
label_0050: | |
// s[6:7] = s[2:3] + 8 (scalar add with carry), copied to v[0:1] as address.
s_add_u32 s6, s2, 8 // 000000000140: 80068802 | |
s_addc_u32 s7, s3, 0 // 000000000144: 82078003 | |
v_mov_b32 v0, s6 // 000000000148: 7E000206 | |
v_mov_b32 v1, s7 // 00000000014C: 7E020207 | |
// Poll: load the 64-bit value at s[2:3] + 8 into v[2:3] (glc set).
// NOTE(review): presumably this is the signal's value field - confirm
// against the runtime's signal object layout.
flat_load_dwordx2 v[2:3], v[0:1] glc // 000000000150: DC550000 02000000 | |
// s[6:7] = current s_memrealtime sample - start sample in s[4:5].
s_memrealtime s[6:7] // 000000000158: C0940180 00000000 | |
s_waitcnt lgkmcnt(0) // 000000000160: BF8C007F | |
s_sub_u32 s6, s6, s4 // 000000000164: 80860406 | |
s_subb_u32 s7, s7, s5 // 000000000168: 82870507 | |
// VCC = mask of lanes for which (delta >= 0x0000ffff), unsigned 64-bit.
// NOTE(review): 0xffff looks like the wait timeout in s_memrealtime ticks.
v_mov_b32 v0, 0x0000ffff // 00000000016C: 7E0002FF 0000FFFF | |
v_mov_b32 v1, 0 // 000000000174: 7E020280 | |
v_cmp_ge_u64 vcc, s[6:7], v[0:1] // 000000000178: 7DDC0006 | |
// Wait for the polled value, then s[6:7] = mask of lanes with value == 1.
s_waitcnt vmcnt(0) // 00000000017C: BF8C0F70 | |
v_cmp_eq_i64 s[6:7], v[2:3], 1 // 000000000180: D0E20006 00010302 | |
// VCC = (value == 1 OR delta limit reached), restricted to the EXEC lanes.
s_or_b64 vcc, s[6:7], vcc // 000000000188: 87EA6A06 | |
s_and_b64 vcc, vcc, exec // 00000000018C: 86EA7E6A | |
// Any lane finished: leave the wait loop; otherwise sleep and poll again.
s_cbranch_vccnz label_0067 // 000000000190: BF870002 | |
s_sleep 0x0001 // 000000000194: BF8E0001 | |
s_branch label_0050 // 000000000198: BF82FFE9 | |
label_0067: | |
// Re-check the polled value against the 64-bit constant 1.
v_lshlrev_b64 v[0:1], 0, 1 // 00000000019C: D28F0000 00010280 | |
v_cmp_eq_i64 vcc, v[2:3], v[0:1] // 0000000001A4: 7DC40102 | |
// VCC == 0 means no lane saw the value 1: repeat the outer loop, including
// the atomic add. Otherwise fall through and end the program.
s_cbranch_vccz label_0044 // 0000000001A8: BF86FFD9 | |
s_endpgm // 0000000001AC: BF810000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment