Skip to content

Instantly share code, notes, and snippets.

@reinsteam
Last active January 15, 2020 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reinsteam/d0b525b7bbb1a72e506e35d75f39b405 to your computer and use it in GitHub Desktop.
Save reinsteam/d0b525b7bbb1a72e506e35d75f39b405 to your computer and use it in GitHub Desktop.
/*
set isa_file=%~1.isa
set analysis_file=%~1.a
set isa_file
rga --define COMPILER_AMD_RGA=1 --source-kind hlsl --asic Pitcairn --profile cs_5_0 --function %2 --intrinsics --isa %isa_file% %1
*/
#if COMPILER_AMD_RGA
#include "ags_shader_intrinsics_dx11.hlsl"
// Wave-wide vote on `pred`, forwarded to the AGS ballot intrinsic.
// The result is a uint2 -- presumably a per-lane bit mask split into
// low/high dwords; confirm against ags_shader_intrinsics_dx11.hlsl.
uint2 ballot(bool pred)
{
    const uint2 laneMask = AmdDxExtShaderIntrinsics_Ballot(pred);
    return laneMask;
}
// uint overload: forwards `x` to the AGS readfirstlane intrinsic.
// Presumably yields the value of `x` held by the first active lane of the
// wave, identical for every lane -- confirm against the AGS header.
uint ReadFirstLane(uint x)
{
    const uint firstLaneValue = AmdDxExtShaderIntrinsics_ReadfirstlaneU(x);
    return firstLaneValue;
}
// float overload: forwards `x` to the float variant of the AGS
// readfirstlane intrinsic.
// Presumably yields the value of `x` held by the first active lane of the
// wave -- confirm against the AGS header.
float ReadFirstLane(float x)
{
    const float firstLaneValue = AmdDxExtShaderIntrinsics_ReadfirstlaneF(x);
    return firstLaneValue;
}
// Forwards `x` and `laneId` to the AGS readlane intrinsic.
// Presumably returns the value of `x` held by lane `laneId`; any constraint
// on `laneId` (e.g. wave-uniformity) is not visible here -- check the AGS header.
uint ReadLane(uint x, uint laneId)
{
    const uint laneValue = AmdDxExtShaderIntrinsics_ReadlaneU(x, laneId);
    return laneValue;
}
// Returns true when ballot(true) has at least one non-zero component,
// i.e. when at least one lane took part in the vote.
bool AnyExecSet()
{
    const uint2 activeLanes = ballot(true);
    return any(activeLanes);
}
#endif
// Read-only input: one uint per texel, bound to SRV slot t0.
Texture2D<uint> InTexture : register(t0);
// Read/write output: receives one uint per texel, bound to UAV slot u0.
RWTexture2D<uint> OutTexture : register(u0);
// Compute-shader entry point (8x8x1 threads per group).
// Each thread loads one texel from InTexture, counts how many loop iterations
// pass before its own value equals the value reported by ReadFirstLane(), and
// writes that count to OutTexture at the same coordinate.
// NOTE: ReadFirstLane() and AnyExecSet() are only defined inside the
// `#if COMPILER_AMD_RGA` section, so this shader builds only with that define set.
[numthreads(8, 8, 1)]
void main(uint2 TexelId : SV_DispatchThreadId)
{
// Per-thread input value (presumably named for the vector register it lives in).
const uint VgprData = InTexture[TexelId];
// Iteration counter; incremented by the for-loop's step expression.
uint NumDivergentGroups = 0;
// Toggle: with `#if 0` the loop is compiled out and 0 is written for every texel.
#if 1
for (; ; ++NumDivergentGroups)
{
// Value broadcast by ReadFirstLane(); presumably wave-uniform (scalar) -- the
// disassembly comment in this file shows v_readfirstlane_b32 writing an SGPR.
const uint SgprData = ReadFirstLane(VgprData);
// This thread's value matches the broadcast one: leave the loop, keeping the
// current count (the step expression does not run on break).
if (SgprData == VgprData)
{
break;
}
// Second exit: stop when no lane is reported by the ballot.
// NOTE(review): presumably present to give the compiler a wave-level exit
// test for the remaining lanes; confirm the intent with the author.
if (AnyExecSet() == false)
{
break;
}
}
#endif
// Store the per-thread iteration count.
OutTexture[TexelId] = NumDivergentGroups;
}
/*
; -------- Disassembly --------------------
shader main
asic(SI)
type(CS)
v_mad_u32_u24 v0, s12, 8, v0 // 00000000: D2860000 0401100C
v_mad_u32_u24 v1, s13, 8, v1 // 00000008: D2860001 0405100D
s_load_dwordx8 s[12:19], s[2:3], 0x00 // 00000010: C0C60300
s_waitcnt lgkmcnt(0) // 00000014: BF8C007F
image_load v2, v[0:3], s[12:19] unorm // 00000018: F0001100 00030200
s_mov_b64 s[0:1], exec // 00000020: BE80047E
s_mov_b64 s[2:3], exec // 00000024: BE82047E
v_mov_b32 v3, 0 // 00000028: 7E060280
s_nop 0x0000 // 0000002C: BF800000
s_nop 0x0000 // 00000030: BF800000
s_nop 0x0000 // 00000034: BF800000
s_nop 0x0000 // 00000038: BF800000
s_nop 0x0000 // 0000003C: BF800000
label_0010:
s_waitcnt vmcnt(0) // 00000040: BF8C0F70
v_readfirstlane_b32 s12, v2 // 00000044: 7E180502
v_cmp_eq_i32 vcc, s12, v2 // 00000048: 7D04040C
s_and_saveexec_b64 s[12:13], vcc // 0000004C: BE8C246A
s_andn2_b64 s[2:3], s[2:3], exec // 00000050: 8A827E02
s_cbranch_scc0 label_001C // 00000054: BF840006
s_and_b64 exec, s[12:13], s[2:3] // 00000058: 87FE020C
s_or_b32 s12, exec_lo, exec_hi // 0000005C: 880C7F7E
s_cmp_eq_i32 s12, 0 // 00000060: BF00800C
s_cbranch_scc1 label_001C // 00000064: BF850002
v_add_i32 v3, vcc, 1, v3 // 00000068: 4A060681
s_branch label_0010 // 0000006C: BF82FFF4
label_001C:
s_mov_b64 exec, s[0:1] // 00000070: BEFE0400
v_mov_b32 v2, v3 // 00000074: 7E040303
v_mov_b32 v4, v3 // 00000078: 7E080303
v_mov_b32 v5, v3 // 0000007C: 7E0A0303
image_store v[2:5], v[0:3], s[4:11] dmask:0xf unorm glc // 00000080: F0203F00 00010200
s_endpgm // 00000088: BF810000
end
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment