Skip to content

Instantly share code, notes, and snippets.

@rygorous
Last active December 31, 2015 07:39
Embed
What would you like to do?
I got nothing.
// Minimal compute-shader repro for an fxc code-generation problem.
// A group of 4 threads runs; each thread writes one uint2 element, indexed by
// its thread index within the group, built from the low two bits of that index.
// NOTE(review): presumably the expressions must stay exactly as written
// (multiplies by constants) for the issue to reproduce - confirm before
// "simplifying" them.
RWBuffer<uint2> output;
[numthreads(4,1,1)]
void main(uint3 invoc : SV_GroupThreadID)
{
// Expected result per thread:
//   .x = (invoc.x & 1) * 16 -> 0 or 16 (bit 0 of the index moved to bit 4)
//   .y = (invoc.x & 2) * 8  -> 0 or 16 (bit 1 of the index moved to bit 4)
// The compiler listing that follows in this file instead emits a constant 0
// for .y (the line flagged "<--- NO.").
output[invoc.x] = uint2((invoc.x & 1) * 16, (invoc.x & 2) * 8);
}
// > fxc /Tcs_5_0 /Cc test.hlsl
//
// produces:
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.30.9200.16384
//
//
///
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// output UAV uint2 buf 0 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
// NOTE: fxc output for the uint2 shader above; instructions are verbatim, only
// the NOTE comments are annotations.
cs_5_0
dcl_globalFlags refactoringAllowed
dcl_uav_typed_buffer (uint,uint,uint,uint) u0
dcl_input vThreadIDInGroup.x
dcl_temps 1
dcl_thread_group 4, 1, 1
// NOTE: bitfield insert of width 1 at offset 4 into a zero base, i.e.
// r0.x (and likewise r0.z, r0.w) = (invoc & 1) << 4. This matches the source's
// (invoc.x & 1) * 16 for the .x component.
bfi r0.xzw, l(1, 0, 1, 1), l(4, 0, 4, 4), vThreadIDInGroup.xxxx, l(0, 0, 0, 0)
// NOTE: r0.y is set to the constant 0, but the source computes
// (invoc.x & 2) * 8 for .y, which is 16 whenever bit 1 of the thread index is
// set (threads 2 and 3). The .y component has been dropped.
mov r0.y, l(0) // <--- NO.
// NOTE: writes r0 to the UAV element addressed by the thread index.
store_uav_typed u0.xyzw, vThreadIDInGroup.xxxx, r0.xyzw
ret
// Approximately 4 instruction slots used
// Second repro for the same fxc issue: 64 threads per group, each writing one
// uint to out_buf at its own thread index. The value repacks bit fields of the
// thread index into a wider layout.
RWBuffer<uint> out_buf : register(u0);
[numthreads(64,1,1)]
void main(uint invoc : SV_GroupThreadID)
{
// Bits 0-1 of invoc, kept in place (0..3).
uint block_id = (invoc >> 0) & 3;
// Bits 2-4 of invoc, shifted down to 0..7.
uint col = (invoc >> 2) & 7;
// Bit 5 of invoc, landing at bit 2: row is either 0 or 4.
uint row = (invoc >> 3) & 4;
// NOTE: in this simple case, replacing the multiplies by shifts generates valid
// code. However, in the actual shader this happened in, the compiler still ended
// up ANDing the result with 255 (via ubfe), so "row" still got ignored.
// The three terms occupy disjoint bits (0-1, 3-5 and 8), so the sum is the
// same value as the bitwise OR shown in the alternative below.
out_buf[invoc] = block_id + col * 8 + row * 64;
// Replacing this expression with the equivalent:
// out_buf[invoc] = (invoc & 3) | ((invoc << 1) & 0x38) | ((invoc << 3) & 0x100);
// fixes the problem.
}
// > fxc /Tcs_5_0 /Cc example2.hlsl
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.30.9200.16384
//
//
///
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// out_buf UAV uint buf 0 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
// NOTE: fxc output for the out_buf shader above; instructions are verbatim,
// the NOTE comments are annotations.
cs_5_0
dcl_globalFlags refactoringAllowed
dcl_uav_typed_buffer (uint,uint,uint,uint) u0
dcl_input vThreadIDInGroup.x
dcl_temps 1
dcl_thread_group 64, 1, 1
ubfe r0.x, l(3), l(2), vThreadIDInGroup.x // NOTE: r0.x = (invoc >> 2) & 7
ishl r0.x, r0.x, l(3) // NOTE: r0.x <<= 3
bfi r0.x, l(2), l(0), vThreadIDInGroup.x, r0.x // NOTE: r0.x = (r0.x & ~3) | (invoc & 3)
// NOTE: this accounts for "block_id" and "col" but not "row"!
// NOTE: the missing term is row * 64 from the source, i.e. bit 5 of invoc moved
// to bit 8 (0 or 256); no instruction here reads bit 5 of the thread index.
store_uav_typed u0.xyzw, vThreadIDInGroup.xxxx, r0.xxxx // NOTE: store
ret
// Approximately 5 instruction slots used
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment