sebbbi/SinglePassMipPyramid.hlsl

## SinglePassMipPyramid.hlsl
// NOTE: Must bind 8x single mip RWTexture views, because HLSL doesn't have .mips member for RWTexture2D. (SRVs only have .mips member)
// NOTE: globallycoherent attribute is needed. Without it writes aren't guaranteed to be seen by other groups
globallycoherent RWTexture2D<float> MipTextures[8];
RWTexture2D<uint> Counters[8];
groupshared uint CounterReturnLDS;

[numthreads(16, 16, 1)]
void GenerateMipPyramid(uint3 Tid : SV_DispatchThreadID, uint3 Group : SV_GroupId, uint Gix : SV_GroupIndex)
{
	[unroll]
	for (int Mip = 0; Mip < 8-1; ++Mip)
	{
		// 2x2 downsample
		float Sum =
			MipTextures[Mip][Tid.xy * 2 + uint2(0, 0)] +
			MipTextures[Mip][Tid.xy * 2 + uint2(1, 0)] +
			MipTextures[Mip][Tid.xy * 2 + uint2(0, 1)] +
			MipTextures[Mip][Tid.xy * 2 + uint2(1, 1)];

		MipTextures[Mip+1][Tid.xy] = Sum * 0.25;

		// Four groups in 2x2 tile of groups increment the same counter.
		if (Gix == 0)
		{
			InterlockedAdd(Counters[Mip][Group.xy / 2], 1, CounterReturnLDS);
		}

		// We do a full memory barrier here. In next mip the surviving thread group will read data generated by 3 other thread groups. Data needs to be visible.
		AllMemoryBarrierWithGroupSync();

		// Kill all groups except the last one to finish in 2x2 tile. This branch is allowed because CounterReturnLDS is group invariant.
		if (CounterReturnLDS < 3)
		{
			return;
		}

		// Needed to ensure that all threads in group read CounterReturnLDS before it is modified in next loop iteration
		GroupMemoryBarrierWithGroup();

		Tid.xy /= 2;
		Group.xy /= 2;
	}
}
	// NOTE: Must bind 8x single mip RWTexture views, because HLSL doesn't have .mips member for RWTexture2D. (SRVs only have .mips member)
	// NOTE: globallycoherent attribute is needed. Without it writes aren't guaranteed to be seen by other groups
	globallycoherent RWTexture2D<float> MipTextures[8];
	RWTexture2D<uint> Counters[8];
	groupshared uint CounterReturnLDS;

	[numthreads(16, 16, 1)]
	void GenerateMipPyramid(uint3 Tid : SV_DispatchThreadID, uint3 Group : SV_GroupId, uint Gix : SV_GroupIndex)
	{
	[unroll]
	for (int Mip = 0; Mip < 8-1; ++Mip)
	{
	// 2x2 downsample
	float Sum =
	MipTextures[Mip][Tid.xy * 2 + uint2(0, 0)] +
	MipTextures[Mip][Tid.xy * 2 + uint2(1, 0)] +
	MipTextures[Mip][Tid.xy * 2 + uint2(0, 1)] +
	MipTextures[Mip][Tid.xy * 2 + uint2(1, 1)];

	MipTextures[Mip+1][Tid.xy] = Sum * 0.25;

	// Four groups in 2x2 tile of groups increment the same counter.
	if (Gix == 0)
	{
	InterlockedAdd(Counters[Mip][Group.xy / 2], 1, CounterReturnLDS);
	}

	// We do a full memory barrier here. In next mip the surviving thread group will read data generated by 3 other thread groups. Data needs to be visible.
	AllMemoryBarrierWithGroupSync();

	// Kill all groups except the last one to finish in 2x2 tile. This branch is allowed because CounterReturnLDS is group invariant.
	if (CounterReturnLDS < 3)
	{
	return;
	}

	// Needed to ensure that all threads in group read CounterReturnLDS before it is modified in next loop iteration
	GroupMemoryBarrierWithGroup();

	Tid.xy /= 2;
	Group.xy /= 2;
	}
	}