Skip to content

Instantly share code, notes, and snippets.

@RoyLab
Last active June 1, 2017 03:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RoyLab/ae84c7bde922aef7694e2aad7be77116 to your computer and use it in GitHub Desktop.
Save RoyLab/ae84c7bde922aef7694e2aad7be77116 to your computer and use it in GitHub Desktop.
groupshared memory
// Read/write buffer of floats. reduce1 reads its input elements from here and
// writes each group's result back into the same buffer.
RWStructuredBuffer<float> g_data;
// Thread-group width along X; used both for [numthreads] and to size sdata.
#define groupDim_x 128
// Group-shared scratch array with one float slot per thread in the group.
groupshared float sdata[groupDim_x];
// Sums the groupDim_x consecutive floats of g_data that belong to this thread
// group and stores the total in g_data[groupIdx.x].
//
// threadIdx : index of this thread inside its group (only .x is used).
// groupIdx  : index of this group inside the dispatch (only .x is used).
//
// NOTE(review): input and output share g_data, and nothing in this kernel
// orders one group against another. If more than one group is dispatched,
// group 1 writes g_data[1] while group 0 may still be reading it as input.
// Confirm how the host dispatches this before relying on multi-group results.
[numthreads(groupDim_x, 1, 1)]
void reduce1(uint3 threadIdx : SV_GroupThreadID,
             uint3 groupIdx : SV_GroupID)
{
    const uint lane = threadIdx.x;
    const uint globalIndex = groupIdx.x * groupDim_x + lane;

    // Stage this thread's element in group-shared memory, then wait until
    // every thread in the group has stored its own.
    sdata[lane] = g_data[globalIndex];
    GroupMemoryBarrierWithGroupSync();

    // Pairwise tree reduction: at each step the threads whose index is a
    // multiple of 2 * stride add in the slot `stride` positions to the right.
    for (uint stride = 1; stride < groupDim_x; stride <<= 1)
    {
        // stride is always a power of two, so masking with (2 * stride - 1)
        // yields the same value as lane % (2 * stride).
        const uint pairMask = (stride << 1) - 1;
        if ((lane & pairMask) == 0)
        {
            sdata[lane] += sdata[lane + stride];
        }

        // Kept outside the branch so every thread in the group reaches it.
        GroupMemoryBarrierWithGroupSync();
    }

    // The first thread of the group publishes the group's total.
    if (lane == 0)
    {
        g_data[groupIdx.x] = sdata[0];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment