-
-
Save httpdigest/976a81b6b68f3d4218629da142f0b967 to your computer and use it in GitHub Desktop.
WarpAggregatedAtomics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 core

// Extensions used by this shader:
//   GL_NV_shader_thread_shuffle      - shuffleNV()
//   GL_NV_shader_thread_group        - activeThreadsNV(), gl_ThreadLtMaskNV, gl_WarpSizeNV
//   GL_NV_gpu_shader5                - the 8-bit uint8_t type of the predicate buffer
//   GL_ARB_shader_atomic_counter_ops - atomicCounterAddARB()
#extension GL_NV_shader_thread_shuffle : enable
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_gpu_shader5 : enable
#extension GL_ARB_shader_atomic_counter_ops : enable

// Atomic counter from which output slots are handed out (see outIndex()).
layout(binding = 0) uniform atomic_uint counter;

// One predicate per input element; main() copies an element only when its
// predicate equals 1.
layout(packed, binding = 1) readonly restrict buffer Predicates {
  uint8_t predicates[];
};

// Source elements.
// NOTE(review): the block name is spelled "IntputData"; it is left untouched
// because host code may look the block up by name - confirm before renaming.
layout(packed, binding = 2) readonly restrict buffer IntputData {
  uint inputData[];
};

// Destination for the selected elements, written at the indices returned by
// outIndex().
// NOTE(review): the block name is spelled "OutputDate"; left untouched for the
// same reason as above.
layout(packed, binding = 3) writeonly restrict buffer OutputDate {
  uint outputData[];
};

// Slot allocation strategy used by outIndex():
//   0u - one atomic increment per invocation (default)
//   1u - one atomic add per warp, result distributed with a shuffle
uniform uint algorithm = 0u;
// Reserves one slot in outputData for the calling invocation and returns its
// index. The slot comes from the atomic counter `counter`.
//
// The `algorithm` uniform selects the strategy:
//   1u    - warp-aggregated: the lowest active thread of the warp performs a
//           single atomic add for all threads that are executing this call,
//           and every thread derives its own slot from that base value.
//   other - one atomicCounterIncrement() per invocation. This is the behaviour
//           for 0u and also the fallback for any unrecognised value, so the
//           function never finishes without returning a value.
uint outIndex() {
  if (algorithm == 1u) {
    // Bitmask of the threads of this warp that are executing this call.
    uint act = activeThreadsNV();
    // Number of slots the whole warp needs.
    uint total = uint(bitCount(act));
    // Lowest-numbered active thread; `act` always contains the caller's own
    // bit, so findLSB() cannot report "no bit set" here.
    uint leader = uint(findLSB(act));
    // Number of active threads with a lower id than this one, i.e. this
    // thread's position among the active threads.
    uint rank = uint(bitCount(gl_ThreadLtMaskNV & act));

    uint warpRes = 0u;
    if (rank == 0u) {
      // Only the leader touches the counter; it receives the counter value
      // from before the add, which is the base index for the warp.
      warpRes = atomicCounterAddARB(counter, total);
    }
    // Fetch the base index from the leader and offset it by this thread's rank.
    return shuffleNV(warpRes, leader, gl_WarpSizeNV) + rank;
  }

  // algorithm == 0u, or an unknown value: plain per-invocation increment.
  return atomicCounterIncrement(counter);
}
// Work-group size: 16 x 8 invocations.
layout(local_size_x = 16, local_size_y = 8) in;

// Entry point. Each invocation handles at most one element of inputData:
// if the element exists and its predicate equals 1, it is copied to the
// outputData slot handed out by outIndex().
void main(void) {
  // Number of invocations along x across the whole dispatch.
  uint rowStride = gl_NumWorkGroups.x * gl_WorkGroupSize.x;
  // Linear element index of this invocation (row-major over the global grid).
  uint idx = gl_GlobalInvocationID.y * rowStride + gl_GlobalInvocationID.x;

  // The range check is evaluated first, so predicates[] is never read for an
  // index at or beyond inputData.length().
  if (idx < inputData.length() && predicates[idx] == 1u) {
    outputData[outIndex()] = inputData[idx];
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment