Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save devshgraphicsprogramming/e6733bd4cb89076b6d3ba7dbd28c3635 to your computer and use it in GitHub Desktop.
Save devshgraphicsprogramming/e6733bd4cb89076b6d3ba7dbd28c3635 to your computer and use it in GitHub Desktop.
# 1 "direct.comp"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "direct.comp"
// Compute workgroup layout: 256 invocations along X. The literal 256 also appears in the
// workgroup-size dependent constants further down this file (e.g. `256 - 1`).
layout(local_size_x = 256) in;
# 1 "../../../../nbl/builtin/glsl/scan/descriptors.glsl" 1
# 13 "../../../../nbl/builtin/glsl/scan/descriptors.glsl"
# 1 "../../../../nbl/builtin/glsl/scan/declarations.glsl" 1
# 1 "../../../../nbl/builtin/glsl/scan/parameters_struct.glsl" 1
# 10 "../../../../nbl/builtin/glsl/scan/parameters_struct.glsl"
// Parameters consumed by the scan data accessors; obtained through nbl_glsl_scan_getParameters().
struct nbl_glsl_scan_Parameters_t
{
// Compared against `treeLevel` in nbl_glsl_scan_setData to choose which store path is taken.
uint topLevel;
// 7 / 2 + 1 == 4 entries (integer division).
uint lastElement[7 / 2 + 1];
// 7 / 2 == 3 entries; added to indices into scanScratch.data by the scan accessors.
uint temporaryStorageOffset[7 / 2];
};
# 6 "../../../../nbl/builtin/glsl/scan/declarations.glsl" 2
// Forward declaration. Called by nbl_glsl_scan_getData and nbl_glsl_scan_setData to obtain the
// scan parameters (top level and scratch offsets).
nbl_glsl_scan_Parameters_t nbl_glsl_scan_getParameters();
# 19 "../../../../nbl/builtin/glsl/scan/declarations.glsl"
// Forward declaration of the scan load accessor.
//   data                 - in/out accumulator that receives (or is combined with) the loaded value
//   levelInvocationIndex - element index within the current level
//   localWorkgroupIndex  - workgroup index within the current level
//   treeLevel            - level currently being processed
//   pseudoLevel          - level used to pick the storage (0 selects the main scan buffer)
void nbl_glsl_scan_getData(
inout _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
in uint levelInvocationIndex,
in uint localWorkgroupIndex,
in uint treeLevel,
in uint pseudoLevel);
// Forward declaration of the scan store accessor.
//   data                 - value to store
//   levelInvocationIndex - element index within the current level
//   localWorkgroupIndex  - workgroup index within the current level
//   treeLevel            - level currently being processed (compared against params.topLevel)
//   pseudoLevel          - level used to pick the storage (0 selects the main scan buffer)
//   inRange              - gates the per-element store once treeLevel has reached params.topLevel
void nbl_glsl_scan_setData(
in _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
in uint levelInvocationIndex,
in uint localWorkgroupIndex,
in uint treeLevel,
in uint pseudoLevel,
in bool inRange);
# 14 "../../../../nbl/builtin/glsl/scan/descriptors.glsl" 2
// Storage buffer (set 0, binding 0) with the elements being scanned. The scan accessors read and
// write it only when pseudoLevel is 0.
layout(set = 0, binding = 0, std430) restrict buffer ScanBuffer
{
_NBL_GLSL_SCAN_STORAGE_TYPE_ data[];
}
scanBuffer;
// Coherent scratch storage buffer (set 0, binding 1) used for intermediate scan levels.
layout(set = 0, binding = 1, std430) restrict coherent buffer ScanScratchBuffer
{
// Not touched by the scan accessors; presumably a counter used by the workgroup scheduler - TODO confirm.
uint workgroupsStarted;
// Indexed by the scan accessors as <element or workgroup index> + params.temporaryStorageOffset[level].
uint data[];
}
scanScratch;
// Loads the value a scan invocation needs for the given level into `data`.
//
// * pseudoLevel == treeLevel: plain load of the element, from the main buffer when
//   pseudoLevel is 0 and from scratch otherwise.
// * pseudoLevel != treeLevel:
//     - the first invocation of every workgroup except workgroup 0 first picks up the scratch
//       entry stored for its workgroup at level `pseudoLevel`;
//     - at pseudoLevel 0 the element from the main buffer is then added onto `data`;
//     - at other levels every invocation except the first replaces `data` with the scratch
//       entry of the preceding element.
void nbl_glsl_scan_getData(
    inout _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel)
{
    const nbl_glsl_scan_Parameters_t scanParams = nbl_glsl_scan_getParameters();

    // Levels other than 0 live in the scratch buffer, shifted by the offset of the level below.
    const bool readsScratch = pseudoLevel != 0u;
    uint elementIndex = levelInvocationIndex;
    if (readsScratch)
        elementIndex += scanParams.temporaryStorageOffset[pseudoLevel - 1u];

    if (pseudoLevel == treeLevel)
    {
        if (readsScratch)
            data = scanScratch.data[elementIndex];
        else
            data = scanBuffer.data[elementIndex];
        return;
    }

    const bool firstInvocationInGroup = gl_LocalInvocationIndex == 0u;
    if (firstInvocationInGroup && localWorkgroupIndex != 0u)
        data = scanScratch.data[localWorkgroupIndex + scanParams.temporaryStorageOffset[pseudoLevel]];

    if (!readsScratch)
        data += scanBuffer.data[elementIndex];
    else if (!firstInvocationInGroup)
        data = scanScratch.data[elementIndex - 1u];
}
// Stores the result of a scan step.
//
// * treeLevel < params.topLevel: only the last invocation of the workgroup (index 256 - 1)
//   writes, publishing `data` into the scratch slot of its workgroup at level `treeLevel`.
// * otherwise, and only when `inRange` is set: every invocation writes its own element, to the
//   main buffer when pseudoLevel is 0 and to the scratch region of the level below otherwise.
void nbl_glsl_scan_setData(
    in _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel,
    in bool inRange)
{
    const nbl_glsl_scan_Parameters_t scanParams = nbl_glsl_scan_getParameters();

    if (treeLevel < scanParams.topLevel)
    {
        if (gl_LocalInvocationIndex == (256 - 1))
            scanScratch.data[localWorkgroupIndex + scanParams.temporaryStorageOffset[treeLevel]] = data;
        return;
    }

    if (!inRange)
        return;

    if (pseudoLevel != 0u)
        scanScratch.data[levelInvocationIndex + scanParams.temporaryStorageOffset[pseudoLevel - 1u]] = data;
    else
        scanBuffer.data[levelInvocationIndex] = data;
}
# 8 "direct.comp" 2
# 1 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 1
# 1 "../../../../nbl/builtin/glsl/limits/numeric.glsl" 1
# 42 "../../../../nbl/builtin/glsl/limits/numeric.glsl"
# 1 "../../../../nbl/builtin/glsl/ieee754.glsl" 1
// Exponent bias of a binary floating-point format with `exponentBits` exponent bits:
// 2^(exponentBits - 1) - 1 (e.g. 127 for 8 bits).
uint nbl_glsl_ieee754_exponent_bias(in uint exponentBits)
{
    const uint halfExponentRange = 0x1u << (exponentBits - 1);
    return halfExponentRange - 1;
}
// Returns the raw (biased) 8-bit exponent field, bits [23, 31), of a 32-bit float.
uint nbl_glsl_ieee754_extract_biased_exponent(float x)
{
    const uint rawBits = floatBitsToUint(x);
    return bitfieldExtract(rawBits, 23, 8);
}
// Returns the unbiased exponent of a 32-bit float: the biased exponent field minus the bias
// of an 8-bit exponent. The subtraction wraps in uint and is reinterpreted as signed.
int nbl_glsl_ieee754_extract_exponent(float x)
{
    const uint biasedExponent = nbl_glsl_ieee754_extract_biased_exponent(x);
    const uint bias = nbl_glsl_ieee754_exponent_bias(8);
    return int(biasedExponent - bias);
}
// Bit mask covering the exponent field: `exponentBits` ones placed directly above a mantissa of
// `mantissaBits` bits.
uint nbl_glsl_ieee754_compute_exponent_mask(in uint exponentBits, in uint mantissaBits)
{
    const int exponentFieldOnes = (1 << exponentBits) - 1;
    return exponentFieldOnes << mantissaBits;
}
// Returns `x` with its 8-bit exponent field (bits [23, 31)) replaced by the low 8 bits of
// `exp_plus_bias`; sign and mantissa are kept.
float nbl_glsl_ieee754_replace_biased_exponent(float x, uint exp_plus_bias)
{
    const uint originalBits = floatBitsToUint(x);
    const uint patchedBits = bitfieldInsert(originalBits, exp_plus_bias, 23, 8);
    return uintBitsToFloat(patchedBits);
}
// Multiplies `x` by 2^n by adding `n` directly to the biased exponent field.
// No range handling is performed: only the low 8 bits of the new exponent are kept.
float nbl_glsl_ieee754_fast_mul_exp2(float x, int n)
{
    const uint shiftedExponent = nbl_glsl_ieee754_extract_biased_exponent(x) + uint(n);
    return nbl_glsl_ieee754_replace_biased_exponent(x, shiftedExponent);
}
// Bit mask with the low `mantissaBits` bits set.
uint nbl_glsl_ieee754_compute_mantissa_mask(in uint mantissaBits)
{
    const uint onePastMantissa = 0x1u << mantissaBits;
    return onePastMantissa - 1;
}
// Returns the 23 stored mantissa bits of a 32-bit float (the implicit leading one is not included).
uint nbl_glsl_ieee754_extract_mantissa(in float x)
{
    const uint mantissaMask = 0x7fffffu;
    return floatBitsToUint(x) & mantissaMask;
}
// Smallest positive magnitude representable by a binary floating-point format with the given
// field widths: 2^(1 - bias - mantissaBits).
//   exponentBits - width of the exponent field
//   mantissaBits - width of the stored mantissa field
//
// The exponent is computed entirely in signed arithmetic. Mixing the signed `1 - bias` with the
// unsigned `mantissaBits` would promote the whole expression to uint, wrap the negative exponent
// to a huge positive number and make exp2() return +inf.
float nbl_glsl_ieee754_true_min(in uint exponentBits, in uint mantissaBits)
{
    const int minNormalExponent = 1 - int(nbl_glsl_ieee754_exponent_bias(exponentBits));
    const int trueMinExponent = minNormalExponent - int(mantissaBits);
    return exp2(float(trueMinExponent));
}
// Builds a 32-bit float from 2^(1 - bias), the smallest normal power of two of the described
// format, OR-ed with the single mantissa bit at position (23 - mantissaBits).
//   exponentBits - width of the exponent field of the described format
//   mantissaBits - width of the mantissa field of the described format
float nbl_glsl_ieee754_min(in uint exponentBits, in uint mantissaBits)
{
    const float smallestNormalPower = exp2(1 - int(nbl_glsl_ieee754_exponent_bias(exponentBits)));
    const uint lowestKeptMantissaBit = 0x1u << (23 - mantissaBits);
    const uint resultBits = floatBitsToUint(smallestNormalPower) | lowestKeptMantissaBit;
    return uintBitsToFloat(resultBits);
}
// Largest finite value of the described format expressed as a 32-bit float: the largest
// non-reserved biased exponent (all ones minus one) combined with the top `mantissaBits`
// bits of the 23-bit mantissa set.
//   exponentBits - width of the exponent field of the described format
//   mantissaBits - width of the mantissa field of the described format
float nbl_glsl_ieee754_max(in uint exponentBits, in uint mantissaBits)
{
    const uint largestFiniteBiasedExponent = (1 << exponentBits) - 2;
    const float largestPowerOfTwo = exp2(largestFiniteBiasedExponent - int(nbl_glsl_ieee754_exponent_bias(exponentBits)));
    const uint keptMantissaBits = 0x7fFFffu & (0x7fFFffu << (23 - mantissaBits));
    const uint resultBits = floatBitsToUint(largestPowerOfTwo) | keptMantissaBits;
    return uintBitsToFloat(resultBits);
}
// Packs an unsigned small float: the re-biased exponent is placed above a mantissa truncated
// from 23 bits down to `mantissaBits` bits.
//   exponent     - unbiased exponent
//   exponentBits - width of the target exponent field (selects the bias)
//   mantissa     - 23-bit mantissa of the source value
//   mantissaBits - width of the target mantissa field
uint nbl_glsl_ieee754_encode_ufloat_impl(in int exponent, in uint exponentBits, in uint mantissa, in uint mantissaBits)
{
    const uint biasedExponent = uint(exponent + nbl_glsl_ieee754_exponent_bias(exponentBits));
    const uint truncatedMantissa = mantissa >> (23 - mantissaBits);
    return (biasedExponent << mantissaBits) | truncatedMantissa;
}
// Forward declarations of the epsilon overloads; the gamma and error-bound helpers that follow
// call them before their definitions appear.
float nbl_glsl_numeric_limits_float_epsilon(float n);
float nbl_glsl_numeric_limits_float_epsilon(int n);
float nbl_glsl_numeric_limits_float_epsilon();
// gamma(n) = eps(n) / (1 - eps(n)), with eps(n) = nbl_glsl_numeric_limits_float_epsilon(n).
float nbl_glsl_ieee754_gamma(float n)
{
    const float scaledEpsilon = nbl_glsl_numeric_limits_float_epsilon(n);
    const float oneMinusEpsilon = 1.f - scaledEpsilon;
    return scaledEpsilon / oneMinusEpsilon;
}
// Reciprocal of gamma(n), evaluated as 1 / eps(n) - 1.
float nbl_glsl_ieee754_rcpgamma(float n)
{
    const float scaledEpsilon = nbl_glsl_numeric_limits_float_epsilon(n);
    const float rcpEpsilon = 1.f / scaledEpsilon;
    return rcpEpsilon - 1.f;
}
// Unsigned-count convenience overload: converts `n` to float and forwards to the float version.
float nbl_glsl_ieee754_gamma(uint n)
{
    const float count = float(n);
    return nbl_glsl_ieee754_gamma(count);
}
// Unsigned-count convenience overload: converts `n` to float and forwards to the float version.
float nbl_glsl_ieee754_rcpgamma(uint n)
{
    const float count = float(n);
    return nbl_glsl_ieee754_rcpgamma(count);
}
// Adds two vectors and produces an error term that has not yet been scaled by gamma.
//   error            - out: (a_error + b_error) / eps(1) + |a + b|
//   a, b             - operands
//   a_error, b_error - error terms carried by the operands
// Returns a + b.
vec3 nbl_glsl_ieee754_add_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in vec3 b, in vec3 b_error)
{
    const vec3 result = a + b;
    const vec3 carriedError = (a_error + b_error) / nbl_glsl_numeric_limits_float_epsilon(1u);
    error = carriedError + abs(result);
    return result;
}
// Subtracts two vectors and produces an error term that has not yet been scaled by gamma.
//   error            - out: (a_error + b_error) / eps(1) + |a - b|
//   a, b             - operands
//   a_error, b_error - error terms carried by the operands
// Returns a - b.
vec3 nbl_glsl_ieee754_sub_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in vec3 b, in vec3 b_error)
{
    const vec3 difference = a - b;
    const vec3 carriedError = (a_error + b_error) / nbl_glsl_numeric_limits_float_epsilon(1u);
    error = carriedError + abs(difference);
    return difference;
}
// Scales a vector by a scalar and produces an error term that has not yet been scaled by gamma.
//   error   - out: (eA + eB + eA * eB) / eps(1) + |a * b|,
//             where eA = a_error * |b| and eB = |a| * b_error
//   a, b    - operands
//   a_error, b_error - error terms carried by the operands
// Returns a * b.
vec3 nbl_glsl_ieee754_mul_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in float b, in float b_error)
{
    const vec3 errorThroughB = abs(a) * b_error;
    const vec3 errorThroughA = a_error * abs(b);
    const vec3 carriedError = (errorThroughA + errorThroughB + errorThroughA * errorThroughB) / nbl_glsl_numeric_limits_float_epsilon(1u);
    const vec3 result = a * b;
    error = carriedError + abs(result);
    return result;
}
# 43 "../../../../nbl/builtin/glsl/limits/numeric.glsl" 2
// Returns n * 2^-24, computed by subtracting 24 from the exponent field of `n`.
float nbl_glsl_numeric_limits_float_epsilon(float n)
{
    const int epsilonExponent = -24;
    return nbl_glsl_ieee754_fast_mul_exp2(n, epsilonExponent);
}
// Integer-count convenience overload: converts `n` to float and forwards to the float version.
float nbl_glsl_numeric_limits_float_epsilon(int n)
{
    const float count = float(n);
    return nbl_glsl_numeric_limits_float_epsilon(count);
}
// Returns the constant 5.96046447754e-08 (2^-24).
float nbl_glsl_numeric_limits_float_epsilon()
{
    const float twoToTheMinus24 = 5.96046447754e-08;
    return twoToTheMinus24;
}
# 5 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
# 1 "../../../../nbl/builtin/glsl/math/typeless_arithmetic.glsl" 1
// Identity overloads: return the argument unchanged. They occupy the "conversion" slot of the
// arithmetic implementations for types that are stored in scratch memory as-is.
int nbl_glsl_identityFunction(in int x)
{
    return x;
}
uint nbl_glsl_identityFunction(in uint x)
{
    return x;
}
float nbl_glsl_identityFunction(in float x)
{
    return x;
}
// Bitwise AND as a named function, for signed and unsigned integers.
int nbl_glsl_and(in int x, in int y)
{
    const int result = x & y;
    return result;
}
uint nbl_glsl_and(in uint x, in uint y)
{
    const uint result = x & y;
    return result;
}
// Bitwise XOR as a named function, for signed and unsigned integers.
int nbl_glsl_xor(in int x, in int y)
{
    const int result = x ^ y;
    return result;
}
uint nbl_glsl_xor(in uint x, in uint y)
{
    const uint result = x ^ y;
    return result;
}
// Bitwise OR as a named function, for signed and unsigned integers.
int nbl_glsl_or(in int x, in int y)
{
    const int result = x | y;
    return result;
}
uint nbl_glsl_or(in uint x, in uint y)
{
    const uint result = x | y;
    return result;
}
// Addition as a named function, for int, uint and float.
int nbl_glsl_add(in int x, in int y)
{
    const int sum = x + y;
    return sum;
}
uint nbl_glsl_add(in uint x, in uint y)
{
    const uint sum = x + y;
    return sum;
}
float nbl_glsl_add(in float x, in float y)
{
    const float sum = x + y;
    return sum;
}
// Multiplication as a named function, for int, uint and float.
int nbl_glsl_mul(in int x, in int y)
{
    const int product = x * y;
    return product;
}
uint nbl_glsl_mul(in uint x, in uint y)
{
    const uint product = x * y;
    return product;
}
float nbl_glsl_mul(in float x, in float y)
{
    const float product = x * y;
    return product;
}
# 6 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
# 1 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 1
# 1 "../../../../nbl/builtin/glsl/workgroup/shared_arithmetic.glsl" 1
# 1 "../../../../nbl/builtin/glsl/workgroup/shared_clustered.glsl" 1
# 1 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 1
# 1 "../../../../nbl/builtin/glsl/workgroup/basic.glsl" 1
# 1 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl" 1
# 1 "../../../../nbl/builtin/glsl/macros.glsl" 1
# 7 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl" 2
# 99 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl"
// Subgroup execution-barrier shim. In this preprocessed variant the body is empty, so no
// execution synchronization is issued.
// NOTE(review): callers pair this with nbl_glsl_subgroupMemoryBarrierShared(); correctness
// presumably relies on the invocations of a pseudo-subgroup running in lockstep - confirm.
void nbl_glsl_subgroupBarrier()
{
}
// Subgroup memory-barrier shim: forwards to the built-in memoryBarrier().
void nbl_glsl_subgroupMemoryBarrier()
{
memoryBarrier();
}
// Subgroup buffer-memory barrier shim: forwards to the built-in memoryBarrierBuffer().
void nbl_glsl_subgroupMemoryBarrierBuffer()
{
memoryBarrierBuffer();
}
// Subgroup shared-memory barrier shim: forwards to the built-in memoryBarrierShared().
void nbl_glsl_subgroupMemoryBarrierShared()
{
memoryBarrierShared();
}
// Subgroup image-memory barrier shim: forwards to the built-in memoryBarrierImage().
void nbl_glsl_subgroupMemoryBarrierImage()
{
memoryBarrierImage();
}
# 7 "../../../../nbl/builtin/glsl/workgroup/basic.glsl" 2
// Elects exactly one invocation per workgroup: true only for local invocation index 0.
bool nbl_glsl_workgroupElect()
{
    const bool isFirstInvocation = gl_LocalInvocationIndex == 0u;
    return isFirstInvocation;
}
# 7 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 2
# 1 "../../../../nbl/builtin/glsl/subgroup/shared_arithmetic_portability.glsl" 1
# 8 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 2
# 7 "../../../../nbl/builtin/glsl/workgroup/shared_clustered.glsl" 2
# 7 "../../../../nbl/builtin/glsl/workgroup/shared_arithmetic.glsl" 2
# 6 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 2
# 16 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
// Shared scratch memory used by the emulated subgroup arithmetic.
// The macro-expanded size expression (workgroup size 256, pseudo-subgroup size 0x1 << 2 == 4)
// evaluates to 596 uints:
//   ((((255 & -4) << 1) | (255 & 3)) + 2 + 1) = 510   two slots per invocation plus padding
//   + (255 >> 2) + (255 >> 4) + (255 >> 6) + (255 >> 8) + (255 >> 10) = 63 + 15 + 3 + 0 + 0
//   + 5
shared uint nbl_glsl_workgroupArithmeticScratchShared[(((((256 - 1 & (-(0x1 << 2))) << 1) | (256 - 1 & ((0x1 << 2) - 1))) + ((0x1 << 2) >> 1) + 1) + (256 - 1 >> (2)) + (256 - 1 >> (2 * 2)) + (256 - 1 >> (2 * 3)) + (256 - 1 >> (2 * 4)) + (256 - 1 >> (2 * 5)) + 5)];
# 1 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl" 1
# 21 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl"
# 1 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl" 1
# 45 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
# 1 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl" 1
# 14 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
// Index of the first invocation of the pseudo-subgroup that `invocationIndex` belongs to:
// the index with its low (loMask) bits cleared.
uint nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(in uint loMask, in uint invocationIndex)
{
    const uint hiMask = ~loMask;
    return invocationIndex & hiMask;
}
// Position of `invocationIndex` inside its pseudo-subgroup: the low (loMask) bits of the index.
uint nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(in uint loMask, in uint invocationIndex)
{
    const uint indexInSubgroup = loMask & invocationIndex;
    return indexInSubgroup;
}
// First scratch slot of a pseudo-subgroup: twice the index of its elected (first) invocation,
// i.e. every invocation is allotted two scratch slots.
uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(in uint pseudoSubgroupElectedInvocation)
{
    const uint slotsPerInvocation = 2u;
    return pseudoSubgroupElectedInvocation * slotsPerInvocation;
}
// Scratch slot an invocation stores its value into.
//   subgroupMemoryStart      - first scratch slot of the pseudo-subgroup
//   pseudoSubgroupInvocation - position of the invocation inside the pseudo-subgroup
//   lastLoadOffset           - out: subgroupMemoryStart | pseudoSubgroupInvocation, which is the
//                              slot half a pseudo-subgroup ((0x1 << 2) >> 1 == 2) below the store slot
// Returns lastLoadOffset + 2.
uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(in uint subgroupMemoryStart, in uint pseudoSubgroupInvocation, out uint lastLoadOffset)
{
    const uint unshiftedSlot = subgroupMemoryStart | pseudoSubgroupInvocation;
    lastLoadOffset = unshiftedSlot;
    return unshiftedSlot + ((0x1 << 2) >> 1);
}
// Overload for callers that only need the store slot; the `out` value is discarded.
uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(in uint subgroupMemoryStart, in uint pseudoSubgroupInvocation)
{
    uint unusedLastLoadOffset;
    return nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, unusedLastLoadOffset);
}
// Scratch store slot for `invocationIndex`, derived from the pseudo-subgroup mask alone:
// splits the index into its pseudo-subgroup base and position, then maps those to a slot.
uint nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(in uint loMask, in uint invocationIndex)
{
    const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, invocationIndex);
    const uint indexInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, invocationIndex);
    const uint memoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
    return nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(memoryStart, indexInSubgroup);
}
# 116 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
// Bitwise-AND reduction over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations, staged through nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the AND of the pseudo-subgroup's values.
uint nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, uint value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// The first two invocations also fill the two padding slots below the store range with the
// AND identity (all ones), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Signed overload: reinterprets the operand as uint, runs the uint reduction and converts back.
int nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, int value)
{
    const uint reduced = nbl_glsl_subgroupAnd_impl(clearScratchToIdentity, uint(value));
    return int(reduced);
}
// Float overload: applies the uint reduction to the raw bit pattern of the float.
float nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, float value)
{
    const uint valueBits = floatBitsToUint(value);
    const uint reducedBits = nbl_glsl_subgroupAnd_impl(clearScratchToIdentity, valueBits);
    return uintBitsToFloat(reducedBits);
}
// Bitwise-XOR reduction over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations, staged through nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the XOR of the pseudo-subgroup's values.
uint nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, uint value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// The first two invocations also fill the two padding slots below the store range with the
// XOR identity (0), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Signed overload: reinterprets the operand as uint, runs the uint reduction and converts back.
int nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, int value)
{
    const uint reduced = nbl_glsl_subgroupXor_impl(clearScratchToIdentity, uint(value));
    return int(reduced);
}
// Float overload: applies the uint reduction to the raw bit pattern of the float.
float nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, float value)
{
    const uint valueBits = floatBitsToUint(value);
    const uint reducedBits = nbl_glsl_subgroupXor_impl(clearScratchToIdentity, valueBits);
    return uintBitsToFloat(reducedBits);
}
// Bitwise-OR reduction over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations, staged through nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the OR of the pseudo-subgroup's values.
uint nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, uint value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// The first two invocations also fill the two padding slots below the store range with the
// OR identity (0), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Signed overload: reinterprets the operand as uint, runs the uint reduction and converts back.
int nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, int value)
{
    const uint reduced = nbl_glsl_subgroupOr_impl(clearScratchToIdentity, uint(value));
    return int(reduced);
}
// Float overload: applies the uint reduction to the raw bit pattern of the float.
float nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, float value)
{
    const uint valueBits = floatBitsToUint(value);
    const uint reducedBits = nbl_glsl_subgroupOr_impl(clearScratchToIdentity, valueBits);
    return uintBitsToFloat(reducedBits);
}
// Sum reduction (uint) over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations, staged through nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the sum of the pseudo-subgroup's values.
uint nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, uint value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// The first two invocations also fill the two padding slots below the store range with the
// additive identity (0), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Signed overload: reinterprets the operand as uint, runs the uint reduction and converts back.
int nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, int value)
{
    const uint reduced = nbl_glsl_subgroupAdd_impl(clearScratchToIdentity, uint(value));
    return int(reduced);
}
// Sum reduction (float) over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations. Values travel through the uint scratch array as raw bit patterns
// (floatBitsToUint on store, uintBitsToFloat on load).
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the sum of the pseudo-subgroup's values.
float nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, float value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// The first two invocations also fill the two padding slots below the store range with the
// additive identity (0.0), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Product reduction (uint) over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations, staged through nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the product of the pseudo-subgroup's values.
uint nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, uint value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// The first two invocations also fill the two padding slots below the store range with the
// multiplicative identity (1), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Signed overload: reinterprets the operand as uint, runs the uint reduction and converts back.
int nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, int value)
{
    const uint reduced = nbl_glsl_subgroupMul_impl(clearScratchToIdentity, uint(value));
    return int(reduced);
}
// Product reduction (float) over an emulated ("pseudo") subgroup of (0x1 << 2) == 4 consecutive
// invocations. Values travel through the uint scratch array as raw bit patterns
// (floatBitsToUint on store, uintBitsToFloat on load).
//   clearScratchToIdentity - when true, this call first stores `value` and the identity padding
//                            into scratch; when false those stores are skipped (presumably the
//                            caller has already populated scratch - confirm at the call sites)
//   value                  - this invocation's operand
// Returns the scratch entry of the last invocation of this invocation's pseudo-subgroup after the
// combine steps, i.e. the product of the pseudo-subgroup's values.
float nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, float value)
{
// Mask of the index bits inside a pseudo-subgroup: (0x1 << 2) - 1u == 3.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder; overwritten through the `out` parameter of the call below.
uint lastLoadOffset = 0xdeadbeefu;
// The store slot sits ((0x1 << 2) >> 1) == 2 entries above lastLoadOffset.
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// The first two invocations also fill the two padding slots below the store range with the
// multiplicative identity (1.0), so the loads below never need a bounds check.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Stride 1: combine with the slot directly below.
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// Intermediate strides 2, 4, ... below half the pseudo-subgroup size. With a size of 4 the bound
// is 2, so this loop body never executes.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Final stride of half the pseudo-subgroup size: lastLoadOffset == subgroupScanStoreOffset - 2.
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the combined value so the last invocation's slot can be read back by everyone.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Position of the last invocation in this pseudo-subgroup. For the pseudo-subgroup that contains
// workgroup invocation 255 it is clamped to 255's low bits (3 & 255 == 3, so unchanged here).
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(lastItem);
// Stray empty statement left over from macro expansion.
;
}
// Emulated subgroup reduction: minimum of `value` (uint overload).
// Works through nbl_glsl_workgroupArithmeticScratchShared; the (0x1 << 2) constants suggest an
// emulated subgroup of 4 invocations (NOTE(review): confirm against the helper definitions).
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   4294967295u (largest uint, neutral for min) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation.
uint nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(4294967295u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
;
}
// Emulated subgroup reduction: minimum of `value` (int overload).
// Values travel through the uint scratch array nbl_glsl_workgroupArithmeticScratchShared via
// uint(...) on store and int(...) on load; the min itself is evaluated on int operands.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   2147483647 (largest int, neutral for min) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation, as int.
int nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, int value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(2147483647);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return int(lastItem);
;
}
// Emulated subgroup reduction: minimum of `value` (float overload).
// Values travel through the uint scratch array nbl_glsl_workgroupArithmeticScratchShared via
// floatBitsToUint on store and uintBitsToFloat on load; the min itself is evaluated on floats.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   the bits of (1.f / 0.f) (intended as +infinity, neutral for min) to the slot at lastLoadOffset
//   before scanning; when false neither store happens (presumably the caller prepared the
//   scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation, as float.
float nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, float value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint((1.f / 0.f));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(lastItem);
;
}
// Emulated subgroup reduction: maximum of `value` (uint overload).
// Works through nbl_glsl_workgroupArithmeticScratchShared; the (0x1 << 2) constants suggest an
// emulated subgroup of 4 invocations (NOTE(review): confirm against the helper definitions).
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0u (smallest uint, neutral for max) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation.
uint nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(lastItem);
;
}
// Emulated subgroup reduction: maximum of `value` (int overload).
// Values travel through the uint scratch array nbl_glsl_workgroupArithmeticScratchShared via
// uint(...) on store and int(...) on load; the max itself is evaluated on int operands.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   -2147483648 (smallest int, neutral for max) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation, as int.
int nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, int value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(-2147483648);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return int(lastItem);
;
}
// Emulated subgroup reduction: maximum of `value` (float overload).
// Values travel through the uint scratch array nbl_glsl_workgroupArithmeticScratchShared via
// floatBitsToUint on store and uintBitsToFloat on load; the max itself is evaluated on floats.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   the bits of -(1.f / 0.f) (intended as -infinity, neutral for max) to the slot at
//   lastLoadOffset before scanning; when false neither store happens (presumably the caller
//   prepared the scratch - TODO confirm).
// Returns the scratch value read from the slot computed for lastSubgroupInvocation, as float.
float nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, float value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(-(1.f / 0.f));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the final per-invocation value so the last slot can be read back
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// index of the last invocation: loMask, additionally masked with 256 - 1u when this invocation's
// elected invocation equals the one computed for index 256 - 1u
uint lastSubgroupInvocation = loMask;
if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
lastSubgroupInvocation &= 256 - 1u;
const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(lastItem);
;
}
# 224 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
// Emulated subgroup inclusive scan with nbl_glsl_and as the combining operator (uint overload).
// Works through nbl_glsl_workgroupArithmeticScratchShared; the (0x1 << 2) constants suggest an
// emulated subgroup of 4 invocations (NOTE(review): confirm against the helper definitions).
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0xffFFffFFu (all bits set, neutral for AND) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns this invocation's accumulated value after the final combine step.
uint nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// barrier pair after the last scratch read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
int nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint inclusive AND scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveAnd_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the AND scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveAnd_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Emulated subgroup exclusive scan with nbl_glsl_and as the combining operator (uint overload).
// Runs the same combine steps as the inclusive variant, stores the result to this invocation's
// scratch slot and then returns what is read from the slot one below it.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0xffFFffFFu (all bits set, neutral for AND) to the slot at lastLoadOffset before scanning;
//   when false neither store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value at subgroupScanStoreOffset - 1u after the scan.
uint nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the inclusive result, then pick up the neighbouring slot one below as the exclusive result
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
int nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint exclusive AND scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveAnd_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the AND scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveAnd_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Emulated subgroup inclusive scan with nbl_glsl_xor as the combining operator (uint overload).
// Works through nbl_glsl_workgroupArithmeticScratchShared; the (0x1 << 2) constants suggest an
// emulated subgroup of 4 invocations (NOTE(review): confirm against the helper definitions).
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0u (neutral for XOR) to the slot at lastLoadOffset before scanning; when false neither
//   store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns this invocation's accumulated value after the final combine step.
uint nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// barrier pair after the last scratch read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
int nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint inclusive XOR scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveXor_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the XOR scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveXor_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Emulated subgroup exclusive scan with nbl_glsl_xor as the combining operator (uint overload).
// Runs the same combine steps as the inclusive variant, stores the result to this invocation's
// scratch slot and then returns what is read from the slot one below it.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0u (neutral for XOR) to the slot at lastLoadOffset before scanning; when false neither
//   store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value at subgroupScanStoreOffset - 1u after the scan.
uint nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the inclusive result, then pick up the neighbouring slot one below as the exclusive result
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
int nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint exclusive XOR scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveXor_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the XOR scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveXor_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Emulated subgroup inclusive scan with nbl_glsl_or as the combining operator (uint overload).
// Works through nbl_glsl_workgroupArithmeticScratchShared; the (0x1 << 2) constants suggest an
// emulated subgroup of 4 invocations (NOTE(review): confirm against the helper definitions).
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0u (neutral for OR) to the slot at lastLoadOffset before scanning; when false neither
//   store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns this invocation's accumulated value after the final combine step.
uint nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// barrier pair after the last scratch read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
int nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint inclusive OR scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveOr_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the OR scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupInclusiveOr_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Emulated subgroup exclusive scan with nbl_glsl_or as the combining operator (uint overload).
// Runs the same combine steps as the inclusive variant, stores the result to this invocation's
// scratch slot and then returns what is read from the slot one below it.
// clearScratchToIdentity - when true, this call stores `value` in its own scratch slot and writes
//   0u (neutral for OR) to the slot at lastLoadOffset before scanning; when false neither
//   store happens (presumably the caller prepared the scratch - TODO confirm).
// Returns the scratch value at subgroupScanStoreOffset - 1u after the scan.
uint nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, uint value)
{
// loMask == 3u (two low bits); handed to the helpers that derive the pseudo-subgroup indices
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel only: passed as the third argument below, presumably an out parameter the helper fills in - TODO confirm
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// only invocations with index below ((0x1 << 2) >> 1) == 2 write the identity
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// step 1: fold in the slot directly below this invocation's own slot
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// bound ((0x1 << 2) >> 1) is 2 and stp starts at 2u, so this loop body never runs in this expansion
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
// publish the partial result, then fold in the slot at lastLoadOffset
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
// publish the inclusive result, then pick up the neighbouring slot one below as the exclusive result
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
// barrier pair after the read, before returning
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
int nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, int value)
{
    // int overload: convert to uint, delegate to the uint exclusive OR scan, convert back.
    const uint operand = uint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveOr_impl(clearScratchToIdentity, operand);
    return int(scanned);
}
float nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, float value)
{
    // float overload: the OR scan runs on the raw bit pattern of the float.
    const uint bits = floatBitsToUint(value);
    const uint scanned = nbl_glsl_subgroupExclusiveOr_impl(clearScratchToIdentity, bits);
    return uintBitsToFloat(scanned);
}
// Inclusive add scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the additive identity (0u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns `value` combined via nbl_glsl_add with the scratch entry one slot below this
// invocation's slot and then with the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Signed-integer overload of the inclusive add scan.
// Converts `value` to uint, forwards to the uint overload with the same
// `clearScratchToIdentity` flag, and converts the result back to int.
int nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, int value)
{
    const uint unsignedInput = uint(value);
    const uint unsignedResult = nbl_glsl_subgroupInclusiveAdd_impl(clearScratchToIdentity, unsignedInput);
    return int(unsignedResult);
}
// Inclusive add scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the additive identity (0.0) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns `value` combined via nbl_glsl_add with the scratch entry one slot below this
// invocation's slot and then with the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Exclusive add scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the additive identity (0u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
// Signed-integer overload of the exclusive add scan.
// Converts `value` to uint, forwards to the uint overload with the same
// `clearScratchToIdentity` flag, and converts the result back to int.
int nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, int value)
{
    const uint unsignedInput = uint(value);
    const uint unsignedResult = nbl_glsl_subgroupExclusiveAdd_impl(clearScratchToIdentity, unsignedInput);
    return int(unsignedResult);
}
// Exclusive add scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the additive identity (0.0) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(prevItem);
;
}
// Inclusive multiply scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the multiplicative identity (1u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns `value` combined via nbl_glsl_mul with the scratch entry one slot below this
// invocation's slot and then with the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Signed-integer overload of the inclusive multiply scan.
// Converts `value` to uint, forwards to the uint overload with the same
// `clearScratchToIdentity` flag, and converts the result back to int.
int nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, int value)
{
    const uint unsignedInput = uint(value);
    const uint unsignedResult = nbl_glsl_subgroupInclusiveMul_impl(clearScratchToIdentity, unsignedInput);
    return int(unsignedResult);
}
// Inclusive multiply scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the multiplicative identity (1.0) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns `value` combined via nbl_glsl_mul with the scratch entry one slot below this
// invocation's slot and then with the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Exclusive multiply scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the multiplicative identity (1u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
// Signed-integer overload of the exclusive multiply scan.
// Converts `value` to uint, forwards to the uint overload with the same
// `clearScratchToIdentity` flag, and converts the result back to int.
int nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, int value)
{
    const uint unsignedInput = uint(value);
    const uint unsignedResult = nbl_glsl_subgroupExclusiveMul_impl(clearScratchToIdentity, unsignedInput);
    return int(unsignedResult);
}
// Exclusive multiply scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the multiplicative identity (1.0) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(prevItem);
;
}
// Inclusive minimum scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (4294967295u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns min() of `value`, the scratch entry one slot below this invocation's slot, and
// the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(4294967295u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Inclusive minimum scan (int) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Values are written to the scratch via uint(...)
// and converted back with int(...) before the signed min().
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (2147483647) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns min() of `value`, the scratch entry one slot below this invocation's slot, and
// the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
int nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, int value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(2147483647);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Inclusive minimum scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (1.f / 0.f) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns min() of `value`, the scratch entry one slot below this invocation's slot, and
// the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
// NOTE(review): 1.f / 0.f is presumably meant to produce +infinity - confirm for the target compiler.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint((1.f / 0.f));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Exclusive minimum scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (4294967295u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(4294967295u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return nbl_glsl_identityFunction(prevItem);
;
}
// Exclusive minimum scan (int) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Values are written to the scratch via uint(...)
// and converted back with int(...) before the signed min().
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (2147483647) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
int nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, int value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(2147483647);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return int(prevItem);
;
}
// Exclusive minimum scan (float) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Floats are written to the scratch as raw bits
// (floatBitsToUint) and read back with uintBitsToFloat.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the min identity (1.f / 0.f) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Runs the same combine sequence as the inclusive variant, stores the result in this
// invocation's slot, and returns the entry one slot below it.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
float nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, float value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
// NOTE(review): 1.f / 0.f is presumably meant to produce +infinity - confirm for the target compiler.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint((1.f / 0.f));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Inclusive-to-exclusive shift: store the combined result, then read the slot one below.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return uintBitsToFloat(prevItem);
;
}
// Inclusive maximum scan (uint) for the emulated subgroup, staged through
// nbl_glsl_workgroupArithmeticScratchShared.
//   clearScratchToIdentity - when true, this invocation first writes `value` into its own
//                            scratch slot, and invocations whose pseudoSubgroupInvocation is
//                            below 2 also write the max identity (0u) at lastLoadOffset.
//   value                  - this invocation's contribution.
// Returns max() of `value`, the scratch entry one slot below this invocation's slot, and
// the entry at lastLoadOffset.
// NOTE(review): when clearScratchToIdentity is false nothing is stored before the first
// scratch read; presumably the caller has already populated the scratch - confirm.
uint nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, uint value)
{
// loMask evaluates to 3u; presumably it isolates the index inside a 4-wide emulated subgroup - confirm.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; handed to the helper below (presumably an out parameter that overwrites it - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
if (clearScratchToIdentity)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
// (0x1 << 2) >> 1 == 2: only pseudo-invocations 0 and 1 write the identity element.
if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// First combine step: fold in the entry one slot below.
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
// With these constants the bound is 2 and stp starts at 2u, so this loop body is never entered.
for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
{
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
}
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
// Publish the partial result, then do the final combine with the entry at lastLoadOffset.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
nbl_glsl_subgroupBarrier();
nbl_glsl_subgroupMemoryBarrierShared();
return value;
}
// Inclusive max-scan step for signed integers, staged through
// nbl_glsl_workgroupArithmeticScratchShared (values are stored as uint and converted back with int()).
//   clearScratchToIdentity - when true, this call first publishes `value` into its own
//                            scratch slot and the lower half of the pseudo-subgroup writes
//                            the max identity (-2147483648) into the slot reported by the offset helper.
//   value                  - this invocation's contribution.
// Returns `value` max-combined with the scratch neighbours read below.
int nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, int value)
{
    const uint subgroupMask = (0x1 << 2) - 1u;
    const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
    const uint electedLane = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint laneInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedLane);
    // Sentinel; the offset helper receives this variable and is expected to overwrite it.
    uint finalLoadSlot = 0xdeadbeefu;
    const uint storeSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, laneInSubgroup, finalLoadSlot);

    if (clearScratchToIdentity)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
        if (laneInSubgroup < halfSubgroupSize)
        {
            nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot] = uint(-2147483648);
        }
    }

    // First step: combine with the slot directly below ours.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u]));

    // Intermediate doubling steps (with the constants above the bound equals the start, so none run).
    for (uint stride = 2u; stride < halfSubgroupSize; stride <<= 1u)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - stride]));
    }

    // Last step: publish the partial result, then combine with the slot the helper picked.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot]));

    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    return value;
}
// Inclusive max-scan step for floats, staged through
// nbl_glsl_workgroupArithmeticScratchShared (values are bit-cast with floatBitsToUint/uintBitsToFloat).
//   clearScratchToIdentity - when true, this call first publishes `value` into its own
//                            scratch slot and the lower half of the pseudo-subgroup writes
//                            the max identity (-(1.f / 0.f)) into the slot reported by the offset helper.
//   value                  - this invocation's contribution.
// Returns `value` max-combined with the scratch neighbours read below.
float nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, float value)
{
    const uint subgroupMask = (0x1 << 2) - 1u;
    const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
    const uint electedLane = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint laneInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedLane);
    // Sentinel; the offset helper receives this variable and is expected to overwrite it.
    uint finalLoadSlot = 0xdeadbeefu;
    const uint storeSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, laneInSubgroup, finalLoadSlot);

    if (clearScratchToIdentity)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
        if (laneInSubgroup < halfSubgroupSize)
        {
            nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot] = floatBitsToUint(-(1.f / 0.f));
        }
    }

    // First step: combine with the slot directly below ours.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u]));

    // Intermediate doubling steps (with the constants above the bound equals the start, so none run).
    for (uint stride = 2u; stride < halfSubgroupSize; stride <<= 1u)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - stride]));
    }

    // Last step: publish the partial result, then combine with the slot the helper picked.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot]));

    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    return value;
}
// Exclusive max-scan step for unsigned integers, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Runs the same sequence as the inclusive
// variant, then stores the inclusive result and returns what the slot directly below holds.
//   clearScratchToIdentity - when true, this call first publishes `value` into its own
//                            scratch slot and the lower half of the pseudo-subgroup writes
//                            the max identity (0u) into the slot reported by the offset helper.
//   value                  - this invocation's contribution.
uint nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, uint value)
{
    const uint subgroupMask = (0x1 << 2) - 1u;
    const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
    const uint electedLane = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint laneInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedLane);
    // Sentinel; the offset helper receives this variable and is expected to overwrite it.
    uint finalLoadSlot = 0xdeadbeefu;
    const uint storeSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, laneInSubgroup, finalLoadSlot);

    if (clearScratchToIdentity)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = nbl_glsl_identityFunction(value);
        if (laneInSubgroup < halfSubgroupSize)
        {
            nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot] = nbl_glsl_identityFunction(0u);
        }
    }

    // First step: combine with the slot directly below ours.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u]));

    // Intermediate doubling steps (with the constants above the bound equals the start, so none run).
    for (uint stride = 2u; stride < halfSubgroupSize; stride <<= 1u)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = nbl_glsl_identityFunction(value);
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - stride]));
    }

    // Last inclusive step: publish the partial result, then combine with the slot the helper picked.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot]));

    // Inclusive -> exclusive: store the inclusive result and fetch the previous slot.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint previousSlotValue = nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u];

    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    return nbl_glsl_identityFunction(previousSlotValue);
}
// Exclusive max-scan step for signed integers, staged through
// nbl_glsl_workgroupArithmeticScratchShared (values are stored as uint and converted back with int()).
// Runs the same sequence as the inclusive variant, then stores the inclusive result and
// returns what the slot directly below holds.
//   clearScratchToIdentity - when true, this call first publishes `value` into its own
//                            scratch slot and the lower half of the pseudo-subgroup writes
//                            the max identity (-2147483648) into the slot reported by the offset helper.
//   value                  - this invocation's contribution.
int nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, int value)
{
    const uint subgroupMask = (0x1 << 2) - 1u;
    const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
    const uint electedLane = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint laneInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedLane);
    // Sentinel; the offset helper receives this variable and is expected to overwrite it.
    uint finalLoadSlot = 0xdeadbeefu;
    const uint storeSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, laneInSubgroup, finalLoadSlot);

    if (clearScratchToIdentity)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
        if (laneInSubgroup < halfSubgroupSize)
        {
            nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot] = uint(-2147483648);
        }
    }

    // First step: combine with the slot directly below ours.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u]));

    // Intermediate doubling steps (with the constants above the bound equals the start, so none run).
    for (uint stride = 2u; stride < halfSubgroupSize; stride <<= 1u)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - stride]));
    }

    // Last inclusive step: publish the partial result, then combine with the slot the helper picked.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot]));

    // Inclusive -> exclusive: store the inclusive result and fetch the previous slot.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint previousSlotValue = nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u];

    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    return int(previousSlotValue);
}
// Exclusive max-scan step for floats, staged through
// nbl_glsl_workgroupArithmeticScratchShared (values are bit-cast with floatBitsToUint/uintBitsToFloat).
// Runs the same sequence as the inclusive variant, then stores the inclusive result and
// returns what the slot directly below holds.
//   clearScratchToIdentity - when true, this call first publishes `value` into its own
//                            scratch slot and the lower half of the pseudo-subgroup writes
//                            the max identity (-(1.f / 0.f)) into the slot reported by the offset helper.
//   value                  - this invocation's contribution.
float nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, float value)
{
    const uint subgroupMask = (0x1 << 2) - 1u;
    const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
    const uint electedLane = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint laneInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(subgroupMask, gl_LocalInvocationIndex);
    const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedLane);
    // Sentinel; the offset helper receives this variable and is expected to overwrite it.
    uint finalLoadSlot = 0xdeadbeefu;
    const uint storeSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, laneInSubgroup, finalLoadSlot);

    if (clearScratchToIdentity)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
        if (laneInSubgroup < halfSubgroupSize)
        {
            nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot] = floatBitsToUint(-(1.f / 0.f));
        }
    }

    // First step: combine with the slot directly below ours.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u]));

    // Intermediate doubling steps (with the constants above the bound equals the start, so none run).
    for (uint stride = 2u; stride < halfSubgroupSize; stride <<= 1u)
    {
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
        nbl_glsl_subgroupBarrier();
        nbl_glsl_subgroupMemoryBarrierShared();
        value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[storeSlot - stride]));
    }

    // Last inclusive step: publish the partial result, then combine with the slot the helper picked.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[finalLoadSlot]));

    // Inclusive -> exclusive: store the inclusive result and fetch the previous slot.
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeSlot] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint previousSlotValue = nbl_glsl_workgroupArithmeticScratchShared[storeSlot - 1u];

    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    return uintBitsToFloat(previousSlotValue);
}
# 46 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl" 2
// Builds the workgroup ballot bitfield in the first (256 + 31) >> 5 = 8 dwords of
// nbl_glsl_workgroupArithmeticScratchShared: the dwords are zeroed, then every invocation
// whose `value` is true ORs in the bit matching its gl_LocalInvocationIndex.
// No barrier is issued before the clear or after the atomicOr; see nbl_glsl_workgroupBallot.
void nbl_glsl_workgroupBallot_noBarriers(in bool value)
{
    const uint bitfieldDwordCount = uint((256 + 31) >> 5);
    if (gl_LocalInvocationIndex < bitfieldDwordCount)
    {
        nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = 0u;
    }
    // The clear must be visible before any bit is set.
    barrier();
    if (value)
    {
        const uint dwordIndex = gl_LocalInvocationIndex >> 5;
        const uint bitInDword = 1u << (gl_LocalInvocationIndex & 31u);
        atomicOr(nbl_glsl_workgroupArithmeticScratchShared[dwordIndex], bitInDword);
    }
}
// Barrier-wrapped ballot: brackets nbl_glsl_workgroupBallot_noBarriers with a barrier()
// on entry and on exit.
void nbl_glsl_workgroupBallot(in bool value)
{
    barrier(); // entry barrier
    nbl_glsl_workgroupBallot_noBarriers(value);
    barrier(); // exit barrier
}
// Tests bit `index` of the ballot bitfield held in nbl_glsl_workgroupArithmeticScratchShared.
// Returns true when that bit is set. Issues no barriers.
bool nbl_glsl_workgroupBallotBitExtract_noEndBarriers(in uint index)
{
    const uint ballotDword = nbl_glsl_workgroupArithmeticScratchShared[index >> 5];
    const uint bitInDword = 1u << (index & 31u);
    return (ballotDword & bitInDword) != 0u;
}
// Barrier-wrapped variant of nbl_glsl_workgroupBallotBitExtract_noEndBarriers:
// barrier(), read bit `index` of the ballot, barrier(), return the bit.
bool nbl_glsl_workgroupBallotBitExtract(in uint index)
{
    barrier();
    const bool bitIsSet = nbl_glsl_workgroupBallotBitExtract_noEndBarriers(index);
    barrier();
    return bitIsSet;
}
// Returns this invocation's own ballot bit (the bit at gl_LocalInvocationIndex), without barriers.
bool nbl_glsl_workgroupInverseBallot_noEndBarriers()
{
    const uint ownIndex = gl_LocalInvocationIndex;
    return nbl_glsl_workgroupBallotBitExtract_noEndBarriers(ownIndex);
}
// Returns this invocation's own ballot bit (the bit at gl_LocalInvocationIndex),
// using the barrier-wrapped bit extraction.
bool nbl_glsl_workgroupInverseBallot()
{
    const uint ownIndex = gl_LocalInvocationIndex;
    return nbl_glsl_workgroupBallotBitExtract(ownIndex);
}
// Counts the set bits of the whole ballot bitfield.
// The scratch slot right after the 8 ballot dwords is used as an accumulator: it is zeroed
// (by every invocation), then the first 8 invocations each atomically add the popcount of
// one ballot dword. Returns the accumulated total; no barrier follows the final read.
uint nbl_glsl_workgroupBallotBitCount_noEndBarriers()
{
    const uint bitfieldDwordCount = uint((256 + 31) >> 5);
    const uint accumulatorSlot = bitfieldDwordCount;

    nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot] = 0u;
    barrier();
    if (gl_LocalInvocationIndex < bitfieldDwordCount)
    {
        const uint ownDword = nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex];
        const uint setBitsInDword = bitCount(ownDword);
        atomicAdd(nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot], setBitsInDword);
    }
    barrier();
    return nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot];
}
// Barrier-wrapped variant of nbl_glsl_workgroupBallotBitCount_noEndBarriers:
// barrier(), count the ballot bits, barrier(), return the count.
uint nbl_glsl_workgroupBallotBitCount()
{
    barrier();
    const uint totalSetBits = nbl_glsl_workgroupBallotBitCount_noEndBarriers();
    barrier();
    return totalSetBits;
}
// Broadcasts the uint `val` held by invocation `id` to the whole workgroup through the
// scratch slot right after the 8 ballot dwords. Only the write->read barrier is issued here.
uint nbl_glsl_workgroupBroadcast_noBarriers(in uint val, in uint id)
{
    const uint broadcastSlot = uint((256 + 31) >> 5);
    if (gl_LocalInvocationIndex == id)
    {
        nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = nbl_glsl_identityFunction(val);
    }
    barrier();
    const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
    return nbl_glsl_identityFunction(received);
}
// Broadcasts the bool `val` held by invocation `id` to the whole workgroup through the
// scratch slot right after the 8 ballot dwords (stored as uint, read back with bool()).
// Only the write->read barrier is issued here.
bool nbl_glsl_workgroupBroadcast_noBarriers(in bool val, in uint id)
{
    const uint broadcastSlot = uint((256 + 31) >> 5);
    if (gl_LocalInvocationIndex == id)
    {
        nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = uint(val);
    }
    barrier();
    const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
    return bool(received);
}
// Broadcasts the float `val` held by invocation `id` to the whole workgroup through the
// scratch slot right after the 8 ballot dwords (bit-cast to uint and back).
// Only the write->read barrier is issued here.
float nbl_glsl_workgroupBroadcast_noBarriers(in float val, in uint id)
{
    const uint broadcastSlot = uint((256 + 31) >> 5);
    if (gl_LocalInvocationIndex == id)
    {
        nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = floatBitsToUint(val);
    }
    barrier();
    const uint receivedBits = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
    return uintBitsToFloat(receivedBits);
}
// Broadcasts the int `val` held by invocation `id` to the whole workgroup through the
// scratch slot right after the 8 ballot dwords (stored as uint, read back with int()).
// Only the write->read barrier is issued here.
int nbl_glsl_workgroupBroadcast_noBarriers(in int val, in uint id)
{
    const uint broadcastSlot = uint((256 + 31) >> 5);
    if (gl_LocalInvocationIndex == id)
    {
        nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = uint(val);
    }
    barrier();
    const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
    return int(received);
}
# 144 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
// Barrier-wrapped uint broadcast of `val` from invocation `id`:
// barrier(), nbl_glsl_workgroupBroadcast_noBarriers, barrier().
uint nbl_glsl_workgroupBroadcast(in uint val, in uint id)
{
    barrier();
    const uint broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
    barrier();
    return broadcasted;
}
// Barrier-wrapped bool broadcast of `val` from invocation `id`:
// barrier(), nbl_glsl_workgroupBroadcast_noBarriers, barrier().
bool nbl_glsl_workgroupBroadcast(in bool val, in uint id)
{
    barrier();
    const bool broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
    barrier();
    return broadcasted;
}
// Barrier-wrapped float broadcast of `val` from invocation `id`:
// barrier(), nbl_glsl_workgroupBroadcast_noBarriers, barrier().
float nbl_glsl_workgroupBroadcast(in float val, in uint id)
{
    barrier();
    const float broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
    barrier();
    return broadcasted;
}
// Barrier-wrapped int broadcast of `val` from invocation `id`:
// barrier(), nbl_glsl_workgroupBroadcast_noBarriers, barrier().
int nbl_glsl_workgroupBroadcast(in int val, in uint id)
{
    barrier();
    const int broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
    barrier();
    return broadcasted;
}
// Broadcasts `val` from the invocation for which nbl_glsl_workgroupElect() returns true,
// through the scratch slot right after the 8 ballot dwords.
// Only the write->read barrier is issued here.
uint nbl_glsl_workgroupBroadcastFirst_noBarriers(in uint val)
{
    const uint broadcastSlot = uint((256 + 31) >> 5);
    if (nbl_glsl_workgroupElect())
    {
        nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = val;
    }
    barrier();
    return nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
}
// Barrier-wrapped variant of nbl_glsl_workgroupBroadcastFirst_noBarriers (uint overload).
uint nbl_glsl_workgroupBroadcastFirst(in uint val)
{
    barrier();
    const uint broadcasted = nbl_glsl_workgroupBroadcastFirst_noBarriers(val);
    barrier();
    return broadcasted;
}
// bool overload: broadcasts `val` from invocation 0 via the barrier-wrapped nbl_glsl_workgroupBroadcast.
bool nbl_glsl_workgroupBroadcastFirst(in bool val)
{
    const uint sourceInvocation = 0u;
    return nbl_glsl_workgroupBroadcast(val, sourceInvocation);
}
// float overload: broadcasts `val` from invocation 0 via the barrier-wrapped nbl_glsl_workgroupBroadcast.
float nbl_glsl_workgroupBroadcastFirst(in float val)
{
    const uint sourceInvocation = 0u;
    return nbl_glsl_workgroupBroadcast(val, sourceInvocation);
}
// int overload: broadcasts `val` from invocation 0 via the barrier-wrapped nbl_glsl_workgroupBroadcast.
int nbl_glsl_workgroupBroadcastFirst(in int val)
{
    const uint sourceInvocation = 0u;
    return nbl_glsl_workgroupBroadcast(val, sourceInvocation);
}
# 256 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
// Forward declaration so the Inclusive/Exclusive wrappers below can call the shared
// implementation before it is defined; `exclusive` selects the exclusive form of the count.
uint nbl_glsl_workgroupBallotScanBitCount_impl(in bool exclusive);
// Inclusive variant of the ballot bit-count scan: forwards to the shared implementation
// with `exclusive` = false.
uint nbl_glsl_workgroupBallotInclusiveBitCount()
{
    const bool exclusive = false;
    return nbl_glsl_workgroupBallotScanBitCount_impl(exclusive);
}
// Exclusive variant of the ballot bit-count scan: forwards to the shared implementation
// with `exclusive` = true.
uint nbl_glsl_workgroupBallotExclusiveBitCount()
{
    const bool exclusive = true;
    return nbl_glsl_workgroupBallotScanBitCount_impl(exclusive);
}
// Multi-level add-scan of `localBitCount` over invocations 0..lastInvocation
// (lastInvocation = (256 + 31 >> 5) - 1u = 7), staged through
// nbl_glsl_workgroupArithmeticScratchShared. The function calls barrier(), so every
// invocation of the workgroup has to reach it.
// On the `if (true)` tail the scanned values are stored one slot higher
// (scratch[gl_LocalInvocationIndex + 1u]) and the return value is read back from
// scratch[gl_LocalInvocationIndex]; nbl_glsl_workgroupBallotScanBitCount_impl discards the
// return value and reads those scratch slots itself.
uint nbl_glsl_workgroupBallotScanBitCount_impl_impl(in uint localBitCount)
{
barrier();
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel; handed to the offset helper below, which is expected to overwrite it.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Scratch set-up: publish this invocation's value and write the add identity (0u) into
// one extra slot derived from the invocation index.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(localBitCount);
const uint halfMask = loMask >> 1u;
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// 256 < 2 is false, so this extra clearing loop never runs with these constants.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, (256 + 31 >> 5) - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
const uint lastInvocation = (256 + 31 >> 5) - 1u;
uint lastInvocationInLevel = lastInvocation;
// First level: scan within each pseudo-subgroup; scratch was filled above, hence `false`.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, localBitCount));
uint scan = firstLevelScan;
// True for the invocation whose pseudo-subgroup index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep while a level still holds at least 16 entries; with lastInvocation = 7 the
// condition is false on entry, so only the single `if` stage below executes.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep stage: invocations matching lastInvocationInLevel or possibleProp hand
// their scan to the next level, which is scanned again by the remaining participants.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: add the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// Unsigned subtraction; the difference may wrap and is only ever used added to an index.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(7) = 2, so logShift starts at 0 and this loop body does not run here.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Every pseudo-subgroup except the first adds the higher-level value stored just below its own entry.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Tail: the condition is the literal `true`, so the `else` branch is never taken.
if (true)
{
// Shift results up by one slot so slot i ends up holding the scan of invocation i - 1.
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// NOTE(review): any() of (index != 0, index <= lastInvocation) is true for every invocation
// (index 0 satisfies the second test, all others the first), so the `0u` arm is never
// selected and the result always comes from scratch — confirm whether all() was intended.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
}
// Per-invocation prefix count of set ballot bits.
//   exclusive - true: count only bits below this invocation's own bit;
//               false: also include this invocation's own bit.
// Reads the ballot bitfield from the first 8 dwords of
// nbl_glsl_workgroupArithmeticScratchShared, scans the per-dword popcounts with
// nbl_glsl_workgroupBallotScanBitCount_impl_impl (which overwrites those dwords), and
// restores the bitfield before returning.
// Returns (set bits in all lower dwords) + (set bits of the own dword selected by the mask).
uint nbl_glsl_workgroupBallotScanBitCount_impl(in bool exclusive)
{
const uint _dword = (gl_LocalInvocationIndex >> 5);
// Grab our ballot dword before the scan clobbers the scratch memory.
const uint localBitfield = nbl_glsl_workgroupArithmeticScratchShared[_dword];
uint globalCount;
{
// Zero-initialized so invocations >= 8, which own no ballot dword, feed a defined
// value (0 bits) into the scan instead of reading an uninitialized local.
uint localBitfieldBackup = 0u;
if (gl_LocalInvocationIndex < (256 + 31 >> 5))
localBitfieldBackup = nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex];
// Return value intentionally unused: the scan leaves its result in scratch, read below.
nbl_glsl_workgroupBallotScanBitCount_impl_impl(bitCount(localBitfieldBackup));
// Total of all dwords below ours; dword 0 has nothing below it.
globalCount = _dword != 0u ? nbl_glsl_workgroupArithmeticScratchShared[_dword] : 0u;
barrier();
// Put the original ballot bitfield back for later users.
if (gl_LocalInvocationIndex < (256 + 31 >> 5))
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = localBitfieldBackup;
barrier();
}
// Keep bits [0, own bit] for inclusive, [0, own bit) for exclusive.
const uint mask = (exclusive ? 0x7fffffffu : 0xffffffffu) >> (31u - (gl_LocalInvocationIndex & 31u));
return globalCount + bitCount(localBitfield & mask);
}
# 22 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl" 2
# 22 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 2
# 53 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
// Workgroup-wide bitwise AND reduction of `val` over 256 invocations, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Each level runs
// nbl_glsl_subgroupInclusiveAnd_impl and forwards selected results to the next level;
// the final `scan` is broadcast from invocation `lastInvocationInLevel`.
// No barrier is issued before the first scratch write; the nbl_glsl_workgroupAnd wrappers
// add barrier() around the call.
uint nbl_glsl_workgroupAnd_noBarriers(in uint val)
{
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel; handed to the offset helper below, which is expected to overwrite it.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Scratch set-up: publish this invocation's value and write the AND identity (all ones)
// into one extra slot derived from the invocation index.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
// 256 < 2 is false, so this extra clearing loop never runs with these constants.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First level: scan within each pseudo-subgroup; scratch was filled above, hence `false`.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
uint scan = firstLevelScan;
// True for the invocation whose pseudo-subgroup index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: lastInvocationInLevel goes 255 -> 63 -> 15 across two iterations, then the loop exits.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Literal `false`: per-level scan results are not stored in this reduction.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final stage: 15 >= 4, so one more level runs and lastInvocationInLevel becomes 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last participating invocation of the top level holds the full reduction; share it with everyone.
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// int overload: converts to uint, runs the uint AND reduction, converts the result back.
int nbl_glsl_workgroupAnd_noBarriers(in int val)
{
    const uint valueBits = uint(val);
    const uint reducedBits = nbl_glsl_workgroupAnd_noBarriers(valueBits);
    return int(reducedBits);
}
// float overload: ANDs the raw bit patterns via the uint reduction and bit-casts the result back.
float nbl_glsl_workgroupAnd_noBarriers(in float val)
{
    const uint valueBits = floatBitsToUint(val);
    const uint reducedBits = nbl_glsl_workgroupAnd_noBarriers(valueBits);
    return uintBitsToFloat(reducedBits);
}
// Barrier-wrapped workgroup AND (uint): barrier(), reduce, barrier(), return the result.
uint nbl_glsl_workgroupAnd(in uint val)
{
    barrier();
    const uint reduced = nbl_glsl_workgroupAnd_noBarriers(val);
    barrier();
    return reduced;
}
// Barrier-wrapped workgroup AND (int): barrier(), reduce, barrier(), return the result.
int nbl_glsl_workgroupAnd(in int val)
{
    barrier();
    const int reduced = nbl_glsl_workgroupAnd_noBarriers(val);
    barrier();
    return reduced;
}
// Barrier-wrapped workgroup AND (float): barrier(), reduce, barrier(), return the result.
float nbl_glsl_workgroupAnd(in float val)
{
    barrier();
    const float reduced = nbl_glsl_workgroupAnd_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup-wide bitwise OR reduction of `val` over 256 invocations, staged through
// nbl_glsl_workgroupArithmeticScratchShared. Each level runs
// nbl_glsl_subgroupInclusiveOr_impl and forwards selected results to the next level;
// the final `scan` is broadcast from invocation `lastInvocationInLevel`.
// No barrier is issued before the first scratch write.
uint nbl_glsl_workgroupOr_noBarriers(in uint val)
{
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel; handed to the offset helper below, which is expected to overwrite it.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Scratch set-up: publish this invocation's value and write the OR identity (0u)
// into one extra slot derived from the invocation index.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// 256 < 2 is false, so this extra clearing loop never runs with these constants.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First level: scan within each pseudo-subgroup; scratch was filled above, hence `false`.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
uint scan = firstLevelScan;
// True for the invocation whose pseudo-subgroup index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: lastInvocationInLevel goes 255 -> 63 -> 15 across two iterations, then the loop exits.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Literal `false`: per-level scan results are not stored in this reduction.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final stage: 15 >= 4, so one more level runs and lastInvocationInLevel becomes 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last participating invocation of the top level holds the full reduction; share it with everyone.
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// int overload: converts to uint, runs the uint OR reduction, converts the result back.
int nbl_glsl_workgroupOr_noBarriers(in int val)
{
    const uint valueBits = uint(val);
    const uint reducedBits = nbl_glsl_workgroupOr_noBarriers(valueBits);
    return int(reducedBits);
}
// Workgroup-level Or of `val` (float overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveOr_impl (defined elsewhere) over successively smaller levels.
// The float travels through nbl_glsl_workgroupArithmeticScratchShared as raw bits
// (floatBitsToUint / uintBitsToFloat).
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupOr are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
float nbl_glsl_workgroupOr_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand (as raw bits) to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the bits of 0.0 (the value used as the Or identity here) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere)
// and reinterpret the returned bits as float.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup Or (uint overload): calls nbl_glsl_workgroupOr_noBarriers with a
// barrier() immediately before and immediately after the call.
// NOTE(review): the barriers presumably protect the shared scratch array from
// neighbouring users; confirm against the callers.
uint nbl_glsl_workgroupOr(in uint val)
{
    barrier();
    const uint reduced = nbl_glsl_workgroupOr_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup Or (int overload): calls nbl_glsl_workgroupOr_noBarriers with a
// barrier() immediately before and immediately after the call.
int nbl_glsl_workgroupOr(in int val)
{
    barrier();
    const int reduced = nbl_glsl_workgroupOr_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup Or (float overload): calls nbl_glsl_workgroupOr_noBarriers with a
// barrier() immediately before and immediately after the call.
float nbl_glsl_workgroupOr(in float val)
{
    barrier();
    const float reduced = nbl_glsl_workgroupOr_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup-level Xor of `val` (uint overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveXor_impl (defined elsewhere) over successively smaller levels,
// using nbl_glsl_workgroupArithmeticScratchShared to pass values between levels.
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupXor are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
uint nbl_glsl_workgroupXor_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the Xor identity (0u) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere).
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// int overload of the workgroup Xor without entry/exit barriers.
// Delegates to the uint overload: the operand is converted with uint() and the
// result is converted back with int().
int nbl_glsl_workgroupXor_noBarriers(in int val)
{
    const uint operand = uint(val);
    const uint combined = nbl_glsl_workgroupXor_noBarriers(operand);
    return int(combined);
}
// Workgroup-level Xor of `val` (float overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveXor_impl (defined elsewhere) over successively smaller levels.
// The float travels through nbl_glsl_workgroupArithmeticScratchShared as raw bits
// (floatBitsToUint / uintBitsToFloat).
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupXor are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
float nbl_glsl_workgroupXor_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand (as raw bits) to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the bits of 0.0 (the value used as the Xor identity here) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere)
// and reinterpret the returned bits as float.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup Xor (uint overload): calls nbl_glsl_workgroupXor_noBarriers with a
// barrier() immediately before and immediately after the call.
// NOTE(review): the barriers presumably protect the shared scratch array from
// neighbouring users; confirm against the callers.
uint nbl_glsl_workgroupXor(in uint val)
{
    barrier();
    const uint reduced = nbl_glsl_workgroupXor_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup Xor (int overload): calls nbl_glsl_workgroupXor_noBarriers with a
// barrier() immediately before and immediately after the call.
int nbl_glsl_workgroupXor(in int val)
{
    barrier();
    const int reduced = nbl_glsl_workgroupXor_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup Xor (float overload): calls nbl_glsl_workgroupXor_noBarriers with a
// barrier() immediately before and immediately after the call.
float nbl_glsl_workgroupXor(in float val)
{
    barrier();
    const float reduced = nbl_glsl_workgroupXor_noBarriers(val);
    barrier();
    return reduced;
}
// Workgroup-level Add (sum) of `val` (uint overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveAdd_impl (defined elsewhere) over successively smaller levels,
// using nbl_glsl_workgroupArithmeticScratchShared to pass values between levels.
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupAdd are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
uint nbl_glsl_workgroupAdd_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the additive identity (0u) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere).
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// int overload of the workgroup Add without entry/exit barriers.
// Delegates to the uint overload: the operand is converted with uint() and the
// result is converted back with int().
int nbl_glsl_workgroupAdd_noBarriers(in int val)
{
    const uint operand = uint(val);
    const uint total = nbl_glsl_workgroupAdd_noBarriers(operand);
    return int(total);
}
// Workgroup-level Add (sum) of `val` (float overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveAdd_impl (defined elsewhere) over successively smaller levels.
// The float travels through nbl_glsl_workgroupArithmeticScratchShared as raw bits
// (floatBitsToUint / uintBitsToFloat).
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupAdd are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
float nbl_glsl_workgroupAdd_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand (as raw bits) to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the bits of the additive identity 0.0 into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere)
// and reinterpret the returned bits as float.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup Add (uint overload): calls nbl_glsl_workgroupAdd_noBarriers with a
// barrier() immediately before and immediately after the call.
// NOTE(review): the barriers presumably protect the shared scratch array from
// neighbouring users; confirm against the callers.
uint nbl_glsl_workgroupAdd(in uint val)
{
    barrier();
    const uint total = nbl_glsl_workgroupAdd_noBarriers(val);
    barrier();
    return total;
}
// Workgroup Add (int overload): calls nbl_glsl_workgroupAdd_noBarriers with a
// barrier() immediately before and immediately after the call.
int nbl_glsl_workgroupAdd(in int val)
{
    barrier();
    const int total = nbl_glsl_workgroupAdd_noBarriers(val);
    barrier();
    return total;
}
// Workgroup Add (float overload): calls nbl_glsl_workgroupAdd_noBarriers with a
// barrier() immediately before and immediately after the call.
float nbl_glsl_workgroupAdd(in float val)
{
    barrier();
    const float total = nbl_glsl_workgroupAdd_noBarriers(val);
    barrier();
    return total;
}
// Workgroup-level Mul (product) of `val` (uint overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveMul_impl (defined elsewhere) over successively smaller levels,
// using nbl_glsl_workgroupArithmeticScratchShared to pass values between levels.
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupMul are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
uint nbl_glsl_workgroupMul_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the multiplicative identity (1u) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere).
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// int overload of the workgroup Mul without entry/exit barriers.
// Delegates to the uint overload: the operand is converted with uint() and the
// result is converted back with int().
int nbl_glsl_workgroupMul_noBarriers(in int val)
{
    const uint operand = uint(val);
    const uint product = nbl_glsl_workgroupMul_noBarriers(operand);
    return int(product);
}
// Workgroup-level Mul (product) of `val` (float overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveMul_impl (defined elsewhere) over successively smaller levels.
// The float travels through nbl_glsl_workgroupArithmeticScratchShared as raw bits
// (floatBitsToUint / uintBitsToFloat).
// Despite the name, barrier() is called internally; only the entry and exit barriers
// added by nbl_glsl_workgroupMul are absent here.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
float nbl_glsl_workgroupMul_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand (as raw bits) to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the bits of the multiplicative identity 1.0 into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere)
// and reinterpret the returned bits as float.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup Mul (uint overload): calls nbl_glsl_workgroupMul_noBarriers with a
// barrier() immediately before and immediately after the call.
// NOTE(review): the barriers presumably protect the shared scratch array from
// neighbouring users; confirm against the callers.
uint nbl_glsl_workgroupMul(in uint val)
{
    barrier();
    const uint product = nbl_glsl_workgroupMul_noBarriers(val);
    barrier();
    return product;
}
// Workgroup Mul (int overload): calls nbl_glsl_workgroupMul_noBarriers with a
// barrier() immediately before and immediately after the call.
int nbl_glsl_workgroupMul(in int val)
{
    barrier();
    const int product = nbl_glsl_workgroupMul_noBarriers(val);
    barrier();
    return product;
}
// Workgroup Mul (float overload): calls nbl_glsl_workgroupMul_noBarriers with a
// barrier() immediately before and immediately after the call.
float nbl_glsl_workgroupMul(in float val)
{
    barrier();
    const float product = nbl_glsl_workgroupMul_noBarriers(val);
    barrier();
    return product;
}
// Workgroup-level Min of `val` (uint overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveMin_impl (defined elsewhere) over successively smaller levels,
// using nbl_glsl_workgroupArithmeticScratchShared to pass values between levels.
// Despite the name, barrier() is called internally; this function adds no barrier on
// entry and none after the final broadcast call.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
uint nbl_glsl_workgroupMin_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the unsigned Min identity (4294967295u, the largest uint) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw operands.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere).
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup-level Min of `val` (int overload), built from repeated calls to
// nbl_glsl_subgroupInclusiveMin_impl (defined elsewhere) over successively smaller levels.
// Unlike the int overloads of Or/Xor/Add/Mul, this one does not delegate to the uint overload:
// it calls nbl_glsl_subgroupInclusiveMin_impl with int arguments and only converts with
// uint()/int() when values pass through nbl_glsl_workgroupArithmeticScratchShared.
// Despite the name, barrier() is called internally; this function adds no barrier on
// entry and none after the final broadcast call.
// Returns the final-level `scan` as broadcast from invocation `lastInvocationInLevel`.
int nbl_glsl_workgroupMin_noBarriers(in int val)
{
// (0x1 << 2) - 1u == 3u, i.e. a mask of the two low bits of the invocation index.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel initial value. NOTE(review): presumably overwritten by the helper on the next
// line through an out/inout parameter; confirm against its declaration.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's operand (converted to uint) to the shared scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
const uint halfMask = loMask >> 1u;
// Write the signed Min identity (2147483647, the largest 32-bit int) into the slot derived from the invocation index.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
// The condition is the constant 256 < 2, so this extra clearing loop never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
}
// Synchronise the workgroup after the scratch writes above and before the first scan.
barrier();
}
// Highest invocation index in the workgroup (local_size_x is 256).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan over the raw int operands; the result is kept as uint.
uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask; such invocations forward their scan to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the `if (false)` statements below, so it never influences the result.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// The threshold is 16. Starting from 255 the body runs twice (255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// Forward this level's scan to the slot that is read back at the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level: lastInvocationInLevel is shifted right by 2 and only invocations up to it continue.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Reload the forwarded value, convert it back to int and scan again.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Dead branch: the condition is the literal `false`.
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// 15 >= 4 holds after the loop, so this last pass runs once and leaves lastInvocationInLevel == 3.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
// Dead branch: the condition is the literal `false`.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// Hand `scan` and the index lastInvocationInLevel to nbl_glsl_workgroupBroadcast_noBarriers (defined elsewhere)
// and convert the returned uint back to int.
return int(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup-wide minimum of `val` (float overload), expanded for a 256-invocation workgroup.
// The reduction is a tree of 4-wide emulated-subgroup inclusive-min scans that exchange data
// through nbl_glsl_workgroupArithmeticScratchShared; floats are stored there as raw uint bits.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupMin wrapper adds those; confirm against the original macro.
float nbl_glsl_workgroupMin_noBarriers(in float val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, +infinity bits in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
// 1.f / 0.f is used as +infinity, the identity element of min.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive min within each 4-wide group, kept as uint bits.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the compile-time disabled stores below in this reduction variant.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
// Disabled here: a reduction does not keep the per-level scans.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last invocation of the top level holds the total; hand its value to everyone.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Barrier-bracketed workgroup minimum (uint overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMin_noBarriers.
uint nbl_glsl_workgroupMin(in uint val)
{
    barrier();
    const uint workgroupMinimum = nbl_glsl_workgroupMin_noBarriers(val);
    barrier();
    return workgroupMinimum;
}
// Barrier-bracketed workgroup minimum (int overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMin_noBarriers.
int nbl_glsl_workgroupMin(in int val)
{
    barrier();
    const int workgroupMinimum = nbl_glsl_workgroupMin_noBarriers(val);
    barrier();
    return workgroupMinimum;
}
// Barrier-bracketed workgroup minimum (float overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMin_noBarriers.
float nbl_glsl_workgroupMin(in float val)
{
    barrier();
    const float workgroupMinimum = nbl_glsl_workgroupMin_noBarriers(val);
    barrier();
    return workgroupMinimum;
}
// Workgroup-wide maximum of `val` (uint overload), expanded for a 256-invocation workgroup.
// The reduction is a tree of 4-wide emulated-subgroup inclusive-max scans that exchange data
// through nbl_glsl_workgroupArithmeticScratchShared.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupMax wrapper adds those; confirm against the original macro.
// NOTE(review): nbl_glsl_identityFunction is not defined in this chunk; presumably it returns
// its argument unchanged (the uint counterpart of the bit-cast used by the float overload).
uint nbl_glsl_workgroupMax_noBarriers(in uint val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, 0u (identity of unsigned max) in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive max within each 4-wide group.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the compile-time disabled stores below in this reduction variant.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Disabled here: a reduction does not keep the per-level scans.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last invocation of the top level holds the total; hand its value to everyone.
return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup-wide maximum of `val` (int overload), expanded for a 256-invocation workgroup.
// The reduction is a tree of 4-wide emulated-subgroup inclusive-max scans that exchange data
// through nbl_glsl_workgroupArithmeticScratchShared; ints are converted with uint()/int() on the
// way in and out of the uint scratch.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupMax wrapper adds those; confirm against the original macro.
int nbl_glsl_workgroupMax_noBarriers(in int val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, the max identity in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
// NOTE(review): uint(-2147483648) is presumably meant as the bit pattern of the most negative
// int (identity of signed max) - confirm how the target compiler evaluates this literal.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive max within each 4-wide group, kept as uint.
uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the compile-time disabled stores below in this reduction variant.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
// Disabled here: a reduction does not keep the per-level scans.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last invocation of the top level holds the total; hand its value to everyone.
return int(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Workgroup-wide maximum of `val` (float overload), expanded for a 256-invocation workgroup.
// The reduction is a tree of 4-wide emulated-subgroup inclusive-max scans that exchange data
// through nbl_glsl_workgroupArithmeticScratchShared; floats are stored there as raw uint bits.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupMax wrapper adds those; confirm against the original macro.
float nbl_glsl_workgroupMax_noBarriers(in float val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, -infinity bits in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
// -(1.f / 0.f) is used as -infinity, the identity element of max.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive max within each 4-wide group, kept as uint bits.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// Only used by the compile-time disabled stores below in this reduction variant.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
// Disabled here: a reduction does not keep the per-level scans.
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
if (false)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
if (false)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
};
barrier();
// The last invocation of the top level holds the total; hand its value to everyone.
return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
}
// Barrier-bracketed workgroup maximum (uint overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMax_noBarriers.
uint nbl_glsl_workgroupMax(in uint val)
{
    barrier();
    const uint workgroupMaximum = nbl_glsl_workgroupMax_noBarriers(val);
    barrier();
    return workgroupMaximum;
}
// Barrier-bracketed workgroup maximum (int overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMax_noBarriers.
int nbl_glsl_workgroupMax(in int val)
{
    barrier();
    const int workgroupMaximum = nbl_glsl_workgroupMax_noBarriers(val);
    barrier();
    return workgroupMaximum;
}
// Barrier-bracketed workgroup maximum (float overload): one barrier() before and one after
// the call to nbl_glsl_workgroupMax_noBarriers.
float nbl_glsl_workgroupMax(in float val)
{
    barrier();
    const float workgroupMaximum = nbl_glsl_workgroupMax_noBarriers(val);
    barrier();
    return workgroupMaximum;
}
# 186 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
// Workgroup-wide inclusive bitwise-AND scan of `val` (uint overload), expanded for a
// 256-invocation workgroup. An up-sweep of 4-wide emulated-subgroup inclusive scans records every
// tree level in nbl_glsl_workgroupArithmeticScratchShared; a down-sweep then folds the higher
// levels back into each invocation's first-level result.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupInclusiveAnd wrapper adds those; confirm against the original macro.
// NOTE(review): nbl_glsl_identityFunction and nbl_glsl_and are not defined in this chunk;
// presumably a pass-through and a bitwise AND respectively.
uint nbl_glsl_workgroupInclusiveAnd_noBarriers(in uint val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, all-ones (identity of AND) in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive AND within each 4-wide group.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// This invocation's slot in the region that records the higher-level scans.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep. Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Enabled for scans: keep this level's result for the down-sweep.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the level just recorded (its size is lastInvocationInLevel + 1 entries).
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here);
// scanStoreIndex is not advanced after this last level.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: combine each recorded level with the level above it, top to bottom.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// Unsigned difference that wraps around; it is only ever used added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 for these constants.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Entries past the first 4-wide group of the level are combined with a value from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations outside the first 4-wide group combine a value read from the lowest recorded level.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Exclusive-scan tail; compile-time disabled in this inclusive variant.
// NOTE(review): the any(...) condition below is true for every invocation index
// (index 0 satisfies "<= lastInvocation"), so the identity fallback can never be selected.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0xffFFffFFu;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts to uint, runs the uint inclusive-AND scan, converts the result back.
int nbl_glsl_workgroupInclusiveAnd_noBarriers(in int val)
{
    const uint unsignedInput = uint(val);
    const uint unsignedScan = nbl_glsl_workgroupInclusiveAnd_noBarriers(unsignedInput);
    return int(unsignedScan);
}
// float overload: runs the uint inclusive-AND scan on the raw bits of `val` and
// reinterprets the resulting bits as a float.
float nbl_glsl_workgroupInclusiveAnd_noBarriers(in float val)
{
    const uint rawBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupInclusiveAnd_noBarriers(rawBits);
    return uintBitsToFloat(scannedBits);
}
// Barrier-bracketed inclusive AND scan (uint overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveAnd_noBarriers.
uint nbl_glsl_workgroupInclusiveAnd(in uint val)
{
    barrier();
    const uint scanResult = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed inclusive AND scan (int overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveAnd_noBarriers.
int nbl_glsl_workgroupInclusiveAnd(in int val)
{
    barrier();
    const int scanResult = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed inclusive AND scan (float overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveAnd_noBarriers.
float nbl_glsl_workgroupInclusiveAnd(in float val)
{
    barrier();
    const float scanResult = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Workgroup-wide exclusive bitwise-AND scan of `val` (uint overload), expanded for a
// 256-invocation workgroup. An up-sweep of 4-wide emulated-subgroup inclusive scans records every
// tree level in nbl_glsl_workgroupArithmeticScratchShared, a down-sweep folds the higher levels
// back into each invocation's first-level result, and the inclusive result is finally shifted by
// one invocation through the scratch to make it exclusive.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupExclusiveAnd wrapper adds those; confirm against the original macro.
// NOTE(review): nbl_glsl_identityFunction and nbl_glsl_and are not defined in this chunk;
// presumably a pass-through and a bitwise AND respectively.
uint nbl_glsl_workgroupExclusiveAnd_noBarriers(in uint val)
{
    // loMask == 3, i.e. an emulated subgroup spans 4 invocations.
    const uint loMask = (0x1 << 2) - 1u;
    const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
    const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
    const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
    // Placeholder handed to the helper below as its third argument; never read again in this function.
    uint lastLoadOffset = 0xdeadbeefu;
    const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
    // Seed the scratch: own value in this invocation's slot, all-ones (identity of AND) in a second slot.
    {
        nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
        const uint halfMask = loMask >> 1u;
        // Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
        nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
        // 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
        if (256 < ((0x1 << 2) >> 1))
        {
            const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
            for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
                nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
        }
        barrier();
    }
    const uint lastInvocation = 256 - 1u;
    uint lastInvocationInLevel = lastInvocation;
    // First tree level: inclusive AND within each 4-wide group.
    uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
    uint scan = firstLevelScan;
    // True for the last lane of a 4-wide group; that lane publishes the group's result.
    const bool possibleProp = pseudoSubgroupInvocation == loMask;
    const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
    const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
    // This invocation's slot in the region that records the higher-level scans.
    uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
    bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
    // Up-sweep. Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
    while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            // The last lane of each group (or the very last invocation of the level) publishes its scan.
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            // Re-read the published partial results and scan them at the next level.
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
            // Enabled for scans: keep this level's result for the down-sweep.
            if (true)
                nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
        // Step past the level just recorded (its size is lastInvocationInLevel + 1 entries).
        if (true)
            scanStoreIndex += lastInvocationInLevel + 1u;
    }
    // One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here);
    // scanStoreIndex is not advanced after this last level.
    if (lastInvocationInLevel >= (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
            if (true)
                nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
    }
    barrier();
    // Down-sweep: combine each recorded level with the level above it, top to bottom.
    if (lastInvocation >= (0x1 << 2))
    {
        uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
        const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
        // Unsigned difference that wraps around; it is only ever used added to scanLoadIndex.
        const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
        // findMSB(255) == 7, so logShift takes the values 4 and 2 for these constants.
        for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
        {
            lastInvocationInLevel = lastInvocation >> logShift;
            barrier();
            const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
            // Entries past the first 4-wide group of the level are combined with a value from the level above.
            if (shiftedInvocationIndex <= lastInvocationInLevel)
                nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
            scanLoadIndex = currentLevelIndex;
        }
        barrier();
        // Invocations outside the first 4-wide group combine a value read from the lowest recorded level.
        if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
        {
            const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
            firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
        }
    }
    // Exclusive-scan tail: every invocation hands its inclusive result to its right-hand neighbour.
    if (true)
    {
        if (gl_LocalInvocationIndex < lastInvocation)
            nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
        barrier();
        // Both conditions must hold to read a neighbour's value: invocation 0 has no predecessor
        // and must get the AND identity. The previous any(...) was true for every index
        // (index 0 satisfies "<= lastInvocation"), so the identity fallback was unreachable and
        // invocation 0 returned whatever scratch slot 0 happened to contain.
        return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0xffFFffFFu;
    }
    else
        return nbl_glsl_identityFunction(firstLevelScan);
    ;
}
// int overload: converts to uint, runs the uint exclusive-AND scan, converts the result back.
int nbl_glsl_workgroupExclusiveAnd_noBarriers(in int val)
{
    const uint unsignedInput = uint(val);
    const uint unsignedScan = nbl_glsl_workgroupExclusiveAnd_noBarriers(unsignedInput);
    return int(unsignedScan);
}
// float overload: runs the uint exclusive-AND scan on the raw bits of `val` and
// reinterprets the resulting bits as a float.
float nbl_glsl_workgroupExclusiveAnd_noBarriers(in float val)
{
    const uint rawBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupExclusiveAnd_noBarriers(rawBits);
    return uintBitsToFloat(scannedBits);
}
// Barrier-bracketed exclusive AND scan (uint overload): one barrier() before and one after
// the call to nbl_glsl_workgroupExclusiveAnd_noBarriers.
uint nbl_glsl_workgroupExclusiveAnd(in uint val)
{
    barrier();
    const uint scanResult = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed exclusive AND scan (int overload): one barrier() before and one after
// the call to nbl_glsl_workgroupExclusiveAnd_noBarriers.
int nbl_glsl_workgroupExclusiveAnd(in int val)
{
    barrier();
    const int scanResult = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed exclusive AND scan (float overload): one barrier() before and one after
// the call to nbl_glsl_workgroupExclusiveAnd_noBarriers.
float nbl_glsl_workgroupExclusiveAnd(in float val)
{
    barrier();
    const float scanResult = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
    barrier();
    return scanResult;
}
// Workgroup-wide inclusive bitwise-OR scan of `val` (uint overload), expanded for a
// 256-invocation workgroup. An up-sweep of 4-wide emulated-subgroup inclusive scans records every
// tree level in nbl_glsl_workgroupArithmeticScratchShared; a down-sweep then folds the higher
// levels back into each invocation's first-level result.
// The body calls barrier() several times, so all invocations of the workgroup have to reach it.
// NOTE(review): the "_noBarriers" suffix presumably means "no barrier on entry/exit" - the
// nbl_glsl_workgroupInclusiveOr wrapper adds those; confirm against the original macro.
// NOTE(review): nbl_glsl_identityFunction and nbl_glsl_or are not defined in this chunk;
// presumably a pass-through and a bitwise OR respectively.
uint nbl_glsl_workgroupInclusiveOr_noBarriers(in uint val)
{
// loMask == 3, i.e. an emulated subgroup spans 4 invocations.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder handed to the helper below as its third argument; never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
// Seed the scratch: own value in this invocation's slot, 0u (identity of OR) in a second slot.
{
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Slot index keeps bit 0 of the invocation index and shifts the remaining bits left by 2.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// 256 < 2 is false for these constants, so this extra clearing loop never runs in this expansion.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First tree level: inclusive OR within each 4-wide group.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane of a 4-wide group; that lane publishes the group's result.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
// This invocation's slot in the region that records the higher-level scans.
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep. Each pass shrinks the level by a factor of 4: 255 -> 63 -> 15 for these constants.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last lane of each group (or the very last invocation of the level) publishes its scan.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
// Re-read the published partial results and scan them at the next level.
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Enabled for scans: keep this level's result for the down-sweep.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the level just recorded (its size is lastInvocationInLevel + 1 entries).
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more pass of the same shape when more than one 4-wide group is left (15 -> 3 here);
// scanStoreIndex is not advanced after this last level.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: combine each recorded level with the level above it, top to bottom.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// Unsigned difference that wraps around; it is only ever used added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 for these constants.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Entries past the first 4-wide group of the level are combined with a value from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations outside the first 4-wide group combine a value read from the lowest recorded level.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Exclusive-scan tail; compile-time disabled in this inclusive variant.
// NOTE(review): the any(...) condition below is true for every invocation index
// (index 0 satisfies "<= lastInvocation"), so the identity fallback can never be selected.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts to uint, runs the uint inclusive-OR scan, converts the result back.
int nbl_glsl_workgroupInclusiveOr_noBarriers(in int val)
{
    const uint unsignedInput = uint(val);
    const uint unsignedScan = nbl_glsl_workgroupInclusiveOr_noBarriers(unsignedInput);
    return int(unsignedScan);
}
// float overload: runs the uint inclusive-OR scan on the raw bits of `val` and
// reinterprets the resulting bits as a float.
float nbl_glsl_workgroupInclusiveOr_noBarriers(in float val)
{
    const uint rawBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupInclusiveOr_noBarriers(rawBits);
    return uintBitsToFloat(scannedBits);
}
// Barrier-bracketed inclusive OR scan (uint overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveOr_noBarriers.
uint nbl_glsl_workgroupInclusiveOr(in uint val)
{
    barrier();
    const uint scanResult = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed inclusive OR scan (int overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveOr_noBarriers.
int nbl_glsl_workgroupInclusiveOr(in int val)
{
    barrier();
    const int scanResult = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
    barrier();
    return scanResult;
}
// Barrier-bracketed inclusive OR scan (float overload): one barrier() before and one after
// the call to nbl_glsl_workgroupInclusiveOr_noBarriers.
float nbl_glsl_workgroupInclusiveOr(in float val)
{
    barrier();
    const float scanResult = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
    barrier();
    return scanResult;
}
// Exclusive OR-scan of `val` over the workgroup (uint overload).
// The scan is built level by level from nbl_glsl_subgroupInclusiveOr_impl calls, with partial
// results staged through nbl_glsl_workgroupArithmeticScratchShared and separated by barrier().
// The final block shifts the inclusive result by one invocation to make it exclusive.
// nbl_glsl_workgroupExclusiveOr wraps this function in an additional barrier() pair.
uint nbl_glsl_workgroupExclusiveOr_noBarriers(in uint val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write 0u to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Result of the first-level helper call on the raw input; refined further below.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// OR the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-true condition: the exclusive path is always taken and the else branch is unreachable.
if (true)
{
// Each invocation except the last hands its inclusive result to the next slot...
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// ...and reads the slot at its own index.
// NOTE(review): any() is true for every index <= 255, so the 0u fallback is only reachable for
// larger indices; invocation 0 returns whatever scratch[0] holds - confirm it is still the identity.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts `val` to uint, forwards it to the uint overload and
// converts the returned value back to int.
int nbl_glsl_workgroupExclusiveOr_noBarriers(in int val)
{
    const uint asUnsigned = uint(val);
    const uint scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(asUnsigned);
    return int(scanned);
}
// float overload: passes the bit pattern of `val` (floatBitsToUint) to the uint
// overload and reinterprets the returned bits as a float (uintBitsToFloat).
float nbl_glsl_workgroupExclusiveOr_noBarriers(in float val)
{
    const uint inputBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupExclusiveOr_noBarriers(inputBits);
    return uintBitsToFloat(scannedBits);
}
// Runs the uint overload of nbl_glsl_workgroupExclusiveOr_noBarriers between two
// barrier() calls and returns its result.
uint nbl_glsl_workgroupExclusiveOr(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the int overload of nbl_glsl_workgroupExclusiveOr_noBarriers between two
// barrier() calls and returns its result.
int nbl_glsl_workgroupExclusiveOr(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the float overload of nbl_glsl_workgroupExclusiveOr_noBarriers between two
// barrier() calls and returns its result.
float nbl_glsl_workgroupExclusiveOr(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
    barrier();
    return scanned;
}
// Inclusive XOR-scan of `val` over the workgroup (uint overload).
// The scan is built level by level from nbl_glsl_subgroupInclusiveXor_impl calls, with partial
// results staged through nbl_glsl_workgroupArithmeticScratchShared and separated by barrier().
// nbl_glsl_workgroupInclusiveXor wraps this function in an additional barrier() pair.
uint nbl_glsl_workgroupInclusiveXor_noBarriers(in uint val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write 0u to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Result of the first-level helper call on the raw input; refined further below.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// XOR the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-false condition: the shift-by-one (exclusive) branch is never taken in this function;
// the inclusive result is returned directly by the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts `val` to uint, forwards it to the uint overload and
// converts the returned value back to int.
int nbl_glsl_workgroupInclusiveXor_noBarriers(in int val)
{
    const uint asUnsigned = uint(val);
    const uint scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(asUnsigned);
    return int(scanned);
}
// float overload: passes the bit pattern of `val` (floatBitsToUint) to the uint
// overload and reinterprets the returned bits as a float (uintBitsToFloat).
float nbl_glsl_workgroupInclusiveXor_noBarriers(in float val)
{
    const uint inputBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupInclusiveXor_noBarriers(inputBits);
    return uintBitsToFloat(scannedBits);
}
// Runs the uint overload of nbl_glsl_workgroupInclusiveXor_noBarriers between two
// barrier() calls and returns its result.
uint nbl_glsl_workgroupInclusiveXor(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the int overload of nbl_glsl_workgroupInclusiveXor_noBarriers between two
// barrier() calls and returns its result.
int nbl_glsl_workgroupInclusiveXor(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the float overload of nbl_glsl_workgroupInclusiveXor_noBarriers between two
// barrier() calls and returns its result.
float nbl_glsl_workgroupInclusiveXor(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Exclusive XOR-scan of `val` over the workgroup (uint overload).
// The scan is built level by level from nbl_glsl_subgroupInclusiveXor_impl calls, with partial
// results staged through nbl_glsl_workgroupArithmeticScratchShared and separated by barrier().
// The final block shifts the inclusive result by one invocation to make it exclusive.
// nbl_glsl_workgroupExclusiveXor wraps this function in an additional barrier() pair.
uint nbl_glsl_workgroupExclusiveXor_noBarriers(in uint val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write 0u to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Result of the first-level helper call on the raw input; refined further below.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// XOR the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-true condition: the exclusive path is always taken and the else branch is unreachable.
if (true)
{
// Each invocation except the last hands its inclusive result to the next slot...
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// ...and reads the slot at its own index.
// NOTE(review): any() is true for every index <= 255, so the 0u fallback is only reachable for
// larger indices; invocation 0 returns whatever scratch[0] holds - confirm it is still the identity.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts `val` to uint, forwards it to the uint overload and
// converts the returned value back to int.
int nbl_glsl_workgroupExclusiveXor_noBarriers(in int val)
{
    const uint asUnsigned = uint(val);
    const uint scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(asUnsigned);
    return int(scanned);
}
// float overload: passes the bit pattern of `val` (floatBitsToUint) to the uint
// overload and reinterprets the returned bits as a float (uintBitsToFloat).
float nbl_glsl_workgroupExclusiveXor_noBarriers(in float val)
{
    const uint inputBits = floatBitsToUint(val);
    const uint scannedBits = nbl_glsl_workgroupExclusiveXor_noBarriers(inputBits);
    return uintBitsToFloat(scannedBits);
}
// Runs the uint overload of nbl_glsl_workgroupExclusiveXor_noBarriers between two
// barrier() calls and returns its result.
uint nbl_glsl_workgroupExclusiveXor(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the int overload of nbl_glsl_workgroupExclusiveXor_noBarriers between two
// barrier() calls and returns its result.
int nbl_glsl_workgroupExclusiveXor(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the float overload of nbl_glsl_workgroupExclusiveXor_noBarriers between two
// barrier() calls and returns its result.
float nbl_glsl_workgroupExclusiveXor(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
    barrier();
    return scanned;
}
// Inclusive add-scan (prefix sum) of `val` over the workgroup (uint overload).
// The scan is built level by level from nbl_glsl_subgroupInclusiveAdd_impl calls, with partial
// results staged through nbl_glsl_workgroupArithmeticScratchShared and separated by barrier().
// nbl_glsl_workgroupInclusiveAdd wraps this function in an additional barrier() pair.
uint nbl_glsl_workgroupInclusiveAdd_noBarriers(in uint val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write 0u to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Result of the first-level helper call on the raw input; refined further below.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Add the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-false condition: the shift-by-one (exclusive) branch is never taken in this function;
// the inclusive result is returned directly by the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts `val` to uint, forwards it to the uint overload and
// converts the returned value back to int.
int nbl_glsl_workgroupInclusiveAdd_noBarriers(in int val)
{
    const uint asUnsigned = uint(val);
    const uint scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(asUnsigned);
    return int(scanned);
}
// Inclusive add-scan (prefix sum) of `val` over the workgroup (float overload).
// Same structure as the uint overload, but the arithmetic is done on floats: values are stored in
// nbl_glsl_workgroupArithmeticScratchShared as bit patterns (floatBitsToUint) and converted back
// with uintBitsToFloat at every load.
// nbl_glsl_workgroupInclusiveAdd wraps this function in an additional barrier() pair.
float nbl_glsl_workgroupInclusiveAdd_noBarriers(in float val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish the bit pattern of this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write the bits of 0.0 to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Bits of the first-level helper result on the raw input; refined further below.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Float-add the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Constant-false condition: the shift-by-one (exclusive) branch is never taken in this function;
// the inclusive result is returned directly by the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0.0;
}
else
return uintBitsToFloat(firstLevelScan);
;
}
// Runs the uint overload of nbl_glsl_workgroupInclusiveAdd_noBarriers between two
// barrier() calls and returns its result.
uint nbl_glsl_workgroupInclusiveAdd(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the int overload of nbl_glsl_workgroupInclusiveAdd_noBarriers between two
// barrier() calls and returns its result.
int nbl_glsl_workgroupInclusiveAdd(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
    barrier();
    return scanned;
}
// Runs the float overload of nbl_glsl_workgroupInclusiveAdd_noBarriers between two
// barrier() calls and returns its result.
float nbl_glsl_workgroupInclusiveAdd(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
    barrier();
    return scanned;
}
// Exclusive add-scan (prefix sum) of `val` over the workgroup (uint overload).
// The scan is built level by level from nbl_glsl_subgroupInclusiveAdd_impl calls, with partial
// results staged through nbl_glsl_workgroupArithmeticScratchShared and separated by barrier().
// The final block shifts the inclusive result by one invocation to make it exclusive.
uint nbl_glsl_workgroupExclusiveAdd_noBarriers(in uint val)
{
// (1 << 2) - 1 == 3
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Placeholder value; only passed to the helper below and never read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input to the scratch array.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
// loMask >> 1 == 1
const uint halfMask = loMask >> 1u;
// Write 0u to the slot whose index is the invocation index with its upper bits shifted left by 2 and its low bit kept.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false condition (256 < 2): the extra clearing loop below never executes.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
barrier();
}
// Index of the last invocation taking part: 255.
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Result of the first-level helper call on the raw input; refined further below.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when pseudoSubgroupInvocation equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Invocation index divided by 4.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Runs while the level still spans at least 16 invocations; starting from 255 the body executes
// twice (lastInvocationInLevel goes 255 -> 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every invocation with possibleProp write their scan to nextStoreIndex.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level by a factor of 4 and recompute who participates.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Constant-true condition: this level's scan is always written to scanStoreIndex.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Constant-true condition: every invocation advances its store index past this level's entries.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level of the same shape as the loop body, except that scanStoreIndex is not advanced;
// taken here because lastInvocationInLevel is 15 after the loop (15 -> 3).
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Always true here (255 >= 4): fold the stored higher-level scans back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this uint subtraction wraps;
// the value is only used as an addend in the index computations below.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4; the body runs for logShift == 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Add the entry loaded at scanLoadIndex + currentToHighLevel into this level's entry, in place.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Invocations 0..3 (pseudoSubgroupID == 0) keep their first-level result unchanged.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-true condition: the exclusive path is always taken and the else branch is unreachable.
if (true)
{
// Each invocation except the last hands its inclusive result to the next slot...
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// ...and reads the slot at its own index.
// NOTE(review): any() is true for every index <= 255, so the 0u fallback is only reachable for
// larger indices; invocation 0 returns whatever scratch[0] holds - confirm it is still the identity.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// int overload: converts `val` to uint, forwards it to the uint overload and
// converts the returned value back to int.
int nbl_glsl_workgroupExclusiveAdd_noBarriers(in int val)
{
    const uint asUnsigned = uint(val);
    const uint scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(asUnsigned);
    return int(scanned);
}
// Workgroup exclusive add scan of `val` (float overload) that issues no barrier()
// on entry or exit; the nbl_glsl_workgroupExclusiveAdd wrapper supplies those.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared as raw uint
// bit patterns (floatBitsToUint / uintBitsToFloat).
// val: this invocation's contribution.
// Returns the value read from scratch slot [gl_LocalInvocationIndex] after every
// invocation below lastInvocation has stored its inclusive result one slot higher.
float nbl_glsl_workgroupExclusiveAdd_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the additive identity (0.0) into slot ((i & ~halfMask) << 2) | (i & halfMask);
// presumably the padding the emulated subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper, held as raw uint bits.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation adds the value
// loaded from the level above onto its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Constant-true branch: the exclusive result is obtained by storing each inclusive
// result one scratch slot higher and reading back the invocation's own slot.
if (true)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// NOTE(review): with local_size_x = 256 and lastInvocation = 255 the second any()
// operand is always true, so the 0.0 fallback is never selected and invocation 0
// returns whatever is in scratch[0] - confirm that slot still holds the identity here.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0.0;
}
else
return uintBitsToFloat(firstLevelScan);
;
}
// Workgroup exclusive add scan (uint overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
uint nbl_glsl_workgroupExclusiveAdd(in uint val)
{
uint scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
barrier();
return scanned;
}
// Workgroup exclusive add scan (int overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
int nbl_glsl_workgroupExclusiveAdd(in int val)
{
int scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
barrier();
return scanned;
}
// Workgroup exclusive add scan (float overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
float nbl_glsl_workgroupExclusiveAdd(in float val)
{
float scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
barrier();
return scanned;
}
// Workgroup inclusive multiply scan of `val` (uint overload) that issues no barrier()
// on entry or exit; the nbl_glsl_workgroupInclusiveMul wrapper supplies those.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared and pass through
// nbl_glsl_identityFunction on the way in and out (its definition is not visible here).
// val: this invocation's contribution.
// Returns firstLevelScan after the results of the higher levels have been multiplied in.
uint nbl_glsl_workgroupInclusiveMul_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the multiplicative identity (1u) into slot ((i & ~halfMask) << 2) | (i & halfMask);
// presumably the padding the emulated subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation multiplies the value
// loaded from the level above into its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-false branch: the shift-by-one path is never taken in this inclusive
// variant; the function returns firstLevelScan from the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// Signed overload: converts `val` to uint, runs the uint overload of the
// barrier-free inclusive multiply scan, and converts the result back to int.
int nbl_glsl_workgroupInclusiveMul_noBarriers(in int val)
{
const uint asUnsigned = uint(val);
const uint scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(asUnsigned);
return int(scanned);
}
// Workgroup inclusive multiply scan of `val` (float overload) that issues no barrier()
// on entry or exit; the nbl_glsl_workgroupInclusiveMul wrapper supplies those.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared as raw uint
// bit patterns (floatBitsToUint / uintBitsToFloat).
// val: this invocation's contribution.
// Returns firstLevelScan after the results of the higher levels have been multiplied in.
float nbl_glsl_workgroupInclusiveMul_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the multiplicative identity (1.0) into slot ((i & ~halfMask) << 2) | (i & halfMask);
// presumably the padding the emulated subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper, held as raw uint bits.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation multiplies the value
// loaded from the level above into its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Constant-false branch: the shift-by-one path is never taken in this inclusive
// variant; the function returns firstLevelScan from the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1.0;
}
else
return uintBitsToFloat(firstLevelScan);
;
}
// Workgroup inclusive multiply scan (uint overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
uint nbl_glsl_workgroupInclusiveMul(in uint val)
{
uint scanned;
barrier();
scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup inclusive multiply scan (int overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
int nbl_glsl_workgroupInclusiveMul(in int val)
{
int scanned;
barrier();
scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup inclusive multiply scan (float overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
float nbl_glsl_workgroupInclusiveMul(in float val)
{
float scanned;
barrier();
scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup exclusive multiply scan of `val` (uint overload) that issues no barrier()
// on entry or exit; the nbl_glsl_workgroupExclusiveMul wrapper supplies those.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared and pass through
// nbl_glsl_identityFunction on the way in and out (its definition is not visible here).
// val: this invocation's contribution.
// Returns the value read from scratch slot [gl_LocalInvocationIndex] after every
// invocation below lastInvocation has stored its inclusive result one slot higher.
uint nbl_glsl_workgroupExclusiveMul_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the multiplicative identity (1u) into slot ((i & ~halfMask) << 2) | (i & halfMask);
// presumably the padding the emulated subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation multiplies the value
// loaded from the level above into its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-true branch: the exclusive result is obtained by storing each inclusive
// result one scratch slot higher and reading back the invocation's own slot.
if (true)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// NOTE(review): with local_size_x = 256 and lastInvocation = 255 the second any()
// operand is always true, so the 1u fallback is never selected and invocation 0
// returns whatever is in scratch[0] - confirm that slot still holds the identity here.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// Signed overload: converts `val` to uint, runs the uint overload of the
// barrier-free exclusive multiply scan, and converts the result back to int.
int nbl_glsl_workgroupExclusiveMul_noBarriers(in int val)
{
const uint asUnsigned = uint(val);
const uint scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(asUnsigned);
return int(scanned);
}
// Workgroup exclusive multiply scan of `val` (float overload) that issues no barrier()
// on entry or exit; the nbl_glsl_workgroupExclusiveMul wrapper supplies those.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared as raw uint
// bit patterns (floatBitsToUint / uintBitsToFloat).
// val: this invocation's contribution.
// Returns the value read from scratch slot [gl_LocalInvocationIndex] after every
// invocation below lastInvocation has stored its inclusive result one slot higher.
float nbl_glsl_workgroupExclusiveMul_noBarriers(in float val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// Write the multiplicative identity (1.0) into slot ((i & ~halfMask) << 2) | (i & halfMask);
// presumably the padding the emulated subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper, held as raw uint bits.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation multiplies the value
// loaded from the level above into its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Constant-true branch: the exclusive result is obtained by storing each inclusive
// result one scratch slot higher and reading back the invocation's own slot.
if (true)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// NOTE(review): with local_size_x = 256 and lastInvocation = 255 the second any()
// operand is always true, so the 1.0 fallback is never selected and invocation 0
// returns whatever is in scratch[0] - confirm that slot still holds the identity here.
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1.0;
}
else
return uintBitsToFloat(firstLevelScan);
;
}
// Workgroup exclusive multiply scan (uint overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
uint nbl_glsl_workgroupExclusiveMul(in uint val)
{
uint scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup exclusive multiply scan (int overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
int nbl_glsl_workgroupExclusiveMul(in int val)
{
int scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup exclusive multiply scan (float overload): runs the _noBarriers implementation
// with a barrier() issued before and after it. Presumably the barriers keep
// neighbouring users of the scratch array from overlapping with this call - confirm.
float nbl_glsl_workgroupExclusiveMul(in float val)
{
float scanned;
barrier();
scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
barrier();
return scanned;
}
// Workgroup inclusive minimum scan of `val` (uint overload) that issues no barrier()
// on entry or exit.
// Intermediate values live in nbl_glsl_workgroupArithmeticScratchShared and pass through
// nbl_glsl_identityFunction on the way in and out (its definition is not visible here).
// val: this invocation's contribution.
// Returns firstLevelScan after the results of the higher levels have been min()-ed in.
uint nbl_glsl_workgroupInclusiveMin_noBarriers(in uint val)
{
// (0x1 << 2) - 1u == 3u: mask handed to the pseudo-subgroup helpers below.
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Passed as the helper's third argument and never read again in this function;
// presumably an out-parameter (helper not visible here - confirm).
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Publish this invocation's input at its scan slot.
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// Write the identity of min (4294967295u, the largest uint) into slot
// ((i & ~halfMask) << 2) | (i & halfMask); presumably the padding the emulated
// subgroup scan reads - confirm against the helpers.
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
// 256 < 2 is false for these constants, so this extra clearing loop never runs here.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
}
// Workgroup sync after the initial stores, before the first scan step.
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// First-level inclusive result from the subgroup helper.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True when this invocation's pseudo-subgroup invocation index equals loMask (3).
const bool possibleProp = pseudoSubgroupInvocation == loMask;
// Four consecutive invocations share one pseudoSubgroupID.
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Build the higher levels: starting from 255 the level bound goes 255 -> 63 -> 15,
// so this loop body runs twice.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The last invocation of the level, and every invocation with possibleProp set,
// hands its running result to the next level.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// Shrink the level bound by 4x (>>= 2) and recompute who takes part.
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Executed by every invocation, participating or not.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// One more level (15 >= 4 here, the bound becomes 3); unlike the loop above,
// scanStoreIndex is not advanced afterwards.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// 255 >= 4, so the results of the higher levels are folded back into the lower ones.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID (index >> 2) is always below index + 4, so this uint subtraction
// wraps; it is only used added onto scanLoadIndex, acting as a negative offset
// under modular uint arithmetic.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// findMSB(255) == 7, so logShift takes the values 4 and 2 (level bounds 15 and 63).
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
// Mirrors the `scanStoreIndex += lastInvocationInLevel + 1u` advance made while going up.
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Skipped for pseudoSubgroupID 0; every other in-range invocation takes the min of
// the value loaded from the level above and its firstLevelScan.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Constant-false branch: the shift-by-one path is never taken in this inclusive
// variant; the function returns firstLevelScan from the else branch.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 4294967295u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
;
}
// Workgroup-wide inclusive min-scan of one `int` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared as uint values (uint()/int() casts);
// 2147483647 is used as the identity element of min.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read;
// nbl_glsl_workgroupInclusiveMin(int) wraps this function with exactly those.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's inclusive scan result (presumably the minimum of
// the inputs of invocations 0..gl_LocalInvocationIndex).
int nbl_glsl_workgroupInclusiveMin_noBarriers(in int val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(min(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = uint(min(int(higherLevelExclusive), int(firstLevelScan)));
}
}
// Exclusive-scan epilogue; compiled out (constant false) in this inclusive variant.
// NOTE(review): any() is always true for an unsigned index, so the identity branch
// below could never be taken even if this were enabled; all() looks intended.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 2147483647;
}
else
return int(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Workgroup-wide inclusive min-scan of one `float` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared as raw bit patterns
// (floatBitsToUint / uintBitsToFloat); (1.f / 0.f), intended as +infinity, is
// used as the identity element of min.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read;
// nbl_glsl_workgroupInclusiveMin(float) wraps this function with exactly those.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's inclusive scan result (presumably the minimum of
// the inputs of invocations 0..gl_LocalInvocationIndex).
float nbl_glsl_workgroupInclusiveMin_noBarriers(in float val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(min(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(min(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Exclusive-scan epilogue; compiled out (constant false) in this inclusive variant.
// NOTE(review): any() is always true for an unsigned index, so the identity branch
// below could never be taken even if this were enabled; all() looks intended.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : (1.f / 0.f);
}
else
return uintBitsToFloat(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Barrier-wrapped inclusive min-scan across the workgroup for `uint` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupInclusiveMin_noBarriers(val).
uint nbl_glsl_workgroupInclusiveMin(in uint val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    uint scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Barrier-wrapped inclusive min-scan across the workgroup for `int` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupInclusiveMin_noBarriers(val).
int nbl_glsl_workgroupInclusiveMin(in int val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    int scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Barrier-wrapped inclusive min-scan across the workgroup for `float` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupInclusiveMin_noBarriers(val).
float nbl_glsl_workgroupInclusiveMin(in float val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    float scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Workgroup-wide exclusive min-scan of one `uint` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared; values pass through
// nbl_glsl_identityFunction (defined elsewhere, presumably a pass-through) and
// 4294967295u is used as the identity element of min.
//
// The scan is computed as an inclusive scan and then shifted by one invocation
// through scratch; invocation 0 receives the identity.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read;
// nbl_glsl_workgroupExclusiveMin(uint) wraps this function with exactly those.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's exclusive scan result (presumably the minimum of
// the inputs of all lower-indexed invocations), 4294967295u for invocation 0.
uint nbl_glsl_workgroupExclusiveMin_noBarriers(in uint val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Exclusive epilogue (constant true in this variant): shift the inclusive result up by one invocation.
if (true)
{
// NOTE(review): no barrier separates the scratch reads above from these writes;
// confirm the two index ranges cannot overlap.
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// all(), not any(): slot 0 is never written by the shift above, so invocation 0 must take
// the identity. With any() the condition held for every unsigned index and the identity
// branch was unreachable.
return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 4294967295u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Workgroup-wide exclusive min-scan of one `int` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared as uint values (uint()/int() casts);
// 2147483647 is used as the identity element of min.
//
// The scan is computed as an inclusive scan and then shifted by one invocation
// through scratch; invocation 0 receives the identity.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read;
// nbl_glsl_workgroupExclusiveMin(int) wraps this function with exactly those.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's exclusive scan result (presumably the minimum of
// the inputs of all lower-indexed invocations), 2147483647 for invocation 0.
int nbl_glsl_workgroupExclusiveMin_noBarriers(in int val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(min(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = uint(min(int(higherLevelExclusive), int(firstLevelScan)));
}
}
// Exclusive epilogue (constant true in this variant): shift the inclusive result up by one invocation.
if (true)
{
// NOTE(review): no barrier separates the scratch reads above from these writes;
// confirm the two index ranges cannot overlap.
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// all(), not any(): slot 0 is never written by the shift above, so invocation 0 must take
// the identity. With any() the condition held for every unsigned index and the identity
// branch was unreachable.
return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 2147483647;
}
else
return int(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Workgroup-wide exclusive min-scan of one `float` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared as raw bit patterns
// (floatBitsToUint / uintBitsToFloat); (1.f / 0.f), intended as +infinity, is
// used as the identity element of min.
//
// The scan is computed as an inclusive scan and then shifted by one invocation
// through scratch; invocation 0 receives the identity.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read;
// nbl_glsl_workgroupExclusiveMin(float) wraps this function with exactly those.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's exclusive scan result (presumably the minimum of
// the inputs of all lower-indexed invocations), (1.f / 0.f) for invocation 0.
float nbl_glsl_workgroupExclusiveMin_noBarriers(in float val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(min(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(min(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Exclusive epilogue (constant true in this variant): shift the inclusive result up by one invocation.
if (true)
{
// NOTE(review): no barrier separates the scratch reads above from these writes;
// confirm the two index ranges cannot overlap.
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
// all(), not any(): slot 0 is never written by the shift above, so invocation 0 must take
// the identity. With any() the condition held for every unsigned index and the identity
// branch was unreachable.
return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : (1.f / 0.f);
}
else
return uintBitsToFloat(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Barrier-wrapped exclusive min-scan across the workgroup for `uint` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupExclusiveMin_noBarriers(val).
uint nbl_glsl_workgroupExclusiveMin(in uint val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    uint scanned = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Barrier-wrapped exclusive min-scan across the workgroup for `int` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupExclusiveMin_noBarriers(val).
int nbl_glsl_workgroupExclusiveMin(in int val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    int scanned = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Barrier-wrapped exclusive min-scan across the workgroup for `float` inputs.
// val: this invocation's input; returns the value produced by
// nbl_glsl_workgroupExclusiveMin_noBarriers(val).
float nbl_glsl_workgroupExclusiveMin(in float val)
{
    // Entry barrier (presumably so earlier users of the scratch array are finished, TODO confirm).
    barrier();
    float scanned = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
    // Exit barrier: all invocations complete the scan before any of them continues.
    barrier();
    return scanned;
}
// Workgroup-wide inclusive max-scan of one `uint` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared; values pass through
// nbl_glsl_identityFunction (defined elsewhere, presumably a pass-through) and
// 0u is used as the identity element of max.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's inclusive scan result (presumably the maximum of
// the inputs of invocations 0..gl_LocalInvocationIndex).
uint nbl_glsl_workgroupInclusiveMax_noBarriers(in uint val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
}
}
// Exclusive-scan epilogue; compiled out (constant false) in this inclusive variant.
// NOTE(review): any() is always true for an unsigned index, so the identity branch
// below could never be taken even if this were enabled; all() looks intended.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
else
return nbl_glsl_identityFunction(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Workgroup-wide inclusive max-scan of one `int` per invocation.
//
// This is preprocessed (macro-expanded) code specialised for a 256-invocation
// workgroup split into 4-wide emulated "pseudo-subgroups", hence the literals
// 256 and (0x1 << 2). Partial results travel through
// nbl_glsl_workgroupArithmeticScratchShared as uint values (uint()/int() casts);
// -2147483648 is used as the identity element of max.
//
// Despite the `_noBarriers` suffix the body calls barrier() between its phases.
// What it leaves out is a barrier on entry and one after the last scratch read.
// Every barrier() below is reached under conditions built from constants only.
//
// val: this invocation's input value.
// Returns: this invocation's inclusive scan result (presumably the maximum of
// the inputs of invocations 0..gl_LocalInvocationIndex).
int nbl_glsl_workgroupInclusiveMax_noBarriers(in int val)
{
// Lane-index mask of a 4-wide pseudo-subgroup (== 3).
const uint loMask = (0x1 << 2) - 1u;
// Scratch addressing comes from helpers defined elsewhere; the exact layout is not visible in this chunk.
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// Sentinel start value handed to the helper below; it is not read again in this function.
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// Seed scratch with this invocation's value ...
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
const uint halfMask = loMask >> 1u;
// ... and with one identity element (presumably padding read by the emulated subgroup scan, TODO confirm).
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
// Constant-false in this expansion (256 < 2): the extra clearing loop never runs.
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
}
// Wait until every invocation has seeded its scratch slots.
barrier();
}
// Highest invocation index taking part in the scan (workgroup size - 1).
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// Level 0: inclusive scan inside each pseudo-subgroup.
uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// True for the last lane (lane 3) of each pseudo-subgroup; those lanes publish their result to the next level.
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Up-sweep: every pass shrinks the level 4x (>>= 2) and re-scans the published results.
// With lastInvocation == 255 the loop runs twice (255 -> 63, 63 -> 15).
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// The level's last invocation and every last lane publish their scan value.
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
// Keep this level's scan for the down-sweep further below.
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// Step past the region this level just used.
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final up-sweep level (15 -> 3 here): same steps as the loop body, but scanStoreIndex is not advanced.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Down-sweep: fold the higher-level results back into the lower levels.
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// pseudoSubgroupID < shiftedInvocationIndex always, so this wraps around in uint;
// it acts as a negative offset once added to scanLoadIndex.
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
// logShift starts at (findMSB(255) / 2 - 1) * 2 == 4, so the body runs for logShift 4 and 2.
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
// Combine the stored scan of this level with the value loaded from the level above.
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(max(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// Pseudo-subgroup 0 has no predecessors; every other one folds in the result of the subgroups before it.
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = uint(max(int(higherLevelExclusive), int(firstLevelScan)));
}
}
// Exclusive-scan epilogue; compiled out (constant false) in this inclusive variant.
// NOTE(review): any() is always true for an unsigned index, so the identity branch
// below could never be taken even if this were enabled; all() looks intended.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -2147483648;
}
else
return int(firstLevelScan);
// Unreachable empty statement, presumably left over from macro expansion.
;
}
// Workgroup inclusive max scan (per the function name) of a float over invocations 0..255,
// staged through the shared uint array nbl_glsl_workgroupArithmeticScratchShared.
// Floats travel through the scratch as raw bits (floatBitsToUint / uintBitsToFloat).
// nbl_glsl_workgroupInclusiveMax(float) calls this function between two barrier() calls.
// Returns this invocation's firstLevelScan after the higher-level results were folded in with max().
float nbl_glsl_workgroupInclusiveMax_noBarriers(in float val)
{
// loMask == 3; the pseudo-subgroup ID used further down is gl_LocalInvocationIndex >> 2
const uint loMask = (0x1 << 2) - 1u;
const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
// sentinel value handed to the helper below; NOTE(review): presumably an out/inout parameter - confirm against the helper
uint lastLoadOffset = 0xdeadbeefu;
const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
{
// stage the input, then write -infinity (the max identity) into the slot derived from the invocation index
nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
const uint halfMask = loMask >> 1u;
nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
// constant-false for these literals (256 < 2), so the extra clearing loop never runs
if (256 < ((0x1 << 2) >> 1))
{
const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
}
barrier();
}
const uint lastInvocation = 256 - 1u;
uint lastInvocationInLevel = lastInvocation;
// first-level result for this invocation, kept as raw float bits
uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
uint scan = firstLevelScan;
// true when pseudoSubgroupInvocation equals loMask
const bool possibleProp = pseudoSubgroupInvocation == loMask;
const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
// Upward pass: while the level still has at least 16 entries, publish per-group results,
// shrink the level by a factor of 4 and scan the published values again.
while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
{
barrier();
if (participate)
{
// the invocation at the end of the level and those with possibleProp publish their scan for the next level
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
// lastInvocationInLevel is shifted right by 2 as a side effect of this comparison
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
// advance the store cursor past the level that was just written
if (true)
scanStoreIndex += lastInvocationInLevel + 1u;
}
// Final upward step, same as one loop iteration but without advancing scanStoreIndex.
if (lastInvocationInLevel >= (0x1 << 2))
{
barrier();
if (participate)
{
if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
}
barrier();
participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
if (participate)
{
const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
if (true)
nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
}
}
barrier();
// Downward pass: fold each higher level's stored result into the level below it with max().
if (lastInvocation >= (0x1 << 2))
{
uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
// unsigned difference; it is only ever used added to scanLoadIndex
const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
{
lastInvocationInLevel = lastInvocation >> logShift;
barrier();
const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
if (shiftedInvocationIndex <= lastInvocationInLevel)
nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(max(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
scanLoadIndex = currentLevelIndex;
}
barrier();
// invocations outside pseudo-subgroup 0 combine the stored higher-level value with their own first-level scan
if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
{
const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
firstLevelScan = floatBitsToUint(max(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
}
}
// Constant-false in this inclusive variant: the shift-by-one tail below is never executed.
// NOTE(review): the any() in the dead return is true for every index (0 satisfies the second test,
// everything else the first), so its -infinity fallback could never be selected either.
if (false)
{
if (gl_LocalInvocationIndex < lastInvocation)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
barrier();
return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -(1.f / 0.f);
}
else
return uintBitsToFloat(firstLevelScan);
;
}
// Workgroup inclusive max (uint overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
uint nbl_glsl_workgroupInclusiveMax(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
// Workgroup inclusive max (int overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
int nbl_glsl_workgroupInclusiveMax(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
// Workgroup inclusive max (float overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
float nbl_glsl_workgroupInclusiveMax(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
// Workgroup exclusive max scan (per the function name) of a uint over invocations 0..255,
// staged through the shared uint array nbl_glsl_workgroupArithmeticScratchShared.
// nbl_glsl_workgroupExclusiveMax(uint) calls this function between two barrier() calls.
//
// val: this invocation's input.
// Returns: 0u (the identity used throughout this overload) for invocation 0; for every other
// invocation i in 1..255, the combined scan value that invocation i-1 published.
uint nbl_glsl_workgroupExclusiveMax_noBarriers(in uint val)
{
    // loMask == 3; the pseudo-subgroup ID used further down is gl_LocalInvocationIndex >> 2
    const uint loMask = (0x1 << 2) - 1u;
    const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
    const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
    const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
    // sentinel value handed to the helper below; NOTE(review): presumably an out/inout parameter - confirm
    uint lastLoadOffset = 0xdeadbeefu;
    const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
    {
        // stage the input, then write the identity (0u) into the slot derived from the invocation index
        nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
        const uint halfMask = loMask >> 1u;
        nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
        // constant-false for these literals (256 < 2), so the extra clearing loop never runs
        if (256 < ((0x1 << 2) >> 1))
        {
            const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
            for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
                nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
        }
        barrier();
    }
    const uint lastInvocation = 256 - 1u;
    uint lastInvocationInLevel = lastInvocation;
    uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
    uint scan = firstLevelScan;
    // true when pseudoSubgroupInvocation equals loMask
    const bool possibleProp = pseudoSubgroupInvocation == loMask;
    const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
    const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
    uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
    bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
    // Upward pass: while the level still has at least 16 entries, publish per-group results,
    // shrink the level by a factor of 4 and scan the published values again.
    while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        // lastInvocationInLevel is shifted right by 2 as a side effect of this comparison
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
        // advance the store cursor past the level that was just written
        scanStoreIndex += lastInvocationInLevel + 1u;
    }
    // Final upward step, same as one loop iteration but without advancing scanStoreIndex.
    if (lastInvocationInLevel >= (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
    }
    barrier();
    // Downward pass: fold each higher level's stored result into the level below it with max().
    if (lastInvocation >= (0x1 << 2))
    {
        uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
        const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
        // unsigned difference; it is only ever used added to scanLoadIndex
        const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
        for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
        {
            lastInvocationInLevel = lastInvocation >> logShift;
            barrier();
            const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
            if (shiftedInvocationIndex <= lastInvocationInLevel)
                nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
            scanLoadIndex = currentLevelIndex;
        }
        barrier();
        if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
        {
            const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
            firstLevelScan = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
        }
    }
    // Inclusive -> exclusive: every invocation hands its result to its right-hand neighbour.
    if (gl_LocalInvocationIndex < lastInvocation)
        nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // Both conditions must hold (all, not any): slot 0 is never written by the shift above, so
    // invocation 0 has to take the identity instead of reading whatever is left in that slot.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
}
// Workgroup exclusive max scan (per the function name) of an int over invocations 0..255,
// staged through the shared uint array nbl_glsl_workgroupArithmeticScratchShared
// (values are stored with uint(...) and read back with int(...)).
// nbl_glsl_workgroupExclusiveMax(int) calls this function between two barrier() calls.
//
// val: this invocation's input.
// Returns: -2147483648 (the identity used throughout this overload) for invocation 0; for every
// other invocation i in 1..255, the combined scan value that invocation i-1 published.
int nbl_glsl_workgroupExclusiveMax_noBarriers(in int val)
{
    // loMask == 3; the pseudo-subgroup ID used further down is gl_LocalInvocationIndex >> 2
    const uint loMask = (0x1 << 2) - 1u;
    const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
    const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
    const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
    // sentinel value handed to the helper below; NOTE(review): presumably an out/inout parameter - confirm
    uint lastLoadOffset = 0xdeadbeefu;
    const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
    {
        // stage the input, then write the identity into the slot derived from the invocation index
        nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
        const uint halfMask = loMask >> 1u;
        nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
        // constant-false for these literals (256 < 2), so the extra clearing loop never runs
        if (256 < ((0x1 << 2) >> 1))
        {
            const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
            for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
                nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
        }
        barrier();
    }
    const uint lastInvocation = 256 - 1u;
    uint lastInvocationInLevel = lastInvocation;
    uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
    uint scan = firstLevelScan;
    // true when pseudoSubgroupInvocation equals loMask
    const bool possibleProp = pseudoSubgroupInvocation == loMask;
    const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
    const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
    uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
    bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
    // Upward pass: while the level still has at least 16 entries, publish per-group results,
    // shrink the level by a factor of 4 and scan the published values again.
    while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        // lastInvocationInLevel is shifted right by 2 as a side effect of this comparison
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
        // advance the store cursor past the level that was just written
        scanStoreIndex += lastInvocationInLevel + 1u;
    }
    // Final upward step, same as one loop iteration but without advancing scanStoreIndex.
    if (lastInvocationInLevel >= (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
    }
    barrier();
    // Downward pass: fold each higher level's stored result into the level below it with max().
    if (lastInvocation >= (0x1 << 2))
    {
        uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
        const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
        // unsigned difference; it is only ever used added to scanLoadIndex
        const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
        for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
        {
            lastInvocationInLevel = lastInvocation >> logShift;
            barrier();
            const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
            if (shiftedInvocationIndex <= lastInvocationInLevel)
                nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(max(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
            scanLoadIndex = currentLevelIndex;
        }
        barrier();
        if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
        {
            const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
            firstLevelScan = uint(max(int(higherLevelExclusive), int(firstLevelScan)));
        }
    }
    // Inclusive -> exclusive: every invocation hands its result to its right-hand neighbour.
    if (gl_LocalInvocationIndex < lastInvocation)
        nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // Both conditions must hold (all, not any): slot 0 is never written by the shift above, so
    // invocation 0 has to take the identity instead of reading whatever is left in that slot.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -2147483648;
}
// Workgroup exclusive max scan (per the function name) of a float over invocations 0..255,
// staged through the shared uint array nbl_glsl_workgroupArithmeticScratchShared.
// Floats travel through the scratch as raw bits (floatBitsToUint / uintBitsToFloat).
// nbl_glsl_workgroupExclusiveMax(float) calls this function between two barrier() calls.
//
// val: this invocation's input.
// Returns: -infinity (the identity used throughout this overload) for invocation 0; for every
// other invocation i in 1..255, the combined scan value that invocation i-1 published.
float nbl_glsl_workgroupExclusiveMax_noBarriers(in float val)
{
    // loMask == 3; the pseudo-subgroup ID used further down is gl_LocalInvocationIndex >> 2
    const uint loMask = (0x1 << 2) - 1u;
    const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
    const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
    const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
    // sentinel value handed to the helper below; NOTE(review): presumably an out/inout parameter - confirm
    uint lastLoadOffset = 0xdeadbeefu;
    const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
    {
        // stage the input, then write -infinity (the max identity) into the slot derived from the invocation index
        nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
        const uint halfMask = loMask >> 1u;
        nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
        // constant-false for these literals (256 < 2), so the extra clearing loop never runs
        if (256 < ((0x1 << 2) >> 1))
        {
            const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
            for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
                nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
        }
        barrier();
    }
    const uint lastInvocation = 256 - 1u;
    uint lastInvocationInLevel = lastInvocation;
    // first-level result for this invocation, kept as raw float bits
    uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
    uint scan = firstLevelScan;
    // true when pseudoSubgroupInvocation equals loMask
    const bool possibleProp = pseudoSubgroupInvocation == loMask;
    const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
    const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
    uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
    bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
    // Upward pass: while the level still has at least 16 entries, publish per-group results,
    // shrink the level by a factor of 4 and scan the published values again.
    while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        // lastInvocationInLevel is shifted right by 2 as a side effect of this comparison
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
        // advance the store cursor past the level that was just written
        scanStoreIndex += lastInvocationInLevel + 1u;
    }
    // Final upward step, same as one loop iteration but without advancing scanStoreIndex.
    if (lastInvocationInLevel >= (0x1 << 2))
    {
        barrier();
        if (participate)
        {
            if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
                nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
        }
        barrier();
        participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
        if (participate)
        {
            const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
            scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
            nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
        }
    }
    barrier();
    // Downward pass: fold each higher level's stored result into the level below it with max().
    if (lastInvocation >= (0x1 << 2))
    {
        uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
        const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
        // unsigned difference; it is only ever used added to scanLoadIndex
        const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
        for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
        {
            lastInvocationInLevel = lastInvocation >> logShift;
            barrier();
            const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
            if (shiftedInvocationIndex <= lastInvocationInLevel)
                nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(max(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
            scanLoadIndex = currentLevelIndex;
        }
        barrier();
        if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
        {
            const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
            firstLevelScan = floatBitsToUint(max(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
        }
    }
    // Inclusive -> exclusive: every invocation hands its result to its right-hand neighbour.
    if (gl_LocalInvocationIndex < lastInvocation)
        nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // Both conditions must hold (all, not any): slot 0 is never written by the shift above, so
    // invocation 0 has to take the identity instead of reading whatever is left in that slot.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -(1.f / 0.f);
}
// Workgroup exclusive max (uint overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
uint nbl_glsl_workgroupExclusiveMax(in uint val)
{
    barrier();
    const uint scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
// Workgroup exclusive max (int overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
int nbl_glsl_workgroupExclusiveMax(in int val)
{
    barrier();
    const int scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
// Workgroup exclusive max (float overload): runs the _noBarriers implementation
// with a barrier() issued on either side of it.
float nbl_glsl_workgroupExclusiveMax(in float val)
{
    barrier();
    const float scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
    barrier();
    return scanned;
}
# 7 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
// Processes one workgroup's worth of elements at the given tree level:
// loads the element (when in range), applies REDUCTION / INCLUSIVE / EXCLUSIVE
// depending on where treeLevel sits relative to params.topLevel, then stores the result.
//   treeLevel           - level being processed, in 0..(topLevel << 1)
//   localWorkgroupIndex - index of this workgroup within the level
void nbl_glsl_scan_virtualWorkgroup(in uint treeLevel, in uint localWorkgroupIndex)
{
    const nbl_glsl_scan_Parameters_t params = nbl_glsl_scan_getParameters();
    const uint levelInvocationIndex = localWorkgroupIndex * 256 + gl_LocalInvocationIndex;
    const bool lastInvocationInGroup = gl_LocalInvocationIndex == (256 - 1);

    // Levels above topLevel mirror the levels below it.
    uint pseudoLevel = treeLevel;
    if (treeLevel > params.topLevel)
        pseudoLevel = (params.topLevel << 1u) - treeLevel;

    const bool inRange = levelInvocationIndex <= params.lastElement[pseudoLevel];
# 61 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl"
    _NBL_GLSL_SCAN_STORAGE_TYPE_ data = IDENTITY;
    if (inRange)
        nbl_glsl_scan_getData(data, levelInvocationIndex, localWorkgroupIndex, treeLevel, pseudoLevel);

    const bool belowTop = treeLevel < params.topLevel;
    // Only the top level of a multi-level tree uses the exclusive operation.
    const bool exclusiveAtTop = params.topLevel != 0u && treeLevel == params.topLevel;
    if (belowTop)
        data = REDUCTION(data);
    else if (exclusiveAtTop)
        data = EXCLUSIVE(data);
    else
        data = INCLUSIVE(data);

    nbl_glsl_scan_setData(data, levelInvocationIndex, localWorkgroupIndex, treeLevel, pseudoLevel, inRange);
}
# 1 "../../../../nbl/builtin/glsl/scan/default_scheduler.glsl" 1
# 9 "../../../../nbl/builtin/glsl/scan/default_scheduler.glsl"
// Parameters of the default scan scheduler; filled by nbl_glsl_scan_scheduler_computeParameters
// and read by nbl_glsl_scan_scheduler_getWork / nbl_glsl_scan_scheduler_markComplete.
struct nbl_glsl_scan_DefaultSchedulerParameters_t
{
// indexed by tree level: base offset into scanScratch.data of that level's "finished" flags
uint finishedFlagOffset[7 - 1];
// indexed by tree level: running total of workgroups up to and including that level
uint cumulativeWorkgroupCount[7];
};
// Derives the scan and scheduler parameters from the number of elements.
//   elementCount     - number of elements to scan; lastElement[0] is elementCount - 1u
//   _scanParams      - receives topLevel, lastElement[] and (for topLevel 1..3) temporaryStorageOffset[]
//   _schedulerParams - receives cumulativeWorkgroupCount[] and finishedFlagOffset[] for the levels in use
// Entries that the selected topLevel does not need are left unwritten.
void nbl_glsl_scan_scheduler_computeParameters(in uint elementCount, out nbl_glsl_scan_Parameters_t _scanParams, out nbl_glsl_scan_DefaultSchedulerParameters_t _schedulerParams)
{
    _scanParams.lastElement[0] = elementCount - 1u;
    _scanParams.topLevel = findMSB(_scanParams.lastElement[0]) / _NBL_GLSL_WORKGROUP_SIZE_LOG2_;

    // Each level's last element index is the previous level's shifted down by the workgroup size log2.
    for (int level = 1; level <= 7 / 2; level++)
        _scanParams.lastElement[level] = _scanParams.lastElement[level - 1] >> _NBL_GLSL_WORKGROUP_SIZE_LOG2_;

    // Workgroup counts of levels 0, 1 and 2 (last element index of the next level, plus one).
    const uint workgroups0 = _scanParams.lastElement[1] + 1u;
    const uint workgroups1 = _scanParams.lastElement[2] + 1u;
    const uint workgroups2 = _scanParams.lastElement[3] + 1u;

    _schedulerParams.cumulativeWorkgroupCount[0] = workgroups0;
    _schedulerParams.finishedFlagOffset[0] = 0u;

    if (_scanParams.topLevel == 1u)
    {
        _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + 1u;
        _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + workgroups0;

        _schedulerParams.finishedFlagOffset[1] = 1u;

        _scanParams.temporaryStorageOffset[0] = 2u;
    }
    else if (_scanParams.topLevel == 2u)
    {
        _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + workgroups1;
        _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + 1u;
        _schedulerParams.cumulativeWorkgroupCount[3] = _schedulerParams.cumulativeWorkgroupCount[2] + workgroups1;
        _schedulerParams.cumulativeWorkgroupCount[4] = _schedulerParams.cumulativeWorkgroupCount[3] + workgroups0;

        _schedulerParams.finishedFlagOffset[1] = workgroups1;
        _schedulerParams.finishedFlagOffset[2] = _schedulerParams.finishedFlagOffset[1] + 1u;
        _schedulerParams.finishedFlagOffset[3] = _schedulerParams.finishedFlagOffset[1] + 2u;

        _scanParams.temporaryStorageOffset[0] = _schedulerParams.finishedFlagOffset[3] + workgroups1;
        _scanParams.temporaryStorageOffset[1] = _scanParams.temporaryStorageOffset[0] + workgroups0;
    }
    else if (_scanParams.topLevel == 3u)
    {
        _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + workgroups1;
        _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + workgroups2;
        _schedulerParams.cumulativeWorkgroupCount[3] = _schedulerParams.cumulativeWorkgroupCount[2] + 1u;
        _schedulerParams.cumulativeWorkgroupCount[4] = _schedulerParams.cumulativeWorkgroupCount[3] + workgroups2;
        _schedulerParams.cumulativeWorkgroupCount[5] = _schedulerParams.cumulativeWorkgroupCount[4] + workgroups1;
        _schedulerParams.cumulativeWorkgroupCount[6] = _schedulerParams.cumulativeWorkgroupCount[5] + workgroups0;

        _schedulerParams.finishedFlagOffset[1] = workgroups1;
        _schedulerParams.finishedFlagOffset[2] = _schedulerParams.finishedFlagOffset[1] + workgroups2;
        _schedulerParams.finishedFlagOffset[3] = _schedulerParams.finishedFlagOffset[2] + 1u;
        _schedulerParams.finishedFlagOffset[4] = _schedulerParams.finishedFlagOffset[2] + 2u;
        _schedulerParams.finishedFlagOffset[5] = _schedulerParams.finishedFlagOffset[4] + workgroups2;

        _scanParams.temporaryStorageOffset[0] = _schedulerParams.finishedFlagOffset[5] + workgroups1;
        _scanParams.temporaryStorageOffset[1] = _scanParams.temporaryStorageOffset[0] + workgroups0;
        _scanParams.temporaryStorageOffset[2] = _scanParams.temporaryStorageOffset[1] + workgroups1;
    }
    // any other topLevel (including 0): nothing further to fill in
}
// Claims the next unit of work for this workgroup and waits until its inputs are ready.
//   params              - scheduler parameters (cumulative workgroup counts, finished-flag offsets)
//   topLevel            - top level of the scan tree; the last level is topLevel << 1
//   treeLevel           - out: tree level the claimed workgroup index falls into
//   localWorkgroupIndex - out: workgroup index relative to the start of that level
//                         (left unwritten when the function returns true)
// Returns true when treeLevel is past the last level (no work left), false otherwise.
bool nbl_glsl_scan_scheduler_getWork(in nbl_glsl_scan_DefaultSchedulerParameters_t params, in uint topLevel, out uint treeLevel, out uint localWorkgroupIndex)
{
// invocation 0 takes a ticket from the global counter into shared slot 0; invocation 1 zeroes shared slot 1
if (gl_LocalInvocationIndex == 0u)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = atomicAdd(scanScratch.workgroupsStarted, 1u);
else if (gl_LocalInvocationIndex == 1u)
nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = 0u;
barrier();
const uint globalWorkgroupIndex = nbl_glsl_workgroupArithmeticScratchShared[0u];
const uint lastLevel = topLevel << 1u;
// each invocation i <= lastLevel whose cumulative count the ticket has already reached bumps slot 1,
// so after the barrier slot 1 holds the number of such levels; that number is the tree level
if (gl_LocalInvocationIndex <= lastLevel && globalWorkgroupIndex >= params.cumulativeWorkgroupCount[gl_LocalInvocationIndex])
atomicAdd(nbl_glsl_workgroupArithmeticScratchShared[1u], 1u);
barrier();
treeLevel = nbl_glsl_workgroupArithmeticScratchShared[1u];
if (treeLevel > lastLevel)
return true;
localWorkgroupIndex = globalWorkgroupIndex;
// level 0 has no previous level to wait for
const bool dependantLevel = treeLevel != 0u;
if (dependantLevel)
{
const uint prevLevel = treeLevel - 1u;
// make the index relative to the first workgroup of this level
localWorkgroupIndex -= params.cumulativeWorkgroupCount[prevLevel];
if (gl_LocalInvocationIndex == 0u)
{
// flag value to wait for: 1 above topLevel, otherwise 256 or, for the level's last workgroup, the remainder
uint dependentsCount = 1u;
if (treeLevel <= topLevel)
{
dependentsCount = 256;
const bool lastWorkgroup = (globalWorkgroupIndex + 1u) == params.cumulativeWorkgroupCount[treeLevel];
if (lastWorkgroup)
{
const nbl_glsl_scan_Parameters_t scanParams = nbl_glsl_scan_getParameters();
dependentsCount = scanParams.lastElement[treeLevel] + 1u;
// lastElement[treeLevel + 1u] is only read below topLevel
if (treeLevel < topLevel)
dependentsCount -= scanParams.lastElement[treeLevel + 1u] * 256;
}
}
// above topLevel one flag is shared by 256 consecutive workgroups
uint dependentsFinishedFlagOffset = localWorkgroupIndex;
if (treeLevel > topLevel)
dependentsFinishedFlagOffset /= 256;
dependentsFinishedFlagOffset += params.finishedFlagOffset[prevLevel];
// busy-wait until the flag written by nbl_glsl_scan_scheduler_markComplete reaches the expected value
while (scanScratch.data[dependentsFinishedFlagOffset] != dependentsCount)
memoryBarrierBuffer();
}
}
// the rest of the workgroup waits here for invocation 0 to leave the spin loop
barrier();
memoryBarrierBuffer();
return false;
}
// Signals that this workgroup finished its work at treeLevel by updating the level's
// "finished" flag in scanScratch.data (done by invocation 0 only).
//   params              - scheduler parameters holding the per-level flag offsets
//   topLevel            - top level of the scan tree; the last level is topLevel << 1
//   treeLevel           - level that was just processed
//   localWorkgroupIndex - index of this workgroup within that level
// Below topLevel, 256 consecutive workgroups share one flag and each increments it atomically;
// from topLevel up to (but excluding) the last level every workgroup sets its own flag to 1.
// The last level has no flag and nothing is written for it.
void nbl_glsl_scan_scheduler_markComplete(in nbl_glsl_scan_DefaultSchedulerParameters_t params, in uint topLevel, in uint treeLevel, in uint localWorkgroupIndex)
{
    memoryBarrierBuffer();
    if (gl_LocalInvocationIndex == 0u)
    {
        // params.finishedFlagOffset has 6 entries while treeLevel reaches 6 on the last level of a
        // topLevel == 3 tree, so the array is only indexed inside the branches that use the value.
        if (treeLevel < topLevel)
        {
            const uint finishedFlagOffset = params.finishedFlagOffset[treeLevel] + localWorkgroupIndex / 256;
            atomicAdd(scanScratch.data[finishedFlagOffset], 1u);
        }
        else if (treeLevel != (topLevel << 1u))
        {
            const uint finishedFlagOffset = params.finishedFlagOffset[treeLevel] + localWorkgroupIndex;
            scanScratch.data[finishedFlagOffset] = 1u;
        }
    }
}
# 86 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
nbl_glsl_scan_DefaultSchedulerParameters_t nbl_glsl_scan_getSchedulerParameters();
// Scan entry point: keeps asking the scheduler for work, runs it as a virtual workgroup
// and marks it complete, until the scheduler reports that nothing is left.
void nbl_glsl_scan_main()
{
    const uint topLevel = nbl_glsl_scan_getParameters().topLevel;
    const nbl_glsl_scan_DefaultSchedulerParameters_t schedulerParams = nbl_glsl_scan_getSchedulerParameters();
    for (;;)
    {
        uint treeLevel, localWorkgroupIndex;
        const bool outOfWork = nbl_glsl_scan_scheduler_getWork(schedulerParams, topLevel, treeLevel, localWorkgroupIndex);
        if (outOfWork)
            break;
        nbl_glsl_scan_virtualWorkgroup(treeLevel, localWorkgroupIndex);
        nbl_glsl_scan_scheduler_markComplete(schedulerParams, topLevel, treeLevel, localWorkgroupIndex);
    }
}
# 9 "direct.comp" 2
// Push-constant block carrying both parameter structs; they are handed out by
// nbl_glsl_scan_getParameters() and nbl_glsl_scan_getSchedulerParameters().
layout(push_constant) uniform PushConstants
{
nbl_glsl_scan_Parameters_t scanParams;
nbl_glsl_scan_DefaultSchedulerParameters_t schedulerParams;
}
pc;
// Returns the scan parameters stored in the push-constant block.
nbl_glsl_scan_Parameters_t nbl_glsl_scan_getParameters()
{
    const nbl_glsl_scan_Parameters_t fromPushConstants = pc.scanParams;
    return fromPushConstants;
}
// Returns the scheduler parameters stored in the push-constant block.
nbl_glsl_scan_DefaultSchedulerParameters_t nbl_glsl_scan_getSchedulerParameters()
{
    const nbl_glsl_scan_DefaultSchedulerParameters_t fromPushConstants = pc.schedulerParams;
    return fromPushConstants;
}
// Shader entry point: all the work happens in nbl_glsl_scan_main().
void main()
{
nbl_glsl_scan_main();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment