Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BeRo1985/369261704adb16ce805a to your computer and use it in GitHub Desktop.
Save BeRo1985/369261704adb16ce805a to your computer and use it in GitHub Desktop.
SDSM compute shader NVIDIA driver 347.09 WHQL slowdown bug
With driver version 347.09 WHQL, my SDSM tightening compute shader takes a whole 70 ms per frame, whereas with the old version 344.75 it took under 1 ms (circa 0.7 ms) on my GTX 970.
See sdsm_reduce_tighting.glsl here; the $-directives are handled by my own preprocessor.
I've tracked it down to the "((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w))" comparison (in sdsm_reduce_tighting.glsl), and it really is just the comparison itself, because:
70ms per frame:
if((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w)){
minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz);
maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz);
}
70 ms per frame:
bool b = (lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w);
minBoundsSun[lPartitionIndex] = mix(minBoundsSun[lPartitionIndex], min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b);
maxBoundsSun[lPartitionIndex] = mix(maxBoundsSun[lPartitionIndex], max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b);
35 ms per frame:
bool b = (lLinearZ >= reduceDataPartitions[lPartitionIndex].x);
minBoundsSun[lPartitionIndex] = mix(minBoundsSun[lPartitionIndex], min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b);
maxBoundsSun[lPartitionIndex] = mix(maxBoundsSun[lPartitionIndex], max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b);
Without any comparison checks, it is under 1 ms per frame again, but the result is then unusable:
minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz);
// Shared declarations for the SDSM reduce compute shaders (lines starting with
// '$' are directives of the author's own preprocessor).
$include "shadow.glsl"
// Constants consumed by sdsmConvertZBufferDepthToLinear: x is the numerator,
// y the constant term and z the depth multiplier of the denominator.
uniform vec3 uClipPlaneDepthConstants;
// x/y are the lower/upper clamp limits applied to the linearised depth.
uniform vec2 uClipPlaneNearFar;
// One depth partition per shadow cascade (NUM_SHADOW_CASCADES is presumably
// provided by shadow.glsl - confirm).
$define NUM_PARTITIONS NUM_SHADOW_CASCADES
// Feature switch tested by the reduce shaders to pick a hand-unrolled
// shared-memory reduction instead of a loop.
$define UNROLL_LOOPS
// Large float used as an "empty" sentinel.
const float FLT_MAX = 3.4e+38;
// 0x7f7fffff is the bit pattern of the largest finite 32-bit IEEE-754 float.
const uint FLT_MAX_UINT = 0x7f7fffffu;
// Converts a depth value d into a linear depth and clamps it to the range
// [uClipPlaneNearFar.x, uClipPlaneNearFar.y].
// The result is x / (y - d * z) with x, y, z taken from uClipPlaneDepthConstants.
// NOTE(review): if the constants are (2*near*far, far+near, far-near), this is
// the usual linearisation for an NDC depth in [-1, 1]; confirm which depth
// range callers actually pass in.
float sdsmConvertZBufferDepthToLinear(const in float d){
  float lDenominator = uClipPlaneDepthConstants.y - (d * uClipPlaneDepthConstants.z);
  float lLinearDepth = uClipPlaneDepthConstants.x / lDenominator;
  return clamp(lLinearDepth, uClipPlaneNearFar.x, uClipPlaneNearFar.y);
}
// Maps the bit pattern of f to a uint: when the sign bit is set every bit is
// inverted, otherwise only the sign bit is flipped. The intent is presumably
// that unsigned comparison (e.g. atomicMin/atomicMax) of the results orders
// them like the original floats.
uint sdsmFloatToUInt(const in float f){
  uint lBits = floatBitsToUint(f);
  // Sign bit set: flip all bits. Sign bit clear: flip the sign bit only.
  uint lFlipMask = ((lBits & 0x80000000u) != 0u) ? 0xffffffffu : 0x80000000u;
  return lBits ^ lFlipMask;
}
// Inverse of the sign-aware bit flip used to store floats as sortable uints:
// when the top bit is set only that bit is cleared, otherwise every bit is
// inverted; the result is reinterpreted as a float.
float sdsmUIntToFloat(const in uint ui){
  // Top bit set: flip the top bit only. Top bit clear: flip all bits.
  uint lFlipMask = ((ui & 0x80000000u) != 0u) ? 0x80000000u : 0xffffffffu;
  return uintBitsToFloat(ui ^ lFlipMask);
}
#version 430
// Clear pass: a single invocation per work group resets the reduce buffer.
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
$include "sdsm_reduce.glsl"
// Result buffer of the SDSM reduction as seen by this pass.
layout(std430) buffer reduceData {
// Depth-range slots; this pass resets them to 1.0 and 0.0.
float reduceDataNearZ;
float reduceDataFarZ;
// Two unused floats ahead of the vec4 array.
float reduceDataPadding0;
float reduceDataPadding1;
// One vec4 of depth limits per partition.
vec4 reduceDataPartitions[NUM_PARTITIONS];
// Bounds stored as uints: four consecutive regions of 3 * NUM_PARTITIONS
// entries each, reset by main() to alternating "huge" / "tiny" bit patterns.
uint reduceDataAABBVectors[(NUM_PARTITIONS * 6) * 2];
};
// Resets the reduce buffer to an "empty" state:
//  - the depth range becomes inverted (near = 1.0, far = 0.0),
//  - every partition becomes (+3.4e38, +3.4e38, -3.4e38, -3.4e38),
//  - the four uint regions of the bounds array receive alternating start
//    values, 0xff7fffff and 0x00800000.
void main(){
  reduceDataNearZ = 1.0;
  reduceDataFarZ = 0.0;

  const vec4 lEmptyPartition = vec4(3.4e+38, 3.4e+38, -3.4e+38, -3.4e+38);
  for(int lPartitionIndex = 0; lPartitionIndex < NUM_PARTITIONS; lPartitionIndex++){
    reduceDataPartitions[lPartitionIndex] = lEmptyPartition;
  }

  // 0xff7fffff / 0x00800000 are what sdsmFloatToUInt produces for the float
  // bit patterns 0x7f7fffff / 0xff7fffff.
  const uint lEncodedHuge = 0xff7fffffu;
  const uint lEncodedTiny = 0x00800000u;
  const int lRegionSize = 3 * NUM_PARTITIONS;
  for(int lElement = 0; lElement < lRegionSize; lElement++){
    reduceDataAABBVectors[lElement] = lEncodedHuge;
    reduceDataAABBVectors[lElement + lRegionSize] = lEncodedTiny;
    reduceDataAABBVectors[lElement + (2 * lRegionSize)] = lEncodedHuge;
    reduceDataAABBVectors[lElement + (3 * lRegionSize)] = lEncodedTiny;
  }
}
#version 430
// Collect pass: depth min/max reduction over the depth buffer.
// Each invocation reads a NUM_SAMPLES x NUM_SAMPLES tile of texels.
$define NUM_SAMPLES 8
// Work groups are LOCAL_SIZE x LOCAL_SIZE invocations.
$define LOCAL_SIZE 8
$define NUM_INVOCATIONS (LOCAL_SIZE * LOCAL_SIZE)
layout(local_size_x=LOCAL_SIZE, local_size_y=LOCAL_SIZE, local_size_z=1) in;
// Depth texture that is sampled with texelFetch, and its size in texels.
uniform sampler2D uTexDepthBuffer;
uniform ivec2 uTexDepthBufferSize;
$include "sdsm_reduce.glsl"
// The two depth slots are declared as int here because main() updates them
// with atomicMin/atomicMax on the float bit patterns.
layout(std430) buffer reduceData {
int reduceDataNearZ;
int reduceDataFarZ;
};
// One shared slot per invocation for the work-group reduction.
$define NUM_SHARED NUM_INVOCATIONS
shared float sharedMinZ[NUM_SHARED];
shared float sharedMaxZ[NUM_SHARED];
// Depth min/max collection.
//
// Every invocation scans its own NUM_SAMPLES x NUM_SAMPLES tile of the depth
// texture (coordinates are clamped to the last texel), ignoring texels whose
// depth is not below 1.0. The per-invocation results are reduced through
// shared memory and invocation 0 merges the work-group result into the buffer
// with atomicMin/atomicMax on the float bit patterns.
//
// Changes compared to the previous revision:
//  - the preprocessor test used UNROLL_LOOP, while the symbol defined for and
//    tested by the other reduce code is UNROLL_LOOPS, so the unrolled branch
//    could never be selected;
//  - the unrolled steps for offsets 16..1 were all guarded by
//    "lInvocationIndex < 32u", which let invocations overwrite elements that
//    other invocations read in the same step; every step is now guarded by
//    its own offset.
void main(){
  float lMinZ = 1.0;
  float lMaxZ = 0.0;
  {
    ivec2 lBaseUV = ivec2((ivec2(gl_WorkGroupID.xy) * LOCAL_SIZE) + ivec2(gl_LocalInvocationID.xy)) * NUM_SAMPLES;
    ivec2 lMaxUV = uTexDepthBufferSize.xy - ivec2(1);
    for(int lY = 0; lY < NUM_SAMPLES; lY++){
      for(int lX = 0; lX < NUM_SAMPLES; lX++){
        float lDepth = texelFetch(uTexDepthBuffer, min(lBaseUV + ivec2(lX, lY), lMaxUV), 0).x;
        // Texels with depth >= 1.0 do not contribute.
        bool lSelect = (lDepth < 1.0);
        lMinZ = mix(lMinZ, min(lMinZ, lDepth), lSelect);
        lMaxZ = mix(lMaxZ, max(lMaxZ, lDepth), lSelect);
      }
    }
  }

  uint lInvocationIndex = gl_LocalInvocationIndex;
  sharedMinZ[lInvocationIndex] = lMinZ;
  sharedMaxZ[lInvocationIndex] = lMaxZ;

$ifndef UNROLL_LOOPS
  // Generic tree reduction: halve the active range until one element is left.
  for(uint lOffset = NUM_SHARED >> 1; lOffset > 0u; lOffset >>= 1){
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < lOffset){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lOffset + lInvocationIndex]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lOffset + lInvocationIndex]);
    }
  }
$else
  // Hand-unrolled tree reduction: one step per power-of-two offset; only the
  // invocations below the offset take part in a step.
  {
$if NUM_SHARED >= 256
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 128u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 128u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 128u]);
    }
$endif
$if NUM_SHARED >= 128
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 64u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 64u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 64u]);
    }
$endif
$if NUM_SHARED >= 64
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 32u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 32u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 32u]);
    }
$endif
$if NUM_SHARED >= 32
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 16u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 16u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 16u]);
    }
$endif
$if NUM_SHARED >= 16
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 8u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 8u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 8u]);
    }
$endif
$if NUM_SHARED >= 8
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 4u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 4u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 4u]);
    }
$endif
$if NUM_SHARED >= 4
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 2u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 2u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 2u]);
    }
$endif
$if NUM_SHARED >= 2
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 1u){
      sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 1u]);
      sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 1u]);
    }
$endif
  }
$endif

  {
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex == 0u){
      // The values start at 1.0 / 0.0 and are only replaced by fetched depths;
      // for non-negative floats the signed-integer order of the bit patterns
      // equals the float order (assumes the depth texture holds no negative
      // values - confirm).
      atomicMin(reduceDataNearZ, floatBitsToInt(sharedMinZ[0]));
      atomicMax(reduceDataFarZ, floatBitsToInt(sharedMaxZ[0]));
    }
  }
}
#version 430
// Partitioning pass: one single-invocation work group per partition.
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
$include "sdsm_reduce.glsl"
// View of the reduce buffer used by this pass (the depth range is read as
// float here).
layout(std430) buffer reduceData {
float reduceDataNearZ;
float reduceDataFarZ;
float reduceDataPadding0;
float reduceDataPadding1;
// Output of this pass: one vec4 of depth limits per partition.
vec4 reduceDataPartitions[NUM_PARTITIONS];
};
// Returns the logarithmic split position of partition pPartition inside the
// depth range [pMinZ, pMaxZ]:
//   pMinZ * (pMaxZ / pMinZ) ^ (pPartition / NUM_PARTITIONS), clamped to the range.
// Indices below 0 yield pMinZ, indices >= NUM_PARTITIONS yield pMaxZ.
float sdsmLogPartitionFromRange(const in int pPartition, const in float pMinZ, const in float pMaxZ){
  if(pPartition < 0){
    return pMinZ;
  }
  if(pPartition >= NUM_PARTITIONS){
    return pMaxZ;
  }
  float lExponent = float(pPartition) * (1.0 / float(NUM_PARTITIONS));
  float lSplitZ = pMinZ * pow(pMaxZ / pMinZ, lExponent);
  return clamp(lSplitZ, pMinZ, pMaxZ);
}
// Computes the depth limits of the partition selected by gl_WorkGroupID.x from
// the linearised depth range stored in the reduce buffer.
// Layout of the result: x = overlapping near, y = non-overlapping near,
// z = non-overlapping far, w = overlapping far.
void main(){
  int lPartition = int(gl_WorkGroupID.x);

  float lRangeMinZ = sdsmConvertZBufferDepthToLinear(reduceDataNearZ);
  float lRangeMaxZ = sdsmConvertZBufferDepthToLinear(reduceDataFarZ);

  // Split positions of the previous, the current and the next partition; the
  // neighbour indices are clamped to the valid partition range.
  float lPreviousSplitZ = sdsmLogPartitionFromRange(max(lPartition - 1, 0), lRangeMinZ, lRangeMaxZ);
  float lCurrentSplitZ = sdsmLogPartitionFromRange(lPartition, lRangeMinZ, lRangeMaxZ);
  float lNextSplitZ = sdsmLogPartitionFromRange(min(lPartition + 1, NUM_PARTITIONS - 1), lRangeMinZ, lRangeMaxZ);

  // Fixed overlap factor; a depth-dependent variant was tried before:
  // (lThisZ < 128) ? 0.5 : ((lThisZ < 512) ? 0.25 : 0.1)
  float lOverlap = 0.1;

  // Near limits: the first partition starts at the near clip plane, the others
  // reach back into the previous partition by the overlap factor.
  vec2 lNearLimits;
  if(lPartition == 0){
    lNearLimits = vec2(uClipPlaneNearFar.x);
  }else{
    lNearLimits = vec2(mix(lPreviousSplitZ, lCurrentSplitZ, clamp(1.0 - lOverlap, 0.0, 1.0)), lCurrentSplitZ);
  }

  // Far limits: the last partition ends at the far clip plane, the others end
  // at the next split without any far-side overlap. Earlier variants added one:
  // vec2(lNextZ, min(lNextZ + ((lNextZ - lThisZ) * clamp(lOverlap, 0.0, 1.0)), uClipPlaneNearFar.y))
  vec2 lFarLimits;
  if(lPartition == (NUM_PARTITIONS - 1)){
    lFarLimits = vec2(uClipPlaneNearFar.y);
  }else{
    lFarLimits = vec2(lNextSplitZ);
  }

  reduceDataPartitions[lPartition] = vec4(lNearLimits, lFarLimits);
}
#version 430
// Tighting pass declarations.
// Lines starting with '$' are handled by the author's own preprocessor, which
// is C99-compatible apart from using '$' instead of '#' and adds a few extensions.
// Each invocation reads a NUM_SAMPLES x NUM_SAMPLES tile of depth texels.
$define NUM_SAMPLES 8
// Work groups are LOCAL_SIZE x LOCAL_SIZE invocations.
$define LOCAL_SIZE 8
$define NUM_INVOCATIONS (LOCAL_SIZE * LOCAL_SIZE)
layout(local_size_x=LOCAL_SIZE, local_size_y=LOCAL_SIZE, local_size_z=1) in;
// Depth texture that is sampled with texelFetch, and its size in texels.
uniform sampler2D uTexDepthBuffer;
uniform ivec2 uTexDepthBufferSize;
// Applied to the (x, y, depth) * 2 - 1 coordinate of a texel to obtain its
// view-space position (followed by a divide by w).
uniform mat4 uViewSpaceInverseProjectionMatrix;
// Transform view-space positions into the light space of the sun / moon.
uniform mat4 uViewSpaceToSunLightSpaceMatrix;
uniform mat4 uViewSpaceToMoonLightSpaceMatrix;
$include "sdsm_reduce.glsl"
// View of the reduce buffer used by this pass.
layout(std430) buffer reduceData {
float reduceDataNearZ;
float reduceDataFarZ;
float reduceDataPadding0;
float reduceDataPadding1;
// Per-partition depth limits; main() tests linear depth against .x and .w.
vec4 reduceDataPartitions[NUM_PARTITIONS];
// Bounds as sortable uints, four regions of 3 * NUM_PARTITIONS entries:
// sun min, sun max, moon min, moon max; entry = partition * 3 + component.
uint reduceDataAABBVectors[(NUM_PARTITIONS * 6) * 2];
};
// One shared slot per invocation and partition.
$define NUM_SHARED (NUM_INVOCATIONS * NUM_PARTITIONS)
// The SUN / MOON symbols select which lights are processed (they are
// presumably supplied when the shader variant is compiled - confirm).
$ifdef SUN
shared vec3 sharedMinBoundsSun[NUM_SHARED];
shared vec3 sharedMaxBoundsSun[NUM_SHARED];
$endif
$ifdef MOON
shared vec3 sharedMinBoundsMoon[NUM_SHARED];
shared vec3 sharedMaxBoundsMoon[NUM_SHARED];
$endif
// Merges the shared-memory bounds stored at index pSrc into the slot at index
// pDst: component-wise min for the minimum corners, max for the maximum corners.
void sdsmMergeSharedBounds(const in uint pDst, const in uint pSrc){
$ifdef SUN
  sharedMinBoundsSun[pDst] = min(sharedMinBoundsSun[pDst], sharedMinBoundsSun[pSrc]);
  sharedMaxBoundsSun[pDst] = max(sharedMaxBoundsSun[pDst], sharedMaxBoundsSun[pSrc]);
$endif
$ifdef MOON
  sharedMinBoundsMoon[pDst] = min(sharedMinBoundsMoon[pDst], sharedMinBoundsMoon[pSrc]);
  sharedMaxBoundsMoon[pDst] = max(sharedMaxBoundsMoon[pDst], sharedMaxBoundsMoon[pSrc]);
$endif
}

// Light-space bounds tightening.
//
// Every invocation scans its NUM_SAMPLES x NUM_SAMPLES tile of the depth
// texture, reconstructs the view-space position of each texel with depth
// below 1.0, transforms it into sun and/or moon light space and grows the
// bounds of every partition whose [x, w] depth interval contains the texel's
// linear depth. The per-invocation bounds are reduced through shared memory
// and finally merged into the reduce buffer with atomicMin/atomicMax on the
// sortable uint encoding.
//
// Change compared to the previous revision: the unrolled reduction steps for
// offsets 16..1 were all guarded by "lInvocationIndex < 32u", which let
// invocations overwrite elements other invocations read in the same step;
// every step is now guarded by its own offset. The repeated min/max merge code
// lives in sdsmMergeSharedBounds.
void main(){
$ifdef SUN
  vec3 minBoundsSun[NUM_PARTITIONS];
  vec3 maxBoundsSun[NUM_PARTITIONS];
$endif
$ifdef MOON
  vec3 minBoundsMoon[NUM_PARTITIONS];
  vec3 maxBoundsMoon[NUM_PARTITIONS];
$endif

  // Start with inverted ("empty") boxes.
  for(int lPartitionIndex = 0; lPartitionIndex < NUM_PARTITIONS; lPartitionIndex++){
$ifdef SUN
    minBoundsSun[lPartitionIndex] = vec3(3.4e+38, 3.4e+38, 3.4e+38);
    maxBoundsSun[lPartitionIndex] = vec3(-3.4e+38, -3.4e+38, -3.4e+38);
$endif
$ifdef MOON
    minBoundsMoon[lPartitionIndex] = vec3(3.4e+38, 3.4e+38, 3.4e+38);
    maxBoundsMoon[lPartitionIndex] = vec3(-3.4e+38, -3.4e+38, -3.4e+38);
$endif
  }

  {
    // Scale that maps (texel x, texel y, depth) to [0, 2] before the -1 shift.
    vec3 lMul = vec3(vec2(vec2(2.0) / vec2(uTexDepthBufferSize.xy - vec2(1.0))), 2.0);
    ivec2 lBaseUV = ivec2((ivec2(gl_WorkGroupID.xy) * LOCAL_SIZE) + ivec2(gl_LocalInvocationID.xy)) * NUM_SAMPLES;
    ivec2 lMaxUV = uTexDepthBufferSize.xy - ivec2(1);
    for(int lY = 0; lY < NUM_SAMPLES; lY++){
      for(int lX = 0; lX < NUM_SAMPLES; lX++){
        ivec2 lUV = min(lBaseUV + ivec2(lX, lY), lMaxUV);
        float lDepth = texelFetch(uTexDepthBuffer, lUV, 0).x;
        // Texels with depth >= 1.0 do not contribute.
        if(lDepth < 1.0){
          vec4 lViewSpaceCoord = uViewSpaceInverseProjectionMatrix * vec4((vec3(lUV.xy, lDepth) * lMul) - vec3(1.0), 1.0);
          lViewSpaceCoord = vec4(lViewSpaceCoord.xyz / lViewSpaceCoord.w, 1.0);
          float lLinearZ = -lViewSpaceCoord.z;
$ifdef SUN
          vec3 lSunLightSpaceCoord = (uViewSpaceToSunLightSpaceMatrix * lViewSpaceCoord).xyz;
$endif
$ifdef MOON
          vec3 lMoonLightSpaceCoord = (uViewSpaceToMoonLightSpaceMatrix * lViewSpaceCoord).xyz;
$endif
          for(int lPartitionIndex = 0; lPartitionIndex < NUM_PARTITIONS; lPartitionIndex++){
            // A texel can fall into more than one partition because the
            // intervals may overlap.
            // Variant with widened limits that was tried before:
            // if((lLinearZ >= (reduceDataPartitions[lPartitionIndex].x * 0.9)) && (lLinearZ <= (reduceDataPartitions[lPartitionIndex].w * 1.1))){
            if((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w)){
$ifdef SUN
              minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz);
              maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz);
$endif
$ifdef MOON
              minBoundsMoon[lPartitionIndex] = min(minBoundsMoon[lPartitionIndex], lMoonLightSpaceCoord.xyz);
              maxBoundsMoon[lPartitionIndex] = max(maxBoundsMoon[lPartitionIndex], lMoonLightSpaceCoord.xyz);
$endif
            }
          }
        }
      }
    }
  }

  uint lInvocationIndex = gl_LocalInvocationIndex;

  // Publish the private bounds: slot = invocation * NUM_PARTITIONS + partition,
  // so slots whose distance is a multiple of NUM_PARTITIONS hold the same partition.
  for(uint lPartitionIndex = 0u; lPartitionIndex < uint(NUM_PARTITIONS); lPartitionIndex++){
    uint lIndex = (lInvocationIndex * NUM_PARTITIONS) + lPartitionIndex;
$ifdef SUN
    sharedMinBoundsSun[lIndex] = minBoundsSun[lPartitionIndex];
    sharedMaxBoundsSun[lIndex] = maxBoundsSun[lPartitionIndex];
$endif
$ifdef MOON
    sharedMinBoundsMoon[lIndex] = minBoundsMoon[lPartitionIndex];
    sharedMaxBoundsMoon[lIndex] = maxBoundsMoon[lPartitionIndex];
$endif
  }

$if (!defined(UNROLL_LOOPS)) || (NUM_INVOCATIONS > 64) || (NUM_PARTITIONS == 3) || (NUM_PARTITIONS > 4)
  // Generic tree reduction. It stops once the offset would drop below
  // NUM_PARTITIONS, leaving one slot per partition in the first
  // NUM_PARTITIONS elements.
  for(uint lOffset = NUM_SHARED >> 1; lOffset >= uint(NUM_PARTITIONS); lOffset >>= 1){
    memoryBarrierShared();
    barrier();
    for(uint lDst = lInvocationIndex; lDst < lOffset; lDst += uint(NUM_INVOCATIONS)){
      sdsmMergeSharedBounds(lDst, lDst + lOffset);
    }
  }
$else
  // Hand-unrolled tree reduction for 1, 2 or 4 partitions and at most 64
  // invocations; only the invocations below the offset take part in a step.
  {
$if NUM_SHARED >= 256
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 128u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 128u);
$if NUM_INVOCATIONS == 64
      // With 64 invocations each one also handles the element 64 slots
      // further on, so that all 128 destination slots are covered.
      sdsmMergeSharedBounds(lInvocationIndex + 64u, lInvocationIndex + 192u);
$endif
    }
$endif
$if NUM_SHARED >= 128
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 64u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 64u);
    }
$endif
$if NUM_SHARED >= 64
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 32u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 32u);
    }
$endif
$if NUM_SHARED >= 32
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 16u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 16u);
    }
$endif
$if NUM_SHARED >= 16
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 8u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 8u);
    }
$endif
$if (NUM_SHARED >= 8) && (NUM_PARTITIONS <= 4)
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 4u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 4u);
    }
$endif
$if (NUM_SHARED >= 4) && (NUM_PARTITIONS <= 2)
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 2u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 2u);
    }
$endif
$if (NUM_SHARED >= 2) && (NUM_PARTITIONS <= 1)
    memoryBarrierShared();
    barrier();
    if(lInvocationIndex < 1u){
      sdsmMergeSharedBounds(lInvocationIndex, lInvocationIndex + 1u);
    }
$endif
  }
$endif

  {
    memoryBarrierShared();
    barrier();
    // One invocation per (partition, component) pair merges the work-group
    // result into the global bounds.
    if(lInvocationIndex < uint(3 * NUM_PARTITIONS)){
      // Partition whose reduced bounds are read ...
      uint lSourceVectorElementIndex = lInvocationIndex / 3u;
      // ... and the x/y/z component (0..2) handled by this invocation.
      uint lDestinationVectorElementIndex = lInvocationIndex - (lSourceVectorElementIndex * 3u);
$ifdef SUN
      vec3 lSunMin = sharedMinBoundsSun[lSourceVectorElementIndex];
      vec3 lSunMax = sharedMaxBoundsSun[lSourceVectorElementIndex];
$endif
$ifdef MOON
      vec3 lMoonMin = sharedMinBoundsMoon[lSourceVectorElementIndex];
      vec3 lMoonMax = sharedMaxBoundsMoon[lSourceVectorElementIndex];
$endif
$ifdef SUN
      atomicMin(reduceDataAABBVectors[lInvocationIndex + uint(0 * NUM_PARTITIONS)], sdsmFloatToUInt(lSunMin[lDestinationVectorElementIndex]));
      atomicMax(reduceDataAABBVectors[lInvocationIndex + uint(3 * NUM_PARTITIONS)], sdsmFloatToUInt(lSunMax[lDestinationVectorElementIndex]));
$endif
$ifdef MOON
      atomicMin(reduceDataAABBVectors[lInvocationIndex + uint(6 * NUM_PARTITIONS)], sdsmFloatToUInt(lMoonMin[lDestinationVectorElementIndex]));
      atomicMax(reduceDataAABBVectors[lInvocationIndex + uint(9 * NUM_PARTITIONS)], sdsmFloatToUInt(lMoonMax[lDestinationVectorElementIndex]));
$endif
    }
  }
}
// Number of shadow cascades.
$define NUM_SHADOW_CASCADES 4
// Two groups of (NUM_SHADOW_CASCADES + 1) shadow data entries.
$define SHADOW_DATA_COUNT ((NUM_SHADOW_CASCADES + 1) * 2)
// First entry of the sun group and of the moon group.
$define SHADOW_BASE_SUN 0
$define SHADOW_BASE_MOON (NUM_SHADOW_CASCADES + 1)
begin
// Depth pre-pass. With RendererProfiling defined it is wrapped in timer query 9;
// the result of the previous use of that query is fetched first, except on the
// first frame.
{$ifdef RendererProfiling}
if not Renderer.QueryFirstFrame then begin
glGetQueryObjectuiv(Renderer.Queries[9],GL_QUERY_RESULT,@Renderer.QueryResults[9]);
end;
glBeginQuery(GL_TIME_ELAPSED,Renderer.Queries[9]);
{$endif}
begin
// Render with the current view matrix and FOV into the depth buffer frame buffer object.
Renderer.DepthBufferContext.ViewMatrix:=ViewMatrix;
Renderer.DepthBufferContext.FOV:=FOV;
Renderer.DepthBufferContext.Render(AViewPortX,AViewPortY,AViewPortWidth,AViewPortHeight,Renderer.DepthBufferFrameBufferObject);
end;
{$ifdef RendererProfiling}
glEndQuery(GL_TIME_ELAPSED);
{$endif}
if Renderer.ShadowMode in [4] then begin
if Renderer.SDSMReduceDataSSBO>0 then begin
// Start of the SDSM reduce passes. With RendererProfiling defined they are
// timed with query 10 (previous result fetched first, except on the first frame).
{$ifdef RendererProfiling}
if not Renderer.QueryFirstFrame then begin
glGetQueryObjectuiv(Renderer.Queries[10],GL_QUERY_RESULT,@Renderer.QueryResults[10]);
end;
glBeginQuery(GL_TIME_ELAPSED,Renderer.Queries[10]);
{$endif}
Renderer.State.UseShader(nil);
// Barrier for the texture fetches the following compute passes perform.
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
// The reduce data buffer is bound to shader storage binding point 0 for all passes.
Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,Renderer.SDSMReduceDataSSBO);
// Pass 1: clear shader, a single work group.
if Renderer.State.UseShader(Renderer.SDSMReduceClear) then begin
// Uniforms and the storage block are only set when their location is non-negative.
if Renderer.SDSMReduceClear_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReduceClear_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReduceClear_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReduceClear_uClipPlaneNearFar,ZNear,ZFar);
end;
if Renderer.SDSMReduceClear_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceClear_RecudeDataSSBOLocation,0);
end;
glDispatchCompute(1,1,1);
// Barrier so that the next pass sees the buffer writes of this one.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
// Pass 2: collect shader (depth min/max over the depth buffer).
if Renderer.State.UseShader(Renderer.SDSMReduceCollect) then begin
// The depth texture is sampled from texture unit 0.
if Renderer.SDSMReduceCollect_uTexDepthBuffer>=0 then begin
Renderer.State.SetUniform1i(Renderer.SDSMReduceCollect_uTexDepthBuffer,0);
end;
if Renderer.SDSMReduceCollect_uTexDepthBufferSize>=0 then begin
Renderer.State.SetUniform2i(Renderer.SDSMReduceCollect_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height);
end;
if Renderer.SDSMReduceCollect_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReduceCollect_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReduceCollect_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReduceCollect_uClipPlaneNearFar,ZNear,ZFar);
end;
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D);
if Renderer.SDSMReduceCollect_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceCollect_RecudeDataSSBOLocation,0);
end;
// One work group per 64x64 pixel block (size rounded up, at least one group per axis).
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
// Pass 3: partitioning shader.
if Renderer.State.UseShader(Renderer.SDSMReducePartitioning) then begin
if Renderer.SDSMReducePartitioning_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReducePartitioning_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReducePartitioning_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReducePartitioning_uClipPlaneNearFar,ZNear,ZFar);
end;
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D);
if Renderer.SDSMReducePartitioning_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReducePartitioning_RecudeDataSSBOLocation,0);
end;
// MaxCascadedShadowMaps work groups are dispatched (presumably one per
// partition; must not exceed the shader's NUM_PARTITIONS - confirm).
glDispatchCompute(MaxCascadedShadowMaps,1,1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
// Pass 4: tighting shader. One of three shader variants is used depending on
// which lights are active: sun and moon, sun only, or moon only. All three
// branches set the same uniforms and dispatch the same number of work groups.
{}if Renderer.ShadowMode in [4] then begin
if Renderer.SunActive and Renderer.MoonActive then begin
// Variant for sun and moon together.
if Renderer.State.UseShader(Renderer.SDSMReduceTightingSunMoon) then begin
if Renderer.SDSMReduceTightingSunMoon_uTexDepthBuffer>=0 then begin
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingSunMoon_uTexDepthBuffer,0);
end;
if Renderer.SDSMReduceTightingSunMoon_uTexDepthBufferSize>=0 then begin
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingSunMoon_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height);
end;
if Renderer.SDSMReduceTightingSunMoon_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingSunMoon_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReduceTightingSunMoon_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingSunMoon_uClipPlaneNearFar,ZNear,ZFar);
end;
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceInverseProjectionMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix));
end;
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceToSunLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix);
end;
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceToMoonLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix);
end;
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D);
if Renderer.SDSMReduceTightingSunMoon_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingSunMoon_RecudeDataSSBOLocation,0);
end;
// One work group per 64x64 pixel block (size rounded up, at least one group per axis).
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
end else if Renderer.SunActive then begin
// Sun-only variant. The moon matrix is still uploaded when the shader reports
// a non-negative location for it.
if Renderer.State.UseShader(Renderer.SDSMReduceTightingSun) then begin
if Renderer.SDSMReduceTightingSun_uTexDepthBuffer>=0 then begin
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingSun_uTexDepthBuffer,0);
end;
if Renderer.SDSMReduceTightingSun_uTexDepthBufferSize>=0 then begin
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingSun_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height);
end;
if Renderer.SDSMReduceTightingSun_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingSun_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReduceTightingSun_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingSun_uClipPlaneNearFar,ZNear,ZFar);
end;
if Renderer.SDSMReduceTightingSun_uViewSpaceInverseProjectionMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix));
end;
if Renderer.SDSMReduceTightingSun_uViewSpaceToSunLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix);
end;
if Renderer.SDSMReduceTightingSun_uViewSpaceToMoonLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix);
end;
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D);
if Renderer.SDSMReduceTightingSun_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingSun_RecudeDataSSBOLocation,0);
end;
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
end else if Renderer.MoonActive then begin
// Moon-only variant. The sun matrix is still uploaded when the shader reports
// a non-negative location for it.
if Renderer.State.UseShader(Renderer.SDSMReduceTightingMoon) then begin
if Renderer.SDSMReduceTightingMoon_uTexDepthBuffer>=0 then begin
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingMoon_uTexDepthBuffer,0);
end;
if Renderer.SDSMReduceTightingMoon_uTexDepthBufferSize>=0 then begin
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingMoon_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height);
end;
if Renderer.SDSMReduceTightingMoon_uClipPlaneDepthConstants>=0 then begin
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingMoon_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear);
end;
if Renderer.SDSMReduceTightingMoon_uClipPlaneNearFar>=0 then begin
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingMoon_uClipPlaneNearFar,ZNear,ZFar);
end;
if Renderer.SDSMReduceTightingMoon_uViewSpaceInverseProjectionMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix));
end;
if Renderer.SDSMReduceTightingMoon_uViewSpaceToSunLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix);
end;
if Renderer.SDSMReduceTightingMoon_uViewSpaceToMoonLightSpaceMatrix>=0 then begin
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix);
end;
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D);
if Renderer.SDSMReduceTightingMoon_RecudeDataSSBOLocation>=0 then begin
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingMoon_RecudeDataSSBOLocation,0);
end;
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
Renderer.State.UseShader(nil);
end;
end;
end;{}
begin
 // Read the reduce results back into Renderer.SDSMReduceData.
 // The preceding passes only issue GL_SHADER_STORAGE_BARRIER_BIT barriers, which
 // order shader accesses among each other. Reads through a mapped pointer
 // additionally require GL_BUFFER_UPDATE_BARRIER_BIT to reflect the data the
 // compute shaders have written.
 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
 Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,Renderer.SDSMReduceDataSSBO);
 p:=glMapBuffer(GL_SHADER_STORAGE_BUFFER,GL_READ_ONLY);
 if assigned(p) then begin
  // Copy the whole record while the buffer is mapped, then release the mapping.
  Move(p^,Renderer.SDSMReduceData,SizeOf(TEngineRendererSDSMReduceData));
  glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
 end;
 // If mapping failed, Renderer.SDSMReduceData keeps its previous contents.
 Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0);
end;
{$ifdef RendererProfiling}
// End of the timed section opened before the reduce passes.
glEndQuery(GL_TIME_ELAPSED);
{$endif}
end;
end;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment