Last active
April 11, 2020 04:38
-
-
Save BeRo1985/369261704adb16ce805a to your computer and use it in GitHub Desktop.
SDSM compute shader NVIDIA driver 347.09 WHQL slowdown bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
With version 347.09 WHQL tooks my SDSM tighting compute shader whole 70 ms per frame instead with the old verson 344.75 under 1ms (circa 0.7ms) on my GTX970. | |
See sdsm_reduce_tighting.glsl here, the $-stuff is my own preprocessor. | |
I'ver tracked to the "((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w))" (in sdsm_reduce_tighting.glsl) comparsion down, but really just the comparsion itself, because: | |
70ms per frame: | |
if((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w)){ | |
minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); | |
maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); | |
} | |
70 ms per frame: | |
bool b = (lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w); | |
minBoundsSun[lPartitionIndex] = mix(minBoundsSun[lPartitionIndex], min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b); | |
maxBoundsSun[lPartitionIndex] = mix(maxBoundsSun[lPartitionIndex], max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b); | |
35 ms per frame: | |
bool b = (lLinearZ >= reduceDataPartitions[lPartitionIndex].x); | |
minBoundsSun[lPartitionIndex] = mix(minBoundsSun[lPartitionIndex], min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b); | |
maxBoundsSun[lPartitionIndex] = mix(maxBoundsSun[lPartitionIndex], max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz), b); | |
without any comparsion checks, it is under 1ms per frame again but the result is unusable then: | |
minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$include "shadow.glsl" | |
uniform vec3 uClipPlaneDepthConstants; | |
uniform vec2 uClipPlaneNearFar; | |
$define NUM_PARTITIONS NUM_SHADOW_CASCADES | |
$define UNROLL_LOOPS | |
const float FLT_MAX = 3.4e+38; | |
const uint FLT_MAX_UINT = 0x7f7fffffu; | |
float sdsmConvertZBufferDepthToLinear(const in float d){ | |
return clamp(uClipPlaneDepthConstants.x / (uClipPlaneDepthConstants.y - (d * uClipPlaneDepthConstants.z)), uClipPlaneNearFar.x, uClipPlaneNearFar.y); | |
} | |
uint sdsmFloatToUInt(const in float f){ | |
uint ui = floatBitsToUint(f); | |
return ui ^ uint((0u - uint(ui >> 31)) | 0x80000000u); | |
} | |
float sdsmUIntToFloat(const in uint ui){ | |
return uintBitsToFloat(ui ^ uint((uint(ui >> 31) - 1u) | 0x80000000u)); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 | |
layout(local_size_x=1, local_size_y=1, local_size_z=1) in; | |
$include "sdsm_reduce.glsl" | |
layout(std430) buffer reduceData { | |
float reduceDataNearZ; | |
float reduceDataFarZ; | |
float reduceDataPadding0; | |
float reduceDataPadding1; | |
vec4 reduceDataPartitions[NUM_PARTITIONS]; | |
uint reduceDataAABBVectors[(NUM_PARTITIONS * 6) * 2]; | |
}; | |
void main(){ | |
reduceDataNearZ = 1.0; | |
reduceDataFarZ = 0.0; | |
int i; | |
for(i = 0; i < NUM_PARTITIONS; i++){ | |
reduceDataPartitions[i] = vec4(3.4e+38, 3.4e+38, -3.4e+38, -3.4e+38); | |
} | |
for(i = 0; i < (3 * NUM_PARTITIONS); i++){ | |
reduceDataAABBVectors[i + (0 * NUM_PARTITIONS)] = 0xff7fffffu; | |
reduceDataAABBVectors[i + (3 * NUM_PARTITIONS)] = 0x00800000u; | |
reduceDataAABBVectors[i + (6 * NUM_PARTITIONS)] = 0xff7fffffu; | |
reduceDataAABBVectors[i + (9 * NUM_PARTITIONS)] = 0x00800000u; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 | |
$define NUM_SAMPLES 8 | |
$define LOCAL_SIZE 8 | |
$define NUM_INVOCATIONS (LOCAL_SIZE * LOCAL_SIZE) | |
layout(local_size_x=LOCAL_SIZE, local_size_y=LOCAL_SIZE, local_size_z=1) in; | |
uniform sampler2D uTexDepthBuffer; | |
uniform ivec2 uTexDepthBufferSize; | |
$include "sdsm_reduce.glsl" | |
layout(std430) buffer reduceData { | |
int reduceDataNearZ; | |
int reduceDataFarZ; | |
}; | |
$define NUM_SHARED NUM_INVOCATIONS | |
shared float sharedMinZ[NUM_SHARED]; | |
shared float sharedMaxZ[NUM_SHARED]; | |
void main(){ | |
float lMinZ = 1.0; | |
float lMaxZ = 0.0; | |
{ | |
ivec2 lBaseUV = ivec2((ivec2(gl_WorkGroupID.xy) * LOCAL_SIZE) + ivec2(gl_LocalInvocationID.xy)) * NUM_SAMPLES; | |
ivec2 lMaxUV = uTexDepthBufferSize.xy - ivec2(1); | |
for(int lY = 0; lY < NUM_SAMPLES; lY++){ | |
for(int lX = 0; lX < NUM_SAMPLES; lX++){ | |
float lDepth = texelFetch(uTexDepthBuffer, min(lBaseUV + ivec2(lX, lY), lMaxUV), 0).x; | |
bool lSelect = (lDepth < 1.0); | |
lMinZ = mix(lMinZ, min(lMinZ, lDepth), lSelect); | |
lMaxZ = mix(lMaxZ, max(lMaxZ, lDepth), lSelect); | |
} | |
} | |
} | |
uint lInvocationIndex = gl_LocalInvocationIndex; | |
{ | |
sharedMinZ[lInvocationIndex] = lMinZ; | |
sharedMaxZ[lInvocationIndex] = lMaxZ; | |
} | |
$ifndef UNROLL_LOOP | |
{ | |
for(uint lOffset = NUM_SHARED >> 1; lOffset > 0u; lOffset >>= 1){ | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < lOffset){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lOffset + lInvocationIndex]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lOffset + lInvocationIndex]); | |
} | |
} | |
} | |
$else | |
{ | |
$if NUM_SHARED >= 256 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 128u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 128u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 128u]); | |
} | |
$endif | |
$if NUM_SHARED >= 128 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 64u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 64u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 64u]); | |
} | |
$endif | |
$if NUM_SHARED >= 64 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 32u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 32u]); | |
} | |
$endif | |
$if NUM_SHARED >= 32 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 16u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 16u]); | |
} | |
$endif | |
$if NUM_SHARED >= 16 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 8u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 8u]); | |
} | |
$endif | |
$if NUM_SHARED >= 8 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 4u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 4u]); | |
} | |
$endif | |
$if NUM_SHARED >= 4 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 2u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 2u]); | |
} | |
$endif | |
$if NUM_SHARED >= 2 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
sharedMinZ[lInvocationIndex] = min(sharedMinZ[lInvocationIndex], sharedMinZ[lInvocationIndex + 1u]); | |
sharedMaxZ[lInvocationIndex] = max(sharedMaxZ[lInvocationIndex], sharedMaxZ[lInvocationIndex + 1u]); | |
} | |
$endif | |
} | |
$endif | |
{ | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex == 0u){ | |
atomicMin(reduceDataNearZ, floatBitsToInt(sharedMinZ[0])); | |
atomicMax(reduceDataFarZ, floatBitsToInt(sharedMaxZ[0])); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 | |
layout(local_size_x=1, local_size_y=1, local_size_z=1) in; | |
$include "sdsm_reduce.glsl" | |
layout(std430) buffer reduceData { | |
float reduceDataNearZ; | |
float reduceDataFarZ; | |
float reduceDataPadding0; | |
float reduceDataPadding1; | |
vec4 reduceDataPartitions[NUM_PARTITIONS]; | |
}; | |
float sdsmLogPartitionFromRange(const in int pPartition, const in float pMinZ, const in float pMaxZ){ | |
if(pPartition < 0){ | |
return pMinZ; | |
}else if(pPartition >= NUM_PARTITIONS){ | |
return pMaxZ; | |
}else{ | |
return clamp(pMinZ * pow(pMaxZ / pMinZ, float(pPartition) * (1.0 / float(NUM_PARTITIONS))), pMinZ, pMaxZ); | |
} | |
} | |
void main(){ | |
int lGroupIndex = int(gl_WorkGroupID.x); | |
float lMinZ = sdsmConvertZBufferDepthToLinear(reduceDataNearZ); | |
float lMaxZ = sdsmConvertZBufferDepthToLinear(reduceDataFarZ); | |
float lLastZ = sdsmLogPartitionFromRange(max(lGroupIndex - 1, 0), lMinZ, lMaxZ); | |
float lThisZ = sdsmLogPartitionFromRange(lGroupIndex, lMinZ, lMaxZ); | |
float lNextZ = sdsmLogPartitionFromRange(min(lGroupIndex + 1, NUM_PARTITIONS - 1), lMinZ, lMaxZ); | |
float lOverlap = 0.1;//(lThisZ < 128) ? 0.5 : ((lThisZ < 512) ? 0.25 : 0.1); | |
// x = overlapping near, y = non-overlapping near, z = non-overlapping far, w = overlapping far | |
reduceDataPartitions[lGroupIndex].xy = (lGroupIndex == 0) ? vec2(uClipPlaneNearFar.x) : vec2(mix(lLastZ, lThisZ, clamp(1.0 - lOverlap, 0.0, 1.0)), lThisZ); | |
reduceDataPartitions[lGroupIndex].zw = (lGroupIndex == (NUM_PARTITIONS - 1)) ? vec2(uClipPlaneNearFar.y) : vec2(lNextZ);//, min(lNextZ + ((lNextZ - lThisZ) * 0.05), uClipPlaneNearFar.y)); | |
//reduceDataPartitions[lGroupIndex].zw = (lGroupIndex == (NUM_PARTITIONS - 1)) ? vec2(uClipPlaneNearFar.y) : vec2(lNextZ, min(lNextZ + ((lNextZ - lThisZ) * clamp(lOverlap, 0.0, 1.0)), uClipPlaneNearFar.y)); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 | |
// the $-lines is my own except $ vs. # otherwise C99-compatible preprocessor with some extensions of me | |
$define NUM_SAMPLES 8 | |
$define LOCAL_SIZE 8 | |
$define NUM_INVOCATIONS (LOCAL_SIZE * LOCAL_SIZE) | |
layout(local_size_x=LOCAL_SIZE, local_size_y=LOCAL_SIZE, local_size_z=1) in; | |
uniform sampler2D uTexDepthBuffer; | |
uniform ivec2 uTexDepthBufferSize; | |
uniform mat4 uViewSpaceInverseProjectionMatrix; | |
uniform mat4 uViewSpaceToSunLightSpaceMatrix; | |
uniform mat4 uViewSpaceToMoonLightSpaceMatrix; | |
$include "sdsm_reduce.glsl" | |
layout(std430) buffer reduceData { | |
float reduceDataNearZ; | |
float reduceDataFarZ; | |
float reduceDataPadding0; | |
float reduceDataPadding1; | |
vec4 reduceDataPartitions[NUM_PARTITIONS]; | |
uint reduceDataAABBVectors[(NUM_PARTITIONS * 6) * 2]; | |
}; | |
$define NUM_SHARED (NUM_INVOCATIONS * NUM_PARTITIONS) | |
$ifdef SUN | |
shared vec3 sharedMinBoundsSun[NUM_SHARED]; | |
shared vec3 sharedMaxBoundsSun[NUM_SHARED]; | |
$endif | |
$ifdef MOON | |
shared vec3 sharedMinBoundsMoon[NUM_SHARED]; | |
shared vec3 sharedMaxBoundsMoon[NUM_SHARED]; | |
$endif | |
void main(){ | |
$ifdef SUN | |
vec3 minBoundsSun[NUM_PARTITIONS]; | |
vec3 maxBoundsSun[NUM_PARTITIONS]; | |
$endif | |
$ifdef MOON | |
vec3 minBoundsMoon[NUM_PARTITIONS]; | |
vec3 maxBoundsMoon[NUM_PARTITIONS]; | |
$endif | |
{ | |
for(int lPartitionIndex = 0; lPartitionIndex < NUM_PARTITIONS; lPartitionIndex++){ | |
$ifdef SUN | |
minBoundsSun[lPartitionIndex] = vec3(3.4e+38, 3.4e+38, 3.4e+38); | |
maxBoundsSun[lPartitionIndex] = vec3(-3.4e+38, -3.4e+38, -3.4e+38); | |
$endif | |
$ifdef MOON | |
minBoundsMoon[lPartitionIndex] = vec3(3.4e+38, 3.4e+38, 3.4e+38); | |
maxBoundsMoon[lPartitionIndex] = vec3(-3.4e+38, -3.4e+38, -3.4e+38); | |
$endif | |
} | |
} | |
{ | |
vec3 lMul = vec3(vec2(vec2(2.0) / vec2(uTexDepthBufferSize.xy - vec2(1.0))), 2.0); | |
ivec2 lBaseUV = ivec2((ivec2(gl_WorkGroupID.xy) * LOCAL_SIZE) + ivec2(gl_LocalInvocationID.xy)) * NUM_SAMPLES; | |
ivec2 lMaxUV = uTexDepthBufferSize.xy - ivec2(1); | |
for(int lY = 0; lY < NUM_SAMPLES; lY++){ | |
for(int lX = 0; lX < NUM_SAMPLES; lX++){ | |
ivec2 lUV = min(lBaseUV + ivec2(lX, lY), lMaxUV); | |
float lDepth = texelFetch(uTexDepthBuffer, lUV, 0).x; | |
if(lDepth < 1.0){ | |
vec4 lViewSpaceCoord = uViewSpaceInverseProjectionMatrix * vec4((vec3(lUV.xy, lDepth) * lMul) - vec3(1.0), 1.0); | |
lViewSpaceCoord = vec4(lViewSpaceCoord.xyz / lViewSpaceCoord.w, 1.0); | |
float lLinearZ = -lViewSpaceCoord.z; | |
$ifdef SUN | |
vec3 lSunLightSpaceCoord = (uViewSpaceToSunLightSpaceMatrix * lViewSpaceCoord).xyz; | |
$endif | |
$ifdef MOON | |
vec3 lMoonLightSpaceCoord = (uViewSpaceToMoonLightSpaceMatrix * lViewSpaceCoord).xyz; | |
$endif | |
for(int lPartitionIndex = 0; lPartitionIndex < NUM_PARTITIONS; lPartitionIndex++){ | |
if((lLinearZ >= reduceDataPartitions[lPartitionIndex].x) && (lLinearZ <= reduceDataPartitions[lPartitionIndex].w)){ | |
// if((lLinearZ >= (reduceDataPartitions[lPartitionIndex].x * 0.9)) && (lLinearZ <= (reduceDataPartitions[lPartitionIndex].w * 1.1))){ | |
$ifdef SUN | |
minBoundsSun[lPartitionIndex] = min(minBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); | |
maxBoundsSun[lPartitionIndex] = max(maxBoundsSun[lPartitionIndex], lSunLightSpaceCoord.xyz); | |
$endif | |
$ifdef MOON | |
minBoundsMoon[lPartitionIndex] = min(minBoundsMoon[lPartitionIndex], lMoonLightSpaceCoord.xyz); | |
maxBoundsMoon[lPartitionIndex] = max(maxBoundsMoon[lPartitionIndex], lMoonLightSpaceCoord.xyz); | |
$endif | |
} | |
} | |
} | |
} | |
} | |
} | |
uint lInvocationIndex = gl_LocalInvocationIndex; | |
{ | |
for(uint lPartitionIndex = 0u; lPartitionIndex < uint(NUM_PARTITIONS); lPartitionIndex++){ | |
uint lIndex = (lInvocationIndex * NUM_PARTITIONS) + lPartitionIndex; | |
$ifdef SUN | |
sharedMinBoundsSun[lIndex] = minBoundsSun[lPartitionIndex]; | |
sharedMaxBoundsSun[lIndex] = maxBoundsSun[lPartitionIndex]; | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lIndex] = minBoundsMoon[lPartitionIndex]; | |
sharedMaxBoundsMoon[lIndex] = maxBoundsMoon[lPartitionIndex]; | |
$endif | |
} | |
} | |
$if (!defined(UNROLL_LOOPS)) || (NUM_INVOCATIONS > 64) || (NUM_PARTITIONS == 3) || (NUM_PARTITIONS > 4) | |
for(uint lOffset = NUM_SHARED >> 1; lOffset >= uint(NUM_PARTITIONS); lOffset >>= 1){ | |
memoryBarrierShared(); | |
barrier(); | |
for(uint lDst = lInvocationIndex; lDst < lOffset; lDst += uint(NUM_INVOCATIONS)){ | |
uint lSrc = lDst + lOffset; | |
$ifdef SUN | |
sharedMinBoundsSun[lDst] = min(sharedMinBoundsSun[lDst], sharedMinBoundsSun[lSrc]); | |
sharedMaxBoundsSun[lDst] = max(sharedMaxBoundsSun[lDst], sharedMaxBoundsSun[lSrc]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lDst] = min(sharedMinBoundsMoon[lDst], sharedMinBoundsMoon[lSrc]); | |
sharedMaxBoundsMoon[lDst] = max(sharedMaxBoundsMoon[lDst], sharedMaxBoundsMoon[lSrc]); | |
$endif | |
} | |
} | |
$else | |
{ | |
$if NUM_SHARED >= 256 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 128u){ | |
$if NUM_INVOCATIONS == 64 | |
uint lOtherInvocationIndex = lInvocationIndex + 64u; | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 128u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 128u]); | |
sharedMinBoundsSun[lOtherInvocationIndex] = min(sharedMinBoundsSun[lOtherInvocationIndex], sharedMinBoundsSun[lOtherInvocationIndex + 128u]); | |
sharedMaxBoundsSun[lOtherInvocationIndex] = max(sharedMaxBoundsSun[lOtherInvocationIndex], sharedMaxBoundsSun[lOtherInvocationIndex + 128u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 128u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 128u]); | |
sharedMinBoundsMoon[lOtherInvocationIndex] = min(sharedMinBoundsMoon[lOtherInvocationIndex], sharedMinBoundsMoon[lOtherInvocationIndex + 128u]); | |
sharedMaxBoundsMoon[lOtherInvocationIndex] = max(sharedMaxBoundsMoon[lOtherInvocationIndex], sharedMaxBoundsMoon[lOtherInvocationIndex + 128u]); | |
$endif | |
$else | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 128u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 128u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 128u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 128u]); | |
$endif | |
$endif | |
} | |
$endif | |
$if NUM_SHARED >= 128 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 64u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 64u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 64u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 64u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 64u]); | |
$endif | |
} | |
$endif | |
$if NUM_SHARED >= 64 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 32u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 32u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 32u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 32u]); | |
$endif | |
} | |
$endif | |
$if NUM_SHARED >= 32 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 16u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 16u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 16u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 16u]); | |
$endif | |
} | |
$endif | |
$if NUM_SHARED >= 16 | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 8u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 8u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 8u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 8u]); | |
$endif | |
} | |
$endif | |
$if (NUM_SHARED >= 8) && (NUM_PARTITIONS <= 4) | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 4u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 4u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 4u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 4u]); | |
$endif | |
} | |
$endif | |
$if (NUM_SHARED >= 4) && (NUM_PARTITIONS <= 2) | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 2u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 2u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 2u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 2u]); | |
$endif | |
} | |
$endif | |
$if (NUM_SHARED >= 2) && (NUM_PARTITIONS <= 1) | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < 32u){ | |
$ifdef SUN | |
sharedMinBoundsSun[lInvocationIndex] = min(sharedMinBoundsSun[lInvocationIndex], sharedMinBoundsSun[lInvocationIndex + 1u]); | |
sharedMaxBoundsSun[lInvocationIndex] = max(sharedMaxBoundsSun[lInvocationIndex], sharedMaxBoundsSun[lInvocationIndex + 1u]); | |
$endif | |
$ifdef MOON | |
sharedMinBoundsMoon[lInvocationIndex] = min(sharedMinBoundsMoon[lInvocationIndex], sharedMinBoundsMoon[lInvocationIndex + 1u]); | |
sharedMaxBoundsMoon[lInvocationIndex] = max(sharedMaxBoundsMoon[lInvocationIndex], sharedMaxBoundsMoon[lInvocationIndex + 1u]); | |
$endif | |
} | |
$endif | |
} | |
$endif | |
{ | |
memoryBarrierShared(); | |
barrier(); | |
if(lInvocationIndex < uint(3 * NUM_PARTITIONS)){ | |
uint lSourceVectorElementIndex = lInvocationIndex / 3u; | |
uint lDestinationVectorElementIndex = lInvocationIndex - (lSourceVectorElementIndex * 3u); | |
$ifdef SUN | |
vec3 lSunMin = sharedMinBoundsSun[lSourceVectorElementIndex]; | |
vec3 lSunMax = sharedMaxBoundsSun[lSourceVectorElementIndex]; | |
$endif | |
$ifdef MOON | |
vec3 lMoonMin = sharedMinBoundsMoon[lSourceVectorElementIndex]; | |
vec3 lMoonMax = sharedMaxBoundsMoon[lSourceVectorElementIndex]; | |
$endif | |
$ifdef SUN | |
atomicMin(reduceDataAABBVectors[lInvocationIndex + uint(0 * NUM_PARTITIONS)], sdsmFloatToUInt(lSunMin[lDestinationVectorElementIndex])); | |
atomicMax(reduceDataAABBVectors[lInvocationIndex + uint(3 * NUM_PARTITIONS)], sdsmFloatToUInt(lSunMax[lDestinationVectorElementIndex])); | |
$endif | |
$ifdef MOON | |
atomicMin(reduceDataAABBVectors[lInvocationIndex + uint(6 * NUM_PARTITIONS)], sdsmFloatToUInt(lMoonMin[lDestinationVectorElementIndex])); | |
atomicMax(reduceDataAABBVectors[lInvocationIndex + uint(9 * NUM_PARTITIONS)], sdsmFloatToUInt(lMoonMax[lDestinationVectorElementIndex])); | |
$endif | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$define NUM_SHADOW_CASCADES 4 | |
$define SHADOW_DATA_COUNT ((NUM_SHADOW_CASCADES + 1) * 2) | |
$define SHADOW_BASE_SUN 0 | |
$define SHADOW_BASE_MOON (NUM_SHADOW_CASCADES + 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
begin | |
{$ifdef RendererProfiling} | |
if not Renderer.QueryFirstFrame then begin | |
glGetQueryObjectuiv(Renderer.Queries[9],GL_QUERY_RESULT,@Renderer.QueryResults[9]); | |
end; | |
glBeginQuery(GL_TIME_ELAPSED,Renderer.Queries[9]); | |
{$endif} | |
begin | |
Renderer.DepthBufferContext.ViewMatrix:=ViewMatrix; | |
Renderer.DepthBufferContext.FOV:=FOV; | |
Renderer.DepthBufferContext.Render(AViewPortX,AViewPortY,AViewPortWidth,AViewPortHeight,Renderer.DepthBufferFrameBufferObject); | |
end; | |
{$ifdef RendererProfiling} | |
glEndQuery(GL_TIME_ELAPSED); | |
{$endif} | |
if Renderer.ShadowMode in [4] then begin | |
if Renderer.SDSMReduceDataSSBO>0 then begin | |
{$ifdef RendererProfiling} | |
if not Renderer.QueryFirstFrame then begin | |
glGetQueryObjectuiv(Renderer.Queries[10],GL_QUERY_RESULT,@Renderer.QueryResults[10]); | |
end; | |
glBeginQuery(GL_TIME_ELAPSED,Renderer.Queries[10]); | |
{$endif} | |
Renderer.State.UseShader(nil); | |
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); | |
Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,Renderer.SDSMReduceDataSSBO); | |
if Renderer.State.UseShader(Renderer.SDSMReduceClear) then begin | |
if Renderer.SDSMReduceClear_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReduceClear_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReduceClear_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReduceClear_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
if Renderer.SDSMReduceClear_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceClear_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(1,1,1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
if Renderer.State.UseShader(Renderer.SDSMReduceCollect) then begin | |
if Renderer.SDSMReduceCollect_uTexDepthBuffer>=0 then begin | |
Renderer.State.SetUniform1i(Renderer.SDSMReduceCollect_uTexDepthBuffer,0); | |
end; | |
if Renderer.SDSMReduceCollect_uTexDepthBufferSize>=0 then begin | |
Renderer.State.SetUniform2i(Renderer.SDSMReduceCollect_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height); | |
end; | |
if Renderer.SDSMReduceCollect_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReduceCollect_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReduceCollect_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReduceCollect_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D); | |
if Renderer.SDSMReduceCollect_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceCollect_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
if Renderer.State.UseShader(Renderer.SDSMReducePartitioning) then begin | |
if Renderer.SDSMReducePartitioning_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReducePartitioning_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReducePartitioning_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReducePartitioning_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D); | |
if Renderer.SDSMReducePartitioning_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReducePartitioning_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(MaxCascadedShadowMaps,1,1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
{}if Renderer.ShadowMode in [4] then begin | |
if Renderer.SunActive and Renderer.MoonActive then begin | |
if Renderer.State.UseShader(Renderer.SDSMReduceTightingSunMoon) then begin | |
if Renderer.SDSMReduceTightingSunMoon_uTexDepthBuffer>=0 then begin | |
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingSunMoon_uTexDepthBuffer,0); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uTexDepthBufferSize>=0 then begin | |
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingSunMoon_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingSunMoon_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingSunMoon_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceInverseProjectionMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix)); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceToSunLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix); | |
end; | |
if Renderer.SDSMReduceTightingSunMoon_uViewSpaceToMoonLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSunMoon_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix); | |
end; | |
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D); | |
if Renderer.SDSMReduceTightingSunMoon_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingSunMoon_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
end else if Renderer.SunActive then begin | |
if Renderer.State.UseShader(Renderer.SDSMReduceTightingSun) then begin | |
if Renderer.SDSMReduceTightingSun_uTexDepthBuffer>=0 then begin | |
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingSun_uTexDepthBuffer,0); | |
end; | |
if Renderer.SDSMReduceTightingSun_uTexDepthBufferSize>=0 then begin | |
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingSun_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height); | |
end; | |
if Renderer.SDSMReduceTightingSun_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingSun_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReduceTightingSun_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingSun_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
if Renderer.SDSMReduceTightingSun_uViewSpaceInverseProjectionMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix)); | |
end; | |
if Renderer.SDSMReduceTightingSun_uViewSpaceToSunLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix); | |
end; | |
if Renderer.SDSMReduceTightingSun_uViewSpaceToMoonLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingSun_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix); | |
end; | |
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D); | |
if Renderer.SDSMReduceTightingSun_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingSun_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
end else if Renderer.MoonActive then begin | |
if Renderer.State.UseShader(Renderer.SDSMReduceTightingMoon) then begin | |
if Renderer.SDSMReduceTightingMoon_uTexDepthBuffer>=0 then begin | |
Renderer.State.SetUniform1i(Renderer.SDSMReduceTightingMoon_uTexDepthBuffer,0); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uTexDepthBufferSize>=0 then begin | |
Renderer.State.SetUniform2i(Renderer.SDSMReduceTightingMoon_uTexDepthBufferSize,Renderer.DepthBufferFrameBufferObject.Width,Renderer.DepthBufferFrameBufferObject.Height); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uClipPlaneDepthConstants>=0 then begin | |
Renderer.State.SetUniform3f(Renderer.SDSMReduceTightingMoon_uClipPlaneDepthConstants,2.0*(ZNear*ZFar),ZFar+ZNear,ZFar-ZNear); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uClipPlaneNearFar>=0 then begin | |
Renderer.State.SetUniform2f(Renderer.SDSMReduceTightingMoon_uClipPlaneNearFar,ZNear,ZFar); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uViewSpaceInverseProjectionMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceInverseProjectionMatrix,Matrix4x4TermInverse(ProjectionMatrix)); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uViewSpaceToSunLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceToSunLightSpaceMatrix,Renderer.SunFromViewSpaceToLightSpaceMatrix); | |
end; | |
if Renderer.SDSMReduceTightingMoon_uViewSpaceToMoonLightSpaceMatrix>=0 then begin | |
Renderer.State.SetUniformMatrix4f(Renderer.SDSMReduceTightingMoon_uViewSpaceToMoonLightSpaceMatrix,Renderer.MoonFromViewSpaceToLightSpaceMatrix); | |
end; | |
Renderer.State.BindTexture(GL_TEXTURE0,Renderer.DepthBufferFrameBufferObject.TextureHandles[Renderer.DepthBufferFrameBufferObject.Textures],GL_TEXTURE_2D); | |
if Renderer.SDSMReduceTightingMoon_RecudeDataSSBOLocation>=0 then begin | |
Renderer.State.ShaderStorageBlockBinding(Renderer.SDSMReduceTightingMoon_RecudeDataSSBOLocation,0); | |
end; | |
glDispatchCompute(Max((Renderer.DepthBufferFrameBufferObject.Width+63) shr 6,1),Max((Renderer.DepthBufferFrameBufferObject.Height+63) shr 6,1),1); | |
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | |
Renderer.State.UseShader(nil); | |
end; | |
end; | |
end;{} | |
begin | |
Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,Renderer.SDSMReduceDataSSBO); | |
p:=glMapBuffer(GL_SHADER_STORAGE_BUFFER,GL_READ_ONLY); | |
if assigned(p) then begin | |
Move(p^,Renderer.SDSMReduceData,SizeOf(TEngineRendererSDSMReduceData)); | |
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); | |
end; | |
Renderer.State.BindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0); | |
end; | |
{$ifdef RendererProfiling} | |
glEndQuery(GL_TIME_ELAPSED); | |
{$endif} | |
end; | |
end; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment