// Outline of the one-time setup that runs before the render loop.
// Lines without GL calls are pseudocode steps, not compilable statements.

// Shader programs: depth pre-pass, compute light culling, lit forward pass, HDR tonemap
Load depth shader
Load light culling shader
Load light accumulation shader
Load HDR shader
// Off-screen render targets
Setup depth fbo
Setup hdr fbo
Load 3D model
Initialize Scene
// Uniforms uploaded once here; 'projection' and 'view' are re-uploaded every frame inside the loop
Use depthShader and set 'model' uniform matrix
Use lightCullingShader and set 'lightCount' uniform
Set lightCullingShader 'screenSize' uniform
Use lightAccumulationShader and set 'model' uniform
// The fragment shader uses this to turn a 2D tile coordinate into a flat tile index,
// so it must equal the X work group count used for the compute dispatch
Set lightAccumulationShader 'numberOfTilesX' uniform to workGroupsX
// Set viewport dimensions and background color
glViewport(0, 0, SCREEN_SIZE.x, SCREEN_SIZE.y);
glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
// Per-frame render loop (pseudocode steps mixed with the literal GL calls):
//   1. depth pre-pass into depthMapFBO
//   2. compute-shader light culling, one work group per screen tile
//   3. lit forward pass into the floating point HDR framebuffer
//   4. tonemap the HDR color buffer onto the default framebuffer
loop {
    update camera
    update lights
    // calculate projection and view matrices from camera

    // Step 1: Render the depth of the scene to a depth map
    Use depthShader and set 'projection' matrix
    Set depthShader 'view' matrix
    // Bind the depth map's frame buffer and draw the scene
    glBindFramebuffer(GL_FRAMEBUFFER, depthMapFBO);
    glClear(GL_DEPTH_BUFFER_BIT);
    // Draw Scene using depth shader
    glBindFramebuffer(GL_FRAMEBUFFER, 0);

    // Step 2: Perform light culling on point lights in the scene
    Use lightCullingShader and set 'projection' matrix
    Set lightCullingShader 'view' matrix
    // Bind the depth map texture to texture location 4 (not used by any model texture)
    glActiveTexture(GL_TEXTURE4);
    Set lightCullingShader 'depthMap' texture to 4
    glBindTexture(GL_TEXTURE_2D, depthMap);
    // Bind shader storage buffer objects for the light and index buffers
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, lightBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, visibleLightIndicesBuffer);
    // Dispatch the compute shader, using the workgroup values calculated earlier
    glDispatchCompute(workGroupsX, workGroupsY, 1);
    // The compute shader writes the visible light indices through an SSBO and the
    // accumulation fragment shader reads them back; without this barrier those
    // reads are not guaranteed to see the writes of the dispatch above.
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
    // Unbind the depth map
    glActiveTexture(GL_TEXTURE4);
    glBindTexture(GL_TEXTURE_2D, 0);

    // Step 3: Accumulate the remaining lights after culling and render
    // (or execute one of the debug views if a flag is enabled)
    // Here is where we would run debug shaders
    // We render the scene into the floating point HDR frame buffer
    glBindFramebuffer(GL_FRAMEBUFFER, hdrFBO);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    Use lightAccumulationShader and set 'projection' matrix
    Set lightAccumulationShader 'view' matrix
    Set lightAccumulationShader 'viewPosition' vector
    // Draw Opaque objects
    // Draw Mask objects
    glEnable(GL_BLEND);
    // Standard "over" blending. A second glBlendFunc(GL_ONE_MINUS_DST_ALPHA, GL_DST_ALPHA)
    // used to follow and silently replaced this one; the HDR color buffer is GL_RGB16F
    // (no alpha, so destination alpha reads as 1), which made the source factor 0 and the
    // blended geometry invisible.
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    // Draw alpha blended objects
    glDisable(GL_BLEND);

    // Tonemap the HDR colors to the default framebuffer.
    // hdrFBO must be unbound first: otherwise the clear wipes the frame that was just
    // rendered and the quad is drawn into the same texture it is sampling from.
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    Use the HDR shader
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, colorBuffer);
    Set HDR shader 'exposure'
    Draw fullscreen quad

    // Release the indexed SSBO binding points until the next frame rebinds them
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, lightBuffer);
PointLight *pointLights = (PointLight*)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_WRITE);
for (int i = 0; i < NUM_LIGHTS; i++) {
PointLight &light = pointLights[i];
float min = LIGHT_MIN_BOUNDS[1];
float max = LIGHT_MAX_BOUNDS[1];
light.position.y = fmod((light.position.y + (-4.5f * deltaTime) - min + max), max) + min;
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
// Draws a screen-covering quad as a four-vertex triangle strip.
// The vertex array and buffer are created the first time this runs (quadVAO is still 0)
// and reused on every later run.
// TODO: We can do this fully in the vertex shader!
if (quadVAO == 0) {
    // Interleaved per-vertex layout: position (x, y, z) followed by texture coordinates (u, v)
    GLfloat quadVertices[] = {
        -1.0f,  1.0f, 0.0f,   0.0f, 1.0f,
        -1.0f, -1.0f, 0.0f,   0.0f, 0.0f,
         1.0f,  1.0f, 0.0f,   1.0f, 1.0f,
         1.0f, -1.0f, 0.0f,   1.0f, 0.0f,
    };
    const GLsizei vertexStride = 5 * sizeof(GLfloat);
    GLvoid *const positionOffset = (GLvoid*)0;
    GLvoid *const texCoordOffset = (GLvoid*)(3 * sizeof(GLfloat));

    glGenVertexArrays(1, &quadVAO);
    glGenBuffers(1, &quadVBO);

    glBindVertexArray(quadVAO);
    glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
    glBufferData(GL_ARRAY_BUFFER, sizeof(quadVertices), quadVertices, GL_STATIC_DRAW);

    // Attribute 0: three floats of position
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, vertexStride, positionOffset);
    glEnableVertexAttribArray(0);
    // Attribute 1: two floats of texture coordinates, stored after the position
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, vertexStride, texCoordOffset);
    glEnableVertexAttribArray(1);
}

glBindVertexArray(quadVAO);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glBindVertexArray(0);
// Depth pre-pass target: a framebuffer whose only attachment is a screen-sized
// floating point depth texture (sampled later by the light culling shader).
GLuint depthMapFBO = 0;
GLuint depthMap = 0;
glGenFramebuffers(1, &depthMapFBO);
glGenTextures(1, &depthMap);

// Allocate the depth texture; no initial data is uploaded
glBindTexture(GL_TEXTURE_2D, depthMap);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, SCREEN_SIZE.x, SCREEN_SIZE.y, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);

// Unfiltered lookups; coordinates outside the texture resolve to the border color (1.0)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
const GLfloat borderColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, borderColor);

// Attach the texture as the depth attachment; there is no color attachment,
// so both the draw buffer and the read buffer are set to none
glBindFramebuffer(GL_FRAMEBUFFER, depthMapFBO);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthMap, 0);
glReadBuffer(GL_NONE);
glDrawBuffer(GL_NONE);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
// HDR render target: a framebuffer with a 16-bit floating point RGB color texture
// and a depth renderbuffer, both sized to the screen.
GLuint hdrFBO = 0;
GLuint colorBuffer = 0;
GLuint rboDepth = 0;
glGenFramebuffers(1, &hdrFBO);
glGenTextures(1, &colorBuffer);
glGenRenderbuffers(1, &rboDepth);

// Color storage (kept as a texture so the tonemapping pass can sample it), linearly filtered
glBindTexture(GL_TEXTURE_2D, colorBuffer);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, SCREEN_SIZE.x, SCREEN_SIZE.y, 0, GL_RGB, GL_FLOAT, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

// Depth storage as a renderbuffer
glBindRenderbuffer(GL_RENDERBUFFER, rboDepth);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, SCREEN_SIZE.x, SCREEN_SIZE.y);

// Wire both attachments into the framebuffer, then restore the default framebuffer
glBindFramebuffer(GL_FRAMEBUFFER, hdrFBO);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rboDepth);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorBuffer, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
// Global GL state switched on once: face culling, multisampling,
// and depth testing with depth writes enabled
glEnable(GL_CULL_FACE);
glEnable(GL_MULTISAMPLE);
glEnable(GL_DEPTH_TEST);
glDepthMask(GL_TRUE);
// Define work group sizes in x and y direction based off screen size and tile size (in pixels)
workGroupsX = (SCREEN_SIZE.x + (SCREEN_SIZE.x % 16)) / 16;
workGroupsY = (SCREEN_SIZE.y + (SCREEN_SIZE.y % 16)) / 16;
size_t numberOfTiles = workGroupsX * workGroupsY;
// Generate our shader storage buffers
glGenBuffers(1, &lightBuffer);
glGenBuffers(1, &visibleLightIndicesBuffer);
// Bind light buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, lightBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_LIGHTS * sizeof(PointLight), 0, GL_DYNAMIC_DRAW);
// Bind visible light indices buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, visibleLightIndicesBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, numberOfTiles * sizeof(VisibleIndex) * 1024, 0, GL_STATIC_DRAW);
// Setup lights
glBindBuffer(GL_SHADER_STORAGE_BUFFER, lightBuffer);
PointLight *pointLights = (PointLight*)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_WRITE);
for (int i = 0; i < NUM_LIGHTS; i++) {
PointLight &light = pointLights[i];
light.position = glm::vec4(RandomPosition(dis, gen), 1.0f);
light.color = glm::vec4(1.0f + dis(gen), 1.0f + dis(gen), 1.0f + dis(gen), 1.0f);
light.paddingAndRadius = glm::vec4(glm::vec3(0.0f), LIGHT_RADIUS);
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
// Bind the storage buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
/// GLSL 4.30 vertex shader for the depth pre-pass.
///
/// Takes the vertex position from attribute location 0 and writes
/// `projection * view * model * position` to `gl_Position`; it has no other outputs.
const DEPTH_VERTEX_SHADER_SOURCE: &str = r#"
#version 430
layout (location = 0) in vec3 position;
// Uniforms
uniform mat4 projection;
uniform mat4 view;
uniform mat4 model;
void main() {
gl_Position = projection * view * model * vec4(position, 1.0);
}
"#;
/// GLSL 4.30 fragment shader for the depth pre-pass.
///
/// `main` is intentionally empty: the shader declares no outputs.
const DEPTH_FRAGMENT_SHADER_SOURCE: &str = r#"
#version 430
void main() {}
"#;
/// GLSL 3.30 core vertex shader for the lit forward pass.
///
/// Inputs are position, normal, texture coordinates, tangent and bitangent
/// (attribute locations 0 to 4). Besides `gl_Position` it emits a `VERTEX_OUT` block with the
/// world-space fragment position, the texture coordinates, the transposed TBN matrix, and the
/// view and fragment positions multiplied by that matrix for tangent-space lighting.
const LIGHT_ACCUMULATION_VERTEX_SHADER_SOURCE: &str = r#"
#version 330 core
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 normal;
layout (location = 2) in vec2 texCoords;
layout (location = 3) in vec3 tangent;
layout (location = 4) in vec3 bitangent;
out VERTEX_OUT {
vec3 fragmentPosition;
vec2 textureCoordinates;
mat3 TBN;
vec3 tangentViewPosition;
vec3 tangentFragmentPosition;
} vertex_out;
// Uniforms
uniform mat4 projection;
uniform mat4 view;
uniform mat4 model;
uniform vec3 viewPosition;
void main() {
gl_Position = projection * view * model * vec4(position, 1.0);
vertex_out.fragmentPosition = vec3(model * vec4(position, 1.0));
vertex_out.textureCoordinates = texCoords;
mat3 normalMatrix = transpose(inverse(mat3(model)));
vec3 tan = normalize(normalMatrix * tangent);
vec3 bitan = normalize(normalMatrix * bitangent);
vec3 norm = normalize(normalMatrix * normal);
// For tangent space normal mapping
mat3 TBN = transpose(mat3(tan, bitan, norm));
vertex_out.tangentViewPosition = TBN * viewPosition;
vertex_out.tangentFragmentPosition = TBN * vertex_out.fragmentPosition;
vertex_out.TBN = TBN;
}
"#;
/// GLSL 4.30 fragment shader for the lit forward pass.
///
/// Works out which 16x16 tile the fragment falls in, then walks that tile's slice of the
/// visible light index buffer (1024 slots per tile, terminated early by an index of -1) and
/// accumulates diffuse + specular lighting in tangent space, plus a constant 0.08 ambient term.
///
/// Fragments whose diffuse alpha is <= 0.2 are discarded. The discard now happens right after
/// the last texture fetch instead of after the light loop, so masked-out fragments no longer
/// pay for up to 1024 light evaluations whose result is thrown away. No texture lookup follows
/// the discard, and the output for surviving fragments is unchanged.
const LIGHT_ACCUMULATION_FRAGMENT_SHADER_SOURCE: &str = r#"
#version 430
in VERTEX_OUT{
vec3 fragmentPosition;
vec2 textureCoordinates;
mat3 TBN;
vec3 tangentViewPosition;
vec3 tangentFragmentPosition;
} fragment_in;
struct PointLight {
vec4 color;
vec4 position;
vec4 paddingAndRadius;
};
struct VisibleIndex {
int index;
};
// Shader storage buffer objects
layout(std430, binding = 0) readonly buffer LightBuffer {
PointLight data[];
} lightBuffer;
layout(std430, binding = 1) readonly buffer VisibleLightIndicesBuffer {
VisibleIndex data[];
} visibleLightIndicesBuffer;
// Uniforms
uniform sampler2D texture_diffuse1;
uniform sampler2D texture_specular1;
uniform sampler2D texture_normal1;
uniform int numberOfTilesX;
out vec4 fragColor;
// Attenuate the point light intensity
float attenuate(vec3 lightDirection, float radius) {
float cutoff = 0.5;
float attenuation = dot(lightDirection, lightDirection) / (100.0 * radius);
attenuation = 1.0 / (attenuation * 15.0 + 1.0);
attenuation = (attenuation - cutoff) / (1.0 - cutoff);
return clamp(attenuation, 0.0, 1.0);
}
void main() {
// Determine which tile this pixel belongs to
ivec2 location = ivec2(gl_FragCoord.xy);
ivec2 tileID = location / ivec2(16, 16);
uint index = tileID.y * numberOfTilesX + tileID.x;
// Get color and normal components from texture maps
vec4 base_diffuse = texture(texture_diffuse1, fragment_in.textureCoordinates);
vec4 base_specular = texture(texture_specular1, fragment_in.textureCoordinates);
vec3 normal = texture(texture_normal1, fragment_in.textureCoordinates).rgb;
// Use the mask to discard any fragments that are transparent
// Done before the light loop so masked fragments skip all lighting work
if (base_diffuse.a <= 0.2) {
discard;
}
normal = normalize(normal * 2.0 - 1.0);
vec4 color = vec4(0.0, 0.0, 0.0, 1.0);
vec3 viewDirection = normalize(fragment_in.tangentViewPosition - fragment_in.tangentFragmentPosition);
// The offset is this tile's position in the global array of valid light indices.
// Loop through all these indices until we hit max number of lights or the end (indicated by an index of -1)
// Calculate the lighting contribution from each visible point light
uint offset = index * 1024;
for (uint i = 0; i < 1024 && visibleLightIndicesBuffer.data[offset + i].index != -1; i++) {
uint lightIndex = visibleLightIndicesBuffer.data[offset + i].index;
PointLight light = lightBuffer.data[lightIndex];
vec4 lightColor = light.color;
vec3 tangentLightPosition = fragment_in.TBN * light.position.xyz;
float lightRadius = light.paddingAndRadius.w;
// Calculate the light attenuation on the pre-normalized lightDirection
vec3 lightDirection = tangentLightPosition - fragment_in.tangentFragmentPosition;
float attenuation = attenuate(lightDirection, lightRadius);
// Normalize the light direction and calculate the halfway vector
lightDirection = normalize(lightDirection);
vec3 halfway = normalize(lightDirection + viewDirection);
// Calculate the diffuse and specular components of the irradiance, then irradiance, and accumulate onto color
float diffuse = max(dot(lightDirection, normal), 0.0);
// How do I change the material propery for the spec exponent? is it the alpha of the spec texture?
float specular = pow(max(dot(normal, halfway), 0.0), 32.0);
// Hacky fix to handle issue where specular light still effects scene once point light has passed into an object
if (diffuse == 0.0) {
specular = 0.0;
}
vec3 irradiance = lightColor.rgb * ((base_diffuse.rgb * diffuse) + (base_specular.rgb * vec3(specular))) * attenuation;
color.rgb += irradiance;
}
color.rgb += base_diffuse.rgb * 0.08;
fragColor = color;
}
"#;
/// GLSL 4.30 compute shader that culls point lights per screen tile.
///
/// One 16x16 work group handles one tile:
/// 1. every invocation samples the depth map and the group reduces to a min/max depth with atomics;
/// 2. invocation 0 builds six frustum planes for the tile;
/// 3. the invocations test the lights in passes of 256 against those planes and collect the
///    indices of intersecting lights in shared memory;
/// 4. invocation 0 copies the indices to the tile's 1024-slot slice of the global index buffer
///    and terminates the list with -1 unless the slice is completely full.
///
/// The per-tile list is capped at 1024 entries. The shared array and the tile's slice of the
/// global buffer both hold exactly 1024 indices, so lights past that count are dropped instead
/// of being written out of bounds (which previously overflowed shared memory and spilled into
/// the next tile's slice whenever more than 1024 lights touched a tile).
const LIGHT_CULLING_COMPUTE_SHADER_SOURCE: &str = r#"
#version 430
struct PointLight {
vec4 color;
vec4 position;
vec4 paddingAndRadius;
};
struct VisibleIndex {
int index;
};
// Shader storage buffer objects
layout(std430, binding = 0) readonly buffer LightBuffer {
PointLight data[];
} lightBuffer;
layout(std430, binding = 1) writeonly buffer VisibleLightIndicesBuffer {
VisibleIndex data[];
} visibleLightIndicesBuffer;
// Uniforms
uniform sampler2D depthMap;
uniform mat4 view;
uniform mat4 projection;
uniform ivec2 screenSize;
uniform int lightCount;
// Shared values between all the threads in the group
shared uint minDepthInt;
shared uint maxDepthInt;
shared uint visibleLightCount;
shared vec4 frustumPlanes[6];
// Shared local storage for visible indices, will be written out to the global buffer at the end
shared int visibleLightIndices[1024];
shared mat4 viewProjection;
// Took some light culling guidance from Dice's deferred renderer
// http://www.dice.se/news/directx-11-rendering-battlefield-3/
#define TILE_SIZE 16
layout(local_size_x = TILE_SIZE, local_size_y = TILE_SIZE, local_size_z = 1) in;
void main() {
ivec2 location = ivec2(gl_GlobalInvocationID.xy);
ivec2 itemID = ivec2(gl_LocalInvocationID.xy);
ivec2 tileID = ivec2(gl_WorkGroupID.xy);
ivec2 tileNumber = ivec2(gl_NumWorkGroups.xy);
uint index = tileID.y * tileNumber.x + tileID.x;
// Initialize shared global values for depth and light count
if (gl_LocalInvocationIndex == 0) {
minDepthInt = 0xFFFFFFFF;
maxDepthInt = 0;
visibleLightCount = 0;
viewProjection = projection * view;
}
barrier();
// Step 1: Calculate the minimum and maximum depth values (from the depth buffer) for this group's tile
float maxDepth, minDepth;
vec2 text = vec2(location) / screenSize;
float depth = texture(depthMap, text).r;
// Linearize the depth value from depth buffer (must do this because we created it using projection)
depth = (0.5 * projection[3][2]) / (depth + 0.5 * projection[2][2] - 0.5);
// Convert depth to uint so we can do atomic min and max comparisons between the threads
uint depthInt = floatBitsToUint(depth);
atomicMin(minDepthInt, depthInt);
atomicMax(maxDepthInt, depthInt);
barrier();
// Step 2: One thread should calculate the frustum planes to be used for this tile
if (gl_LocalInvocationIndex == 0) {
// Convert the min and max across the entire tile back to float
minDepth = uintBitsToFloat(minDepthInt);
maxDepth = uintBitsToFloat(maxDepthInt);
// Steps based on tile sale
vec2 negativeStep = (2.0 * vec2(tileID)) / vec2(tileNumber);
vec2 positiveStep = (2.0 * vec2(tileID + ivec2(1, 1))) / vec2(tileNumber);
// Set up starting values for planes using steps and min and max z values
frustumPlanes[0] = vec4(1.0, 0.0, 0.0, 1.0 - negativeStep.x); // Left
frustumPlanes[1] = vec4(-1.0, 0.0, 0.0, -1.0 + positiveStep.x); // Right
frustumPlanes[2] = vec4(0.0, 1.0, 0.0, 1.0 - negativeStep.y); // Bottom
frustumPlanes[3] = vec4(0.0, -1.0, 0.0, -1.0 + positiveStep.y); // Top
frustumPlanes[4] = vec4(0.0, 0.0, -1.0, -minDepth); // Near
frustumPlanes[5] = vec4(0.0, 0.0, 1.0, maxDepth); // Far
// Transform the first four planes
for (uint i = 0; i < 4; i++) {
frustumPlanes[i] *= viewProjection;
frustumPlanes[i] /= length(frustumPlanes[i].xyz);
}
// Transform the depth planes
frustumPlanes[4] *= view;
frustumPlanes[4] /= length(frustumPlanes[4].xyz);
frustumPlanes[5] *= view;
frustumPlanes[5] /= length(frustumPlanes[5].xyz);
}
barrier();
// Step 3: Cull lights.
// Parallelize the threads against the lights now.
// Can handle 256 simultaniously. Anymore lights than that and additional passes are performed
uint threadCount = TILE_SIZE * TILE_SIZE;
uint passCount = (lightCount + threadCount - 1) / threadCount;
for (uint i = 0; i < passCount; i++) {
// Get the lightIndex to test for this thread / pass. If the index is >= light count, then this thread can stop testing lights
uint lightIndex = i * threadCount + gl_LocalInvocationIndex;
if (lightIndex >= lightCount) {
break;
}
vec4 position = lightBuffer.data[lightIndex].position;
float radius = lightBuffer.data[lightIndex].paddingAndRadius.w;
// We check if the light exists in our frustum
float distance = 0.0;
for (uint j = 0; j < 6; j++) {
distance = dot(position, frustumPlanes[j]) + radius;
// If one of the tests fails, then there is no intersection
if (distance <= 0.0) {
break;
}
}
// If greater than zero, then it is a visible light
if (distance > 0.0) {
// Add index to the shared array of visible indices
uint offset = atomicAdd(visibleLightCount, 1);
// The shared array only has 1024 slots; drop any light beyond that instead of writing out of bounds
if (offset < 1024u) {
visibleLightIndices[offset] = int(lightIndex);
}
}
}
barrier();
// One thread should fill the global light buffer
if (gl_LocalInvocationIndex == 0) {
uint offset = index * 1024; // Determine bosition in global buffer
// visibleLightCount keeps counting past the cap, so clamp it to the slots that were actually stored
uint storedCount = min(visibleLightCount, 1024u);
for (uint i = 0; i < storedCount; i++) {
visibleLightIndicesBuffer.data[offset + i].index = visibleLightIndices[i];
}
if (storedCount != 1024u) {
// Unless we have totally filled the entire array, mark it's end with -1
// Final shader step will use this to determine where to stop (without having to pass the light count)
visibleLightIndicesBuffer.data[offset + storedCount].index = -1;
}
}
}
"#;
/// GLSL 4.30 core vertex shader for the tonemapping pass.
///
/// Passes the position (attribute 0) straight through to `gl_Position` with w = 1.0 and
/// forwards the texture coordinates (attribute 1) to the fragment stage unchanged.
const HDR_VERTEX_SHADER_SOURCE: &str = r#"
#version 430 core
layout(location = 0) in vec3 position;
layout(location = 1) in vec2 textureCoordinates;
out vec2 TextureCoordinates;
void main() {
gl_Position = vec4(position, 1.0);
TextureCoordinates = textureCoordinates;
}
"#;
/// GLSL 4.30 core fragment shader for the tonemapping pass.
///
/// Samples `hdrBuffer`, maps the color with `1 - exp(-color * exposure)`, applies a
/// `1 / 2.2` gamma power, and writes the result with alpha 1.0.
///
/// NOTE(review): the comment inside the shader text names Reinhard tonemapping, but the formula
/// in `main` is the exponential form above; confirm which one is intended.
const HDR_FRAGMENT_SHADER_SOURCE: &str = r#"
#version 430 core
in vec2 TextureCoordinates;
// Uniforms
// Texture for the hdr buffer
uniform sampler2D hdrBuffer;
// Controls exposure level of image
uniform float exposure;
out vec4 fragColor;
// Uses Reinhard tonemapping https://www.cs.utah.edu/~reinhard/cdrom/tonemap.pdf
// with an added controllable exposure component
void main() {
vec3 color = texture(hdrBuffer, TextureCoordinates).rgb;
vec3 result = vec3(1.0) - exp(-color * exposure);
// Minor gamma correction. Need to expand on it
const float gamma = 2.2;
result = pow(result, vec3(1.0 / gamma));
fragColor = vec4(result, 1.0);
}
"#;