BeRo1985/order_independent_transparency.glsl

## order_independent_transparency.glsl
// Hybrid atomic loop weighted blended order independent transparency implementation (work in progress)
// Copyright (C) 2014 by Benjamin 'BeRo' Rosseaux
// Licensed under the CC0 license, since German law has no concept of the public
// domain.

// This implementation requires at least OpenGL 4.3, because it uses shader storage buffer objects

// Also supports additive blending for emission color portions

// Textures fetched by oitResolve() with texelFetch at the current fragment coordinate.
// NOTE(review): presumably these are the render targets that received the two oitPassTwo() outputs - confirm against the host code.
uniform sampler2D uTexTailWeightedBlendedOrderIndependentTransparencyColor; // oitResolve() divides .rgb by .a (clamped to at least 1e-5)
uniform sampler2D uTexTailWeightedBlendedOrderIndependentTransparencyAlpha; // oitResolve() reads only the .x channel

uniform int uOrderIndependentTransparencyWidth; // Number of pixels per row, used as the row stride in oitGetCoordBaseBufferIndex()
uniform int uOrderIndependentTransparencyLayers; // Number of consecutive buffer slots (layers) owned by every pixel

// Per-pixel depth slots: every slot holds the raw bits of a gl_FragCoord.z value, 0xffffffffu marks an empty slot
layout(std430) coherent buffer orderIndependentTransparencyDepthBufferData {
  uint orderIndependentTransparencyDepthBuffer[];
};

// Per-pixel color slots, addressed with the same indices as the depth slots
layout(std430) buffer orderIndependentTransparencyColorBufferData {
  uvec4 orderIndependentTransparencyColorBuffer[]; // Two packed RGBA16F values: xy = alpha blended color and zw = emission color
};

// Returns the index of the first layer slot that belongs to the pixel of the current fragment.
// Pixels are stored row by row, and every pixel owns uOrderIndependentTransparencyLayers consecutive slots.
int oitGetCoordBaseBufferIndex(){
  ivec2 lPixel = ivec2(gl_FragCoord.xy);
  int lPixelIndex = (lPixel.y * uOrderIndependentTransparencyWidth) + lPixel.x;
  return lPixelIndex * uOrderIndependentTransparencyLayers;
}

// One-time clear right after the depth buffer has been created, for the case that the clear
// is not done on the CPU side: marks every layer slot of the current pixel as empty (0xffffffffu).
void oitClear(){
  int lFirstSlot = oitGetCoordBaseBufferIndex();
  int lSlotEnd = lFirstSlot + uOrderIndependentTransparencyLayers;
  for(int lSlot = lFirstSlot; lSlot < lSlotEnd; lSlot++){
    orderIndependentTransparencyDepthBuffer[lSlot] = 0xffffffffu;
  }
}

// Optional: a "pass zero" with an alpha test against the alpha value 1.0 gives better results for the weighted blended order independent transparency portion

// First pass: finds the K closest depth values of the current pixel (K = uOrderIndependentTransparencyLayers).
// The depth of the fragment is pushed through the slots of the pixel with atomicMin: every slot keeps the
// smaller value, and the larger one is carried on to the next slot, so the slots end up in ascending order.
// The depth values are compared as the raw bits of gl_FragCoord.z.
void oitPassOne(){
  int lBaseIndex = oitGetCoordBaseBufferIndex();
  uint lCandidateDepth = floatBitsToUint(gl_FragCoord.z);
  for(int lLayer = 0; lLayer < uOrderIndependentTransparencyLayers; lLayer++){
    uint lPreviousDepth = atomicMin(orderIndependentTransparencyDepthBuffer[lBaseIndex + lLayer], lCandidateDepth);
    bool lSlotWasEmpty = (lPreviousDepth == 0xffffffffu); // The candidate has taken a free slot
    bool lAlreadyStored = (lPreviousDepth == lCandidateDepth); // The same depth is already in the list
    if(lSlotWasEmpty || lAlreadyStored){
      break;
    }
    // Whichever value has lost the slot must be tried against the next layer
    lCandidateDepth = max(lPreviousDepth, lCandidateDepth);
  }
}

// Second pass: records the fragment color in the layer slot that holds its depth, or, when no slot holds
// its depth, emits it for the tail weighted blended order independent transparency blending.
//
// pAlphaBlendingColor - RGBA color for alpha blending, not pre-multiplied by its alpha value
// pEmissionColor      - RGBA emission color for additive-like blending
// pViewSpaceZ         - view space depth of the fragment, used only for the tail blending weight (the sign does not matter)
// pTailWeightedBlendedOrderIndependentTransparencyColor / ...Alpha - the tail blending outputs; both are zero
//   when the fragment was recorded in a layer slot or when both input colors are all-zero
//
// NOTE(review): the color buffer write is read back by oitResolve() in a later pass, so the host presumably
// has to issue a memory barrier between the passes - confirm against the host code.
void oitPassTwo(const in vec4 pAlphaBlendingColor, const in vec4 pEmissionColor, const in float pViewSpaceZ, out vec4 pTailWeightedBlendedOrderIndependentTransparencyColor, out float pTailWeightedBlendedOrderIndependentTransparencyAlpha){
  uint lFragDepth = floatBitsToUint(gl_FragCoord.z);
  int lBufferIndex = oitGetCoordBaseBufferIndex();

  // Defined defaults, so that the out parameters are never left unwritten
  pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(0.0);
  pTailWeightedBlendedOrderIndependentTransparencyAlpha = 0.0;

  // Binary search over the ascending sorted depth slots of this pixel (empty slots are 0xffffffffu and so sort last).
  // [lLow, lHigh] is an inclusive range, therefore the loop must also run when both bounds are equal,
  // otherwise the last remaining candidate slot (for example slot 0) would never be tested.
  bool lRecorded = false;
  int lLow = 0;
  int lHigh = uOrderIndependentTransparencyLayers - 1;
  while(lLow <= lHigh){
    int lMiddle = lLow + ((lHigh - lLow) >> 1);
    uint lTestDepth = orderIndependentTransparencyDepthBuffer[lBufferIndex + lMiddle];
    if(lFragDepth > lTestDepth){
      lLow = lMiddle + 1;
    }else if(lFragDepth < lTestDepth){
      lHigh = lMiddle - 1;
    }else{
      // Found the slot of this fragment: store both colors as two packed RGBA16F values
      orderIndependentTransparencyColorBuffer[lBufferIndex + lMiddle] = uvec4(uvec2(packHalf2x16(pAlphaBlendingColor.rg), packHalf2x16(pAlphaBlendingColor.ba)), uvec2(packHalf2x16(pEmissionColor.rg), packHalf2x16(pEmissionColor.ba)));
      lRecorded = true;
      break;
    }
  }

  // Tail weighted blended order independent transparency blending, only for fragments that were not recorded
  // (their depth is not among the stored layer depths)
  if(!lRecorded){
    if(dot(pAlphaBlendingColor, pAlphaBlendingColor) > 0.0){
      // Alpha blending portion
      // The fourth power is computed by squaring twice, because pow() is undefined for a negative base
      float lScaledZ = pViewSpaceZ / 4096.0;
      float lScaledZSquared = lScaledZ * lScaledZ;
      float lDepthWeight = clamp(0.03 / (1e-5 + (lScaledZSquared * lScaledZSquared)), 1e-2, 3e3);
      float lColorWeight = max(min(1.0, max(max(pAlphaBlendingColor.r, pAlphaBlendingColor.g), pAlphaBlendingColor.b) * pAlphaBlendingColor.a), pAlphaBlendingColor.a);
      float lTailWeight = lColorWeight * lDepthWeight;
      pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(pAlphaBlendingColor.rgb * pAlphaBlendingColor.a, pAlphaBlendingColor.a) * lTailWeight;
      pTailWeightedBlendedOrderIndependentTransparencyAlpha = pAlphaBlendingColor.a;
    }
    if(dot(pEmissionColor, pEmissionColor) > 0.0){
      // Fake incorrect, but in most cases still acceptable, additive-like blending with weighted blended order independent transparency.
      // NOTE(review): when both colors are non-zero, this branch replaces the alpha blending result from above.
      // The rescale weight must not be zero, because the additive blending like effect disappears then,
      // but it must also not be too high, since it is used as the alpha value for the blending itself as well.
      // The base resolve step will do the following then:
      // (firstTexel.rgb / firstTexel.a) * secondTexel.r
      // where firstTexel.a and secondTexel.r are this one rescale weight value
      const float lAdditiveBlendingRescaleWeight = 1e-3;
      pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(pEmissionColor.rgb, lAdditiveBlendingRescaleWeight);
      pTailWeightedBlendedOrderIndependentTransparencyAlpha = lAdditiveBlendingRescaleWeight;
    }
  }

}

// Resolves the recorded layers and the tail blending portion of the current pixel into one color,
// and resets the depth slots it has visited to empty (0xffffffffu) while doing so.
// Needs active blending with GL_ONE, GL_ONE_MINUS_SRC_ALPHA => dstcolor = srccolor + (dstcolor * (1.0 - srcalpha)),
// since the front-to-back layers are rendered over black, so the result is pre-multiplied by its alpha value.
vec4 oitResolve(){

  ivec2 lPixel = ivec2(gl_FragCoord.xy);
  int lBaseIndex = oitGetCoordBaseBufferIndex();

  // 1. Start with translucent black (0.0, 0.0, 0.0, 0.0):
  // nothing to add color-wise, and an alpha value of 0.0 means that the background is not occluded
  vec4 lAccumulated = vec4(0.0);

  // 2. Blend the recorded fragments front-to-back with the UNDER blending operator
  for(int lLayer = 0; lLayer < uOrderIndependentTransparencyLayers; lLayer++){
    int lSlot = lBaseIndex + lLayer;
    // Read the slot and mark it as empty in one step
    uint lStoredDepth = atomicExchange(orderIndependentTransparencyDepthBuffer[lSlot], 0xffffffffu);
    if(lStoredDepth == 0xffffffffu){
      // First empty slot, so there are no further recorded layers
      break;
    }
    uvec4 lPacked = orderIndependentTransparencyColorBuffer[lSlot];
    vec4 lLayerColor = vec4(unpackHalf2x16(lPacked.x), unpackHalf2x16(lPacked.y));
    vec4 lLayerEmission = vec4(unpackHalf2x16(lPacked.z), unpackHalf2x16(lPacked.w));
    // Emulating GL_ONE_MINUS_DST_ALPHA, GL_ONE => dstcolor += srccolor * (1.0 - dstalpha)
    vec4 lLayerContribution = vec4(lLayerColor.rgb * lLayerColor.a, lLayerColor.a) + lLayerEmission;
    float lRemainingVisibility = 1.0 - lAccumulated.a;
    lAccumulated += lLayerContribution * lRemainingVisibility;
  }

  // 3. Blend the weighted blended order independent transparency portion behind the recorded layers
  vec4 lTailAccumulation = texelFetch(uTexTailWeightedBlendedOrderIndependentTransparencyColor, lPixel, 0);
  float lTailAlpha = texelFetch(uTexTailWeightedBlendedOrderIndependentTransparencyAlpha, lPixel, 0).x;
  vec3 lTailAverageColor = lTailAccumulation.rgb / max(lTailAccumulation.a, 1e-5);
  vec4 lTail = vec4(lTailAverageColor, lTailAlpha);

  // Compositing by emulating GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA => dstcolor = (srccolor * (1.0 - srcalpha)) + (dstcolor * srcalpha)
  // inverted for front-to-back compositing GL_ONE_MINUS_SRC_ALPHA * GL_ONE_MINUS_DST_ALPHA, GL_ONE => dstcolor += (srccolor * (1.0 - srcalpha)) * (1.0 - dstalpha)
  // TO-DO: Check if it is correct in this way
  lAccumulated += (lTail * (1.0 - lTail.a)) * (1.0 - lAccumulated.a);

  // The final color
  return lAccumulated;
}
	// Hybrid atomic loop weighted blended order independent transparency implementation (work in progress)
	// Copyright (C) 2014 by Benjamin 'BeRo' Rosseaux
	// Licensed under the CC0 license, since German law has no concept of the public
	// domain.

	// This implementation requires at least OpenGL 4.3, because it uses shader storage buffer objects

	// Also supports additive blending for emission color portions

	// Textures fetched by oitResolve() with texelFetch at the current fragment coordinate.
	// NOTE(review): presumably these are the render targets that received the two oitPassTwo() outputs - confirm against the host code.
	uniform sampler2D uTexTailWeightedBlendedOrderIndependentTransparencyColor; // oitResolve() divides .rgb by .a (clamped to at least 1e-5)
	uniform sampler2D uTexTailWeightedBlendedOrderIndependentTransparencyAlpha; // oitResolve() reads only the .x channel

	uniform int uOrderIndependentTransparencyWidth; // Number of pixels per row, used as the row stride in oitGetCoordBaseBufferIndex()
	uniform int uOrderIndependentTransparencyLayers; // Number of consecutive buffer slots (layers) owned by every pixel

	// Per-pixel depth slots: every slot holds the raw bits of a gl_FragCoord.z value, 0xffffffffu marks an empty slot
	layout(std430) coherent buffer orderIndependentTransparencyDepthBufferData {
	uint orderIndependentTransparencyDepthBuffer[];
	};

	// Per-pixel color slots, addressed with the same indices as the depth slots
	layout(std430) buffer orderIndependentTransparencyColorBufferData {
	uvec4 orderIndependentTransparencyColorBuffer[]; // Two packed RGBA16F values: xy = alpha blended color and zw = emission color
	};

	// Returns the index of the first layer slot that belongs to the pixel of the current fragment.
	// Pixels are stored row by row, and every pixel owns uOrderIndependentTransparencyLayers consecutive slots.
	int oitGetCoordBaseBufferIndex(){
	  ivec2 lPixel = ivec2(gl_FragCoord.xy);
	  int lPixelIndex = (lPixel.y * uOrderIndependentTransparencyWidth) + lPixel.x;
	  return lPixelIndex * uOrderIndependentTransparencyLayers;
	}

	// One-time clear right after the depth buffer has been created, for the case that the clear
	// is not done on the CPU side: marks every layer slot of the current pixel as empty (0xffffffffu).
	void oitClear(){
	  int lFirstSlot = oitGetCoordBaseBufferIndex();
	  int lSlotEnd = lFirstSlot + uOrderIndependentTransparencyLayers;
	  for(int lSlot = lFirstSlot; lSlot < lSlotEnd; lSlot++){
	    orderIndependentTransparencyDepthBuffer[lSlot] = 0xffffffffu;
	  }
	}

	// Optional: a "pass zero" with an alpha test against the alpha value 1.0 gives better results for the weighted blended order independent transparency portion

	// First pass: finds the K closest depth values of the current pixel (K = uOrderIndependentTransparencyLayers).
	// The depth of the fragment is pushed through the slots of the pixel with atomicMin: every slot keeps the
	// smaller value, and the larger one is carried on to the next slot, so the slots end up in ascending order.
	// The depth values are compared as the raw bits of gl_FragCoord.z.
	void oitPassOne(){
	  uint lFragDepth = floatBitsToUint(gl_FragCoord.z);
	  int lBufferIndex = oitGetCoordBaseBufferIndex();
	  for(int lLayerIndex = 0; lLayerIndex < uOrderIndependentTransparencyLayers; lLayerIndex++){
	    uint lOldDepth = atomicMin(orderIndependentTransparencyDepthBuffer[lBufferIndex++], lFragDepth);
	    // Stop when the value has taken a free slot, or when the same depth is already stored
	    // (plain logical OR; the backslash-escaped operator was not valid GLSL)
	    if((lOldDepth == 0xffffffffu) || (lOldDepth == lFragDepth)){
	      break;
	    }
	    // Whichever value has lost the slot must be tried against the next layer
	    lFragDepth = max(lOldDepth, lFragDepth);
	  }
	}

	// Second pass: records the fragment color in the layer slot that holds its depth, or, when no slot holds
	// its depth, emits it for the tail weighted blended order independent transparency blending.
	//
	// pAlphaBlendingColor - RGBA color for alpha blending, not pre-multiplied by its alpha value
	// pEmissionColor      - RGBA emission color for additive-like blending
	// pViewSpaceZ         - view space depth of the fragment, used only for the tail blending weight (the sign does not matter)
	// pTailWeightedBlendedOrderIndependentTransparencyColor / ...Alpha - the tail blending outputs; both are zero
	//   when the fragment was recorded in a layer slot or when both input colors are all-zero
	//
	// NOTE(review): the color buffer write is read back by oitResolve() in a later pass, so the host presumably
	// has to issue a memory barrier between the passes - confirm against the host code.
	void oitPassTwo(const in vec4 pAlphaBlendingColor, const in vec4 pEmissionColor, const in float pViewSpaceZ, out vec4 pTailWeightedBlendedOrderIndependentTransparencyColor, out float pTailWeightedBlendedOrderIndependentTransparencyAlpha){
	  uint lFragDepth = floatBitsToUint(gl_FragCoord.z);
	  int lBufferIndex = oitGetCoordBaseBufferIndex();

	  // Defined defaults, so that the out parameters are never left unwritten
	  pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(0.0);
	  pTailWeightedBlendedOrderIndependentTransparencyAlpha = 0.0;

	  // Binary search over the ascending sorted depth slots of this pixel (empty slots are 0xffffffffu and so sort last).
	  // [lLow, lHigh] is an inclusive range, therefore the loop must also run when both bounds are equal,
	  // otherwise the last remaining candidate slot (for example slot 0) would never be tested.
	  bool lRecorded = false;
	  int lLow = 0;
	  int lHigh = uOrderIndependentTransparencyLayers - 1;
	  while(lLow <= lHigh){
	    int lMiddle = lLow + ((lHigh - lLow) >> 1);
	    uint lTestDepth = orderIndependentTransparencyDepthBuffer[lBufferIndex + lMiddle];
	    if(lFragDepth > lTestDepth){
	      lLow = lMiddle + 1;
	    }else if(lFragDepth < lTestDepth){
	      lHigh = lMiddle - 1;
	    }else{
	      // Found the slot of this fragment: store both colors as two packed RGBA16F values
	      orderIndependentTransparencyColorBuffer[lBufferIndex + lMiddle] = uvec4(uvec2(packHalf2x16(pAlphaBlendingColor.rg), packHalf2x16(pAlphaBlendingColor.ba)), uvec2(packHalf2x16(pEmissionColor.rg), packHalf2x16(pEmissionColor.ba)));
	      lRecorded = true;
	      break;
	    }
	  }

	  // Tail weighted blended order independent transparency blending, only for fragments that were not recorded
	  // (their depth is not among the stored layer depths)
	  if(!lRecorded){
	    if(dot(pAlphaBlendingColor, pAlphaBlendingColor) > 0.0){
	      // Alpha blending portion
	      // The fourth power is computed by squaring twice, because pow() is undefined for a negative base
	      float lScaledZ = pViewSpaceZ / 4096.0;
	      float lScaledZSquared = lScaledZ * lScaledZ;
	      float lDepthWeight = clamp(0.03 / (1e-5 + (lScaledZSquared * lScaledZSquared)), 1e-2, 3e3);
	      float lColorWeight = max(min(1.0, max(max(pAlphaBlendingColor.r, pAlphaBlendingColor.g), pAlphaBlendingColor.b) * pAlphaBlendingColor.a), pAlphaBlendingColor.a);
	      float lTailWeight = lColorWeight * lDepthWeight;
	      pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(pAlphaBlendingColor.rgb * pAlphaBlendingColor.a, pAlphaBlendingColor.a) * lTailWeight;
	      pTailWeightedBlendedOrderIndependentTransparencyAlpha = pAlphaBlendingColor.a;
	    }
	    if(dot(pEmissionColor, pEmissionColor) > 0.0){
	      // Fake incorrect, but in most cases still acceptable, additive-like blending with weighted blended order independent transparency.
	      // NOTE(review): when both colors are non-zero, this branch replaces the alpha blending result from above.
	      // The rescale weight must not be zero, because the additive blending like effect disappears then,
	      // but it must also not be too high, since it is used as the alpha value for the blending itself as well.
	      // The base resolve step will do the following then:
	      // (firstTexel.rgb / firstTexel.a) * secondTexel.r
	      // where firstTexel.a and secondTexel.r are this one rescale weight value
	      const float lAdditiveBlendingRescaleWeight = 1e-3;
	      pTailWeightedBlendedOrderIndependentTransparencyColor = vec4(pEmissionColor.rgb, lAdditiveBlendingRescaleWeight);
	      pTailWeightedBlendedOrderIndependentTransparencyAlpha = lAdditiveBlendingRescaleWeight;
	    }
	  }

	}

	// Resolves the recorded layers and the tail blending portion of the current pixel into one color,
	// and resets the depth slots it has visited to empty (0xffffffffu) while doing so.
	// Needs active blending with GL_ONE, GL_ONE_MINUS_SRC_ALPHA => dstcolor = srccolor + (dstcolor * (1.0 - srcalpha)),
	// since the front-to-back layers are rendered over black, so the result is pre-multiplied by its alpha value.
	vec4 oitResolve(){

	  ivec2 lPixel = ivec2(gl_FragCoord.xy);
	  int lBaseIndex = oitGetCoordBaseBufferIndex();

	  // 1. Start with translucent black (0.0, 0.0, 0.0, 0.0):
	  // nothing to add color-wise, and an alpha value of 0.0 means that the background is not occluded
	  vec4 lAccumulated = vec4(0.0);

	  // 2. Blend the recorded fragments front-to-back with the UNDER blending operator
	  for(int lLayer = 0; lLayer < uOrderIndependentTransparencyLayers; lLayer++){
	    int lSlot = lBaseIndex + lLayer;
	    // Read the slot and mark it as empty in one step
	    uint lStoredDepth = atomicExchange(orderIndependentTransparencyDepthBuffer[lSlot], 0xffffffffu);
	    if(lStoredDepth == 0xffffffffu){
	      // First empty slot, so there are no further recorded layers
	      break;
	    }
	    uvec4 lPacked = orderIndependentTransparencyColorBuffer[lSlot];
	    vec4 lLayerColor = vec4(unpackHalf2x16(lPacked.x), unpackHalf2x16(lPacked.y));
	    vec4 lLayerEmission = vec4(unpackHalf2x16(lPacked.z), unpackHalf2x16(lPacked.w));
	    // Emulating GL_ONE_MINUS_DST_ALPHA, GL_ONE => dstcolor += srccolor * (1.0 - dstalpha)
	    vec4 lLayerContribution = vec4(lLayerColor.rgb * lLayerColor.a, lLayerColor.a) + lLayerEmission;
	    float lRemainingVisibility = 1.0 - lAccumulated.a;
	    lAccumulated += lLayerContribution * lRemainingVisibility;
	  }

	  // 3. Blend the weighted blended order independent transparency portion behind the recorded layers
	  vec4 lTailAccumulation = texelFetch(uTexTailWeightedBlendedOrderIndependentTransparencyColor, lPixel, 0);
	  float lTailAlpha = texelFetch(uTexTailWeightedBlendedOrderIndependentTransparencyAlpha, lPixel, 0).x;
	  vec3 lTailAverageColor = lTailAccumulation.rgb / max(lTailAccumulation.a, 1e-5);
	  vec4 lTail = vec4(lTailAverageColor, lTailAlpha);

	  // Compositing by emulating GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA => dstcolor = (srccolor * (1.0 - srcalpha)) + (dstcolor * srcalpha)
	  // inverted for front-to-back compositing GL_ONE_MINUS_SRC_ALPHA * GL_ONE_MINUS_DST_ALPHA, GL_ONE => dstcolor += (srccolor * (1.0 - srcalpha)) * (1.0 - dstalpha)
	  // TO-DO: Check if it is correct in this way
	  lAccumulated += (lTail * (1.0 - lTail.a)) * (1.0 - lAccumulated.a);

	  // The final color
	  return lAccumulated;
	}