Skip to content

Instantly share code, notes, and snippets.

@tcantenot
Forked from h3r2tic/raymarch.hlsl
Created May 2, 2023 13:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tcantenot/c15a014aad4621a9b3da36e476438909 to your computer and use it in GitHub Desktop.
Save tcantenot/c15a014aad4621a9b3da36e476438909 to your computer and use it in GitHub Desktop.
Depth buffer raymarching for contact shadows, SSGI, SSR, etc.
// Copyright (c) 2023 Tomasz Stachowiak
//
// This contribution is dual licensed under EITHER OF
//
// Apache License, Version 2.0, (http://www.apache.org/licenses/LICENSE-2.0)
// MIT license (http://opensource.org/licenses/MIT)
//
// at your option.
#include "/inc/frame_constants.hlsl"
#include "/inc/uv.hlsl"
#include "/inc/samplers.hlsl"
struct RootFinderInput {
/// Distance to the surface of which a root we're trying to find
float distance;
/// Whether to consider this sample valid for intersection.
/// Mostly relevant for allowing the ray marcher to travel behind surfaces,
/// as it will mark surfaces it travels under as invalid.
bool valid;
};
template<typename Point>
struct HybridRootFinder {
uint linear_steps;
uint bisection_steps;
bool use_secant;
float jitter;
float min_t;
float max_t;
static HybridRootFinder new_with_linear_steps(uint v) {
HybridRootFinder res;
res.linear_steps = v;
res.bisection_steps = 0;
res.use_secant = false;
res.jitter = 1;
res.min_t = 0;
res.max_t = 1;
return res;
}
HybridRootFinder with_bisection_steps(uint v) {
HybridRootFinder res = this;
res.bisection_steps = v;
return res;
}
HybridRootFinder with_use_secant(bool v) {
HybridRootFinder res = this;
res.use_secant = v;
return res;
}
HybridRootFinder with_jitter(float v) {
HybridRootFinder res = this;
res.jitter = v;
return res;
}
HybridRootFinder with_min_t(uint v) {
HybridRootFinder res = this;
res.min_t = v;
return res;
}
HybridRootFinder with_max_t(uint v) {
HybridRootFinder res = this;
res.max_t = v;
return res;
}
// IntersectionFn: (Point) -> DistanceTy, where `DistanceTy` inherits from `RootFinderInput`.
template<typename IntersectionFn, typename DistanceTy>
bool find_root(Point start, Point end, IntersectionFn distance_fn, inout float hit_t, out DistanceTy hit_d) {
const Point dir = end - start;
float min_t = this.min_t;
float max_t = this.max_t;
DistanceTy min_d = (DistanceTy)0;
DistanceTy max_d = (DistanceTy)0;
const float step_size = (max_t - min_t) / linear_steps;
bool intersected = false;
//
// Ray march using linear steps
if (linear_steps > 0) {
const float candidate_t = min_t + step_size * jitter;
const Point candidate = start + dir * candidate_t;
const DistanceTy candidate_d = distance_fn(candidate);
intersected = candidate_d.distance < 0 && candidate_d.valid;
if (intersected) {
max_t = candidate_t;
max_d = candidate_d;
// The `[min_t .. max_t]` interval contains an intersection. End the linear search.
} else {
// No intersection yet. Carry on.
min_t = candidate_t;
min_d = candidate_d;
for (uint step = 1; step < linear_steps; ++step) {
const float candidate_t = min_t + step_size;
const Point candidate = start + dir * candidate_t;
const DistanceTy candidate_d = distance_fn(candidate);
intersected = candidate_d.distance < 0 && candidate_d.valid;
if (intersected) {
max_t = candidate_t;
max_d = candidate_d;
// The `[min_t .. max_t]` interval contains an intersection. End the linear search.
break;
} else {
// No intersection yet. Carry on.
min_t = candidate_t;
min_d = candidate_d;
}
}
}
}
//
// Refine the hit using bisection
if (intersected) {
for (uint step = 0; step < bisection_steps; ++step) {
const float mid_t = (min_t + max_t) * 0.5;
const Point candidate = start + dir * mid_t;
const DistanceTy candidate_d = distance_fn(candidate);
if (candidate_d.distance < 0 && candidate_d.valid) {
// Intersection at the mid point. Refine the first half.
max_t = mid_t;
max_d = candidate_d;
} else {
// No intersection yet at the mid point. Refine the second half.
min_t = mid_t;
min_d = candidate_d;
}
}
if (this.use_secant) {
// Finish with one application of the secant method
const float total_d = min_d.distance + -max_d.distance;
const float mid_t = lerp(min_t, max_t, min_d.distance / total_d);
const Point candidate = start + dir * mid_t;
const DistanceTy candidate_d = distance_fn(candidate);
// Only accept the result of the secant method if it improves upon the previous result.
//
// Technically this should be `abs(candidate_d.distance) < min(min_d.distance, -max_d.distance) * frac`,
// but this seems sufficient.
if (abs(candidate_d.distance) < min_d.distance * 0.9 && candidate_d.valid) {
hit_t = mid_t;
hit_d = candidate_d;
} else {
hit_t = max_t;
hit_d = max_d;
}
return true;
} else {
hit_t = max_t;
hit_d = max_d;
return true;
}
} else {
// Mark the conservative miss distance.
hit_t = min_t;
return false;
}
}
};
struct DistanceWithPenetration: RootFinderInput {
/// Conservative estimate of depth to which the ray penetrates the marched surface.
float penetration;
};
struct DepthRaymarchDistanceFn {
Texture2D<float> depth_tex;
float2 depth_tex_size;
bool march_behind_surfaces;
float depth_thickness;
DistanceWithPenetration operator()(float3 ray_point_cs) {
const float2 interp_uv = cs_to_uv(ray_point_cs.xy);
const float ray_depth = 1.0 / ray_point_cs.z;
// We're using both point-sampled and bilinear-filtered values from the depth buffer.
//
// That's really stupid but works like magic. For samples taken near the ray origin,
// the discrete nature of the depth buffer becomes a problem. It's not a land of continuous surfaces,
// but a bunch of stacked duplo bricks.
//
// Technically we should be taking discrete steps in this duplo land, but then we're at the mercy
// of arbitrary quantization of our directions -- and sometimes we'll take a step which would
// claim that the ray is occluded -- even though the underlying smooth surface wouldn't occlude it.
//
// If we instead take linear taps from the depth buffer, we reconstruct the linear surface.
// That fixes acne, but introduces false shadowing near object boundaries, as we now pretend
// that everything is shrink-wrapped by this continuous 2.5D surface, and our depth thickness
// heuristic ends up falling apart.
//
// The fix is to consider both the smooth and the discrete surfaces, and only claim occlusion
// when the ray descends below both.
//
// The two approaches end up fixing each other's artifacts:
// * The false occlusions due to duplo land are rejected because the ray stays above the smooth surface.
// * The shrink-wrap surface is no longer continuous, so it's possible for rays to miss it.
#if 1
const float linear_depth = 1.0 / depth_tex.SampleLevel(sampler_llc, interp_uv, 0);
const float unfiltered_depth = 1.0 / depth_tex.SampleLevel(sampler_nnc, interp_uv, 0);
#else
// Manual bilinear; in case using a linear sampler on the depth format is not supported.
const float4 depth_vals = depth_tex.GatherRed(sampler_nnc, interp_uv, 0);
const float2 frac_px = frac(interp_uv * depth_tex_size - 0.5.xx);
const float linear_depth = 1.0 / lerp(
lerp(depth_vals.w, depth_vals.z, frac_px.x),
lerp(depth_vals.x, depth_vals.y, frac_px.x),
frac_px.y
);
const float unfiltered_depth = 1.0 / depth_tex.SampleLevel(sampler_nnc, interp_uv, 0);
#endif
#if 1
const float max_depth = max(linear_depth, unfiltered_depth);
const float min_depth = min(linear_depth, unfiltered_depth);
#else
// Just for sanity checking. Should not be used.
const float max_depth = unfiltered_depth;
const float min_depth = unfiltered_depth;
#endif
const float bias = 0.000002;
DistanceWithPenetration res;
res.distance = max_depth * (1.0 + bias) - ray_depth;
// This will be used at the end of the ray march to potentially discard the hit.
res.penetration = ray_depth - min_depth;
if (this.march_behind_surfaces) {
res.valid = res.penetration < this.depth_thickness;
} else {
res.valid = true;
}
return res;
}
};
struct DepthRayMarchResult {
/// True if the raymarch hit something.
bool hit;
/// Approximate UV of the hit.
float2 hit_uv;
/// The distance that the hit point penetrates into the hit surface.
/// Will normally be non-zero due to limited precision of the ray march.
float hit_penetration;
/// Ditto, within the range `0..DepthRayMarch::depth_thickness_linear_z`
float hit_penetration_frac;
/// In case of a hit, the normalized distance to it.
///
/// In case of a miss, the furthest the ray managed to travel, which could either be
/// exceeding the max range, or getting behind a surface further than the depth thickness.
///
/// Range: `0..=1` as a lerp factor over `ray_start_cs..=ray_end_cs`.
float hit_t;
};
struct DepthRayMarch {
/// Number of steps to be taken at regular intervals to find an initial intersection.
/// Must not be zero.
uint linear_steps;
/// Number of steps in a bisection (binary search) to perform once the linear search
/// has found an intersection. Helps narrow down the hit, increasing the chance of
/// the secant method finding an accurate hit point.
///
/// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows.
uint bisection_steps;
/// Approximate the root position using the secant method -- by solving for line-line
/// intersection between the ray approach rate and the surface gradient.
///
/// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows.
bool use_secant;
/// Jitter to apply to the first step of the linear search; 0..=1 range, mapping
/// to the extent of a single linear step in the first phase of the search.
/// Use 1.0 if you don't want jitter.
float jitter;
/// Clip space coordinates (w=1) of the ray.
float3 ray_start_cs;
float3 ray_end_cs;
/// Should be used for contact shadows, but not for any color bounce, e.g. SSR.
///
/// For SSR etc. this can easily create leaks, but with contact shadows it allows the rays
/// to pass over invalid occlusions (due to thickness), and find potentially valid ones ahead.
///
/// Note that this will cause the linear search to potentially miss surfaces,
/// because when the ray overshoots and ends up penetrating a surface further than
/// `depth_thickness_linear_z`, the ray marcher will just carry on.
///
/// For this reason, this may require a lot of samples, or high depth thickness,
/// so that `depth_thickness_linear_z >= world space ray length / linear_steps`.
bool march_behind_surfaces;
/// When marching the depth buffer, we only have 2.5D information, and don't know how
/// thick surfaces are. We shall assume that the depth buffer fragments are litte squares
/// with a constant thickness defined by this parameter.
float depth_thickness_linear_z;
/// Size of the depth buffer we're marching in, in pixels.
float2 depth_tex_size;
/// The depth buffer. Must use reverse Z with an infinite far plane.
Texture2D<float> depth_tex;
static DepthRayMarch new_from_depth(
Texture2D<float> depth_tex,
float2 depth_tex_size
) {
DepthRayMarch res = (DepthRayMarch)0;
res.jitter = 1;
res.linear_steps = 4;
res.bisection_steps = 0;
res.depth_tex = depth_tex;
res.depth_tex_size = depth_tex_size;
res.depth_thickness_linear_z = 1;
res.march_behind_surfaces = false;
return res;
}
DepthRayMarch with_linear_steps(uint v) {
DepthRayMarch res = this;
res.linear_steps = v;
return res;
}
DepthRayMarch with_bisection_steps(uint v) {
DepthRayMarch res = this;
res.bisection_steps = v;
return res;
}
DepthRayMarch with_use_secant(bool v) {
DepthRayMarch res = this;
res.use_secant = v;
return res;
}
DepthRayMarch with_jitter(float v) {
DepthRayMarch res = this;
res.jitter = v;
return res;
}
DepthRayMarch with_depth_thickness(float v) {
DepthRayMarch res = this;
res.depth_thickness_linear_z = v;
return res;
}
DepthRayMarch with_march_behind_surfaces(bool v) {
DepthRayMarch res = this;
res.march_behind_surfaces = v;
return res;
}
/// March from a clip-space position (w = 1)
DepthRayMarch from_cs(float3 v) {
DepthRayMarch res = this;
res.ray_start_cs = v;
return res;
}
/// March towards a clip-space direction.
/// If `infinite` is `true`, then the ray is extended to cover the whole view frustum.
/// If `infinite` is `false`, then the ray length is that of the `dir_cs` parameter.
//
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum.
DepthRayMarch to_cs_dir_impl(float4 dir_cs, bool infinite) {
float4 end_cs = float4(this.ray_start_cs, 1) + dir_cs;
end_cs /= end_cs.w;
float3 delta_cs = end_cs.xyz - this.ray_start_cs;
// Clip the ray to the frustum
const float2 dist_to_edge = (sign(delta_cs.xy) - this.ray_start_cs.xy) / delta_cs.xy;
const float min_dist_to_edge = min(dist_to_edge.x, dist_to_edge.y);
if (infinite) {
delta_cs *= min_dist_to_edge;
} else {
// If unbounded, would make the ray reach the end of the frustum
delta_cs *= min(1.0, min_dist_to_edge);
}
DepthRayMarch res = this;
res.ray_end_cs = this.ray_start_cs + delta_cs;
return res;
}
/// March to a clip-space position (w = 1)
///
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum.
DepthRayMarch to_cs(float3 end_cs) {
float4 dir = float4(end_cs - this.ray_start_cs, 0) * sign(end_cs.z);
return this.to_cs_dir_impl(dir, false);
}
/// March towards a clip-space direction. Infinite (ray is extended to cover the whole view frustum).
///
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum.
DepthRayMarch to_cs_dir(float4 dir) {
return this.to_cs_dir_impl(dir, true);
}
/// March to a world-space position.
///
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum.
DepthRayMarch to_ws(float3 end) {
return this.to_cs(main_view().position_world_to_sample(end));
}
/// March towards a world-space direction. Infinite (ray is extended to cover the whole view frustum).
///
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum.
DepthRayMarch to_ws_dir(float3 dir) {
return this.to_cs_dir_impl(main_view().direction_world_to_clip(dir), true);
}
/// Perform the ray march.
DepthRayMarchResult march() {
DepthRayMarchResult res = (DepthRayMarchResult)0;
const float2 ray_start_uv = cs_to_uv(ray_start_cs.xy);
const float2 ray_end_uv = cs_to_uv(ray_end_cs.xy);
const float2 ray_uv_delta = ray_end_uv - ray_start_uv;
const float2 ray_len_px = ray_uv_delta * depth_tex_size;
const uint MIN_PX_PER_STEP = 1;
const int step_count = min(linear_steps, int(floor(length(ray_len_px) / MIN_PX_PER_STEP)));
const float linear_z_to_scaled_linear_z = main_view().rcp_near_plane_distance();
const float depth_thickness = this.depth_thickness_linear_z * linear_z_to_scaled_linear_z;
DepthRaymarchDistanceFn distance_fn;
distance_fn.depth_tex = depth_tex;
distance_fn.depth_tex_size = depth_tex_size;
distance_fn.march_behind_surfaces = this.march_behind_surfaces;
distance_fn.depth_thickness = depth_thickness;
DistanceWithPenetration hit;
bool intersected = HybridRootFinder<float3>
::new_with_linear_steps(step_count)
.with_bisection_steps(this.bisection_steps)
.with_use_secant(this.use_secant)
.with_jitter(this.jitter)
.find_root(ray_start_cs, ray_end_cs, distance_fn, res.hit_t, hit);
if (intersected && hit.penetration < depth_thickness && hit.distance < depth_thickness) {
res.hit = true;
res.hit_uv = lerp(ray_start_uv, ray_end_uv, res.hit_t);
res.hit_penetration = hit.penetration / linear_z_to_scaled_linear_z;
res.hit_penetration_frac = hit.penetration / depth_thickness;
return res;
}
return res;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment