Skip to content

Instantly share code, notes, and snippets.

@h3r2tic
Created June 15, 2021 16:50
Show Gist options
  • Save h3r2tic/9073d11131dd7101b33ca1671cbb2836 to your computer and use it in GitHub Desktop.
Save h3r2tic/9073d11131dd7101b33ca1671cbb2836 to your computer and use it in GitHub Desktop.
#include "../inc/math_const.hlsl"
#include "../inc/math.hlsl"
#include "../inc/frame_constants.hlsl"
#include "../inc/uv.hlsl"
#include "../inc/hash.hlsl"
#include "../inc/pack_unpack.hlsl"
#include "../inc/gbuffer.hlsl"
// Packed G-buffer. main() reads it at `px * 2` and unpacks it through
// GbufferDataPacked::from_uint4(asuint(...)) to obtain the surface normal.
[[vk::binding(0)]] Texture2D<float4> gbuffer_tex;
// Depth, read at the dispatch pixel coordinate. main() early-outs when the
// value is exactly 0, and process_sample() treats values <= 0 as sky.
[[vk::binding(1)]] Texture2D<float> half_depth_tex;
// Normals fetched by fetch_normal_vs(), addressed with output_tex_size.xy.
[[vk::binding(2)]] Texture2D<float4> view_normal_tex;
// Radiance fetched by fetch_lighting() at the reprojected location.
[[vk::binding(3)]] Texture2D<float4> prev_radiance_tex;
// Reprojection data used by fetch_lighting(): .xy is added to the UV,
// .z scales the fetched radiance.
[[vk::binding(4)]] Texture2D<float4> reprojection_tex;
// Result written by main(): rgb = AO or accumulated lighting (see USE_AO_ONLY),
// a = visibility term.
[[vk::binding(5)]] RWTexture2D<float4> output_tex;
//[[vk::binding(6)]] RWTexture2D<float4> bent_normal_out_tex;
// When 1, main() copies the visibility term into rgb instead of the
// lighting accumulated by process_sample().
#define USE_AO_ONLY 1
[[vk::binding(6)]] cbuffer _ {
// .xy scales UVs to pixel coordinates for reprojection_tex / prev_radiance_tex,
// and provides the aspect ratio of the slice direction in main().
float4 input_tex_size;
// .xy scales UVs to pixel coordinates for view_normal_tex / half_depth_tex;
// .w scales SSGI_KERNEL_RADIUS. Presumably .zw hold the reciprocal size — TODO confirm.
float4 output_tex_size;
};
// Compile-time selection between two tuning presets; the first one is active.
#if 1
// Micro-occlusion settings used for denoising
// Number of samples taken on EACH side of the slice (main() processes two per loop iteration).
static const uint SSGI_HALF_SAMPLE_COUNT = 6;
// Kernel radius; scaled by output_tex_size.w (presumably 1/height, i.e. ~50 pixels — TODO confirm).
#define SSGI_KERNEL_RADIUS (50.0 * output_tex_size.w)
// Upper bound for the scaled kernel radius; main() shrinks the kernel to stay below it.
#define MAX_KERNEL_RADIUS_CS 0.4
// When 1, main() scales the radius by view_to_clip[1][1] / -view_z.
#define USE_KERNEL_DISTANCE_SCALING 0
// When 1, main() adds a hashed per-pixel, per-frame jitter to the slice direction noise.
#define USE_RANDOM_JITTER 0
#else
// Crazy settings for testing with the Cornell Box
static const uint SSGI_HALF_SAMPLE_COUNT = 32;
#define SSGI_KERNEL_RADIUS 5
#define MAX_KERNEL_RADIUS_CS 100.0
#define USE_KERNEL_DISTANCE_SCALING 1
#define USE_RANDOM_JITTER 1
#endif
// Per-frame slice rotation in degrees; main() indexes it with frame_index % 6
// and divides by 360 before mixing it with the spatial noise.
static const float temporal_rotations[] = { 60.0, 300.0, 180.0, 240.0, 120.0, 0.0 };
// Per-frame offset along the slice, in units of one sample step;
// main() indexes it with (frame_index / 6) % 4.
static const float temporal_offsets[] = { 0.0, 0.5, 0.25, 0.75 };
// Cheap square-root approximation that works directly on the float's bit pattern.
// Only meaningful for non-negative x: a set sign bit would be shifted into the
// exponent field and produce an unrelated large value.
float fast_sqrt(float x) {
    // Shifting the raw bits right by one roughly halves the exponent;
    // adding the magic constant restores the exponent bias.
    return asfloat((asuint(x) >> 1u) + 0x1fbd1df5);
}
// Fast approximate arccosine.
// max absolute error 9.0x10^-3
// Eberly's polynomial degree 1 - respect bounds
// 4 VGPR, 12 FR (8 FR, 1 QR), 1 scalar
// input [-1, 1] and output [0, M_PI]
//
// Inputs whose magnitude slightly exceeds 1 (e.g. a cosine computed as
// dot / length with rounding error) are clamped to the valid range. Without the
// clamp, `1 - x` becomes negative and fast_sqrt(), which manipulates the raw
// float bits, returns a huge bogus value instead of ~0.
float fast_acos(float inX) {
    float x = abs(inX);
    // Keep |x| inside the domain of the approximation.
    x = (x < 1.0f) ? x : 1.0f;
    // Degree-1 polynomial in |x| multiplied by sqrt(1 - |x|).
    float res = -0.156583f * x + (M_FRAC_PI_2);
    res *= fast_sqrt(1.0f - x);
    // Mirror the result for negative inputs: acos(-x) = pi - acos(x).
    return (inX >= 0) ? res : M_PI - res;
}
// Simple ray record; it is not referenced anywhere in the visible part of this file.
// Field meanings are presumably origin (o) and direction (d) — TODO confirm against users.
struct Ray {
float3 o;
float3 d;
};
// Fetches radiance for the given UV from prev_radiance_tex.
// The UV is first shifted by the .xy stored in reprojection_tex, and the
// fetched radiance is scaled by the .z stored there.
float3 fetch_lighting(float2 uv) {
    // Debug overrides kept from the original:
    //return 0.0.xxx;
    //return prev_radiance_tex[px].xyz;
    const int2 px = int2(input_tex_size.xy * uv);
    const float4 reproj = reprojection_tex[px];

    // Pixel addressed by the reprojected UV.
    const int2 reprojected_px = int2(input_tex_size.xy * (uv + reproj.xy));
    const float3 prev_radiance = prev_radiance_tex[reprojected_px].xyz;

    return lerp(0.0, prev_radiance, reproj.z);
}
// Returns the .xyz stored in view_normal_tex at the pixel addressed by `uv`
// (UV scaled by output_tex_size.xy and truncated to integer coordinates).
float3 fetch_normal_vs(float2 uv) {
    return view_normal_tex[int2(output_tex_size.xy * uv)].xyz;
}
// Evaluates 0.25 * (-cos(2*h1 - n) + cos(n) + 2*h1*sin(n)) for one horizon angle.
//   h1: horizon angle in radians.
//   n:  angle of the projected normal in radians.
float integrate_half_arc(float h1, float n) {
    const float two_h1 = 2.0 * h1;
    return 0.25 * (cos(n) - cos(two_h1 - n) + two_h1 * sin(n));
}
// Sums the per-horizon term -cos(2*h - n) + cos(n) + 2*h*sin(n) over both
// horizon angles and scales the result by 0.25.
//   h1, h2: the two horizon angles in radians.
//   n:      angle of the projected normal in radians.
float integrate_arc(float h1, float h2, float n) {
    const float cos_n = cos(n);
    const float sin_n = sin(n);

    const float first = -cos(2.0 * h1 - n) + cos_n + 2.0 * h1 * sin_n;
    const float second = -cos(2.0 * h2 - n) + cos_n + 2.0 * h2 * sin_n;

    return 0.25 * (first + second);
}
// Raises the horizon value towards a new candidate.
// If `cur` exceeds `prev`, returns the blend of the two controlled by `blend`
// (0 keeps `prev`, 1 takes `cur`); otherwise `prev` is returned unchanged.
float update_horizion_angle(float prev, float cur, float blend) {
    if (cur > prev) {
        return lerp(prev, cur, blend);
    }
    return prev;
}
// Distance `t` along `dir` (starting at the origin) at which the ray meets the
// plane that passes through `pt` with normal `normal`.
// The denominator -dot(dir, normal) is clamped to at least 1e-5, so only planes
// facing against `dir` give a true intersection; otherwise the numerator is
// simply divided by 1e-5.
float intersect_dir_plane_onesided(float3 dir, float3 normal, float3 pt) {
    const float facing = max(1e-5, -dot(dir, normal));
    return -dot(pt, normal) / facing;
}
// Removes from `pt` its component along `normal`, i.e. projects `pt` onto the
// plane through the origin perpendicular to `normal`.
// This is an exact projection only when `normal` has unit length.
float3 project_point_on_plane(float3 pt, float3 normal) {
    const float along_normal = dot(pt, normal);
    return pt - normal * along_normal;
}
// Processes one sample along a slice direction: updates the running horizon
// cosine and, for samples that raise the horizon, accumulates lighting.
//
//   i               Index of the sample along its side of the slice; the
//                   previous-sample truncation is only applied for i > 0.
//   intsgn          Side selector supplied by the caller (+1 / -1); n_angle is
//                   multiplied by -intsgn before the arc integration.
//   n_angle         Signed angle of the projected normal (by-value; the local
//                   copy is modified).
//   prev_sample_vs  In/out. Position used for the truncation of the previous
//                   horizon; overwritten with this sample's view-space position
//                   whenever the sample is not sky.
//   sample_cs       Sample position; .z holds the depth fetched by the caller.
//   center_vs       View-space position of the pixel being shaded.
//   normal_vs       View-space normal of the pixel being shaded.
//   v_vs            View vector of the pixel being shaded.
//   kernel_radius   Distance at which a sample's influence reaches zero.
//   theta_cos_max   Current maximum horizon cosine for this side.
//   color_accum     In/out. rgb accumulates weighted lighting, a counts the
//                   contributing samples.
//
// Returns the updated maximum horizon cosine.
float process_sample(uint i, float intsgn, float n_angle, inout float3 prev_sample_vs, float4 sample_cs, float3 center_vs, float3 normal_vs, float3 v_vs, float kernel_radius, float theta_cos_max, inout float4 color_accum) {
    if (sample_cs.z > 0) {
        // Reconstruct the sample's view-space position (with perspective divide).
        float4 sample_vs4 = mul(frame_constants.view_constants.sample_to_view, sample_cs);
        float3 sample_vs = sample_vs4.xyz / sample_vs4.w;

        float3 sample_vs_offset = sample_vs - center_vs;
        float sample_vs_offset_len = length(sample_vs_offset);

        // Cosine of the angle between the direction to the sample and the view vector.
        float sample_theta_cos = dot(sample_vs_offset, v_vs) / sample_vs_offset_len;
        const float sample_distance_normalized = sample_vs_offset_len / kernel_radius;

        // Samples outside the kernel radius are ignored entirely.
        if (sample_distance_normalized < 1) {
            // Quadratic falloff: 1 at the center, 0 at the kernel radius.
            //const float sample_influence = 1;
            const float sample_influence = 1.0 - sample_distance_normalized * sample_distance_normalized;

            // Visibility is decided against the horizon BEFORE this sample updates it.
            bool sample_visible = sample_theta_cos >= theta_cos_max;
            float theta_cos_prev = theta_cos_max;
            theta_cos_max = update_horizion_angle(theta_cos_max, sample_theta_cos, sample_influence);

            if (sample_visible) {
                float3 lighting = fetch_lighting(cs_to_uv(sample_cs.xy));
                float3 sample_normal_vs = fetch_normal_vs(cs_to_uv(sample_cs.xy));
                float theta_cos_prev_trunc = theta_cos_prev;
#if 1
                if (i > 0) {
                    // Account for the sampled surface's normal, and how it's facing the center pixel
                    float3 p1 = prev_sample_vs * min(
                        intersect_dir_plane_onesided(prev_sample_vs, sample_normal_vs, sample_vs),
                        intersect_dir_plane_onesided(prev_sample_vs, normal_vs, center_vs)
                    );
                    theta_cos_prev_trunc = clamp(dot(normalize(p1 - center_vs), v_vs), theta_cos_prev_trunc, theta_cos_max);
                }
#endif
                {
                    // Scale the lighting contribution by the cosine factor
                    n_angle *= -intsgn;
                    float h1 = fast_acos(theta_cos_prev_trunc);
                    float h2 = fast_acos(theta_cos_max);
                    // Keep both angles within +-pi/2 of the normal angle.
                    float h1p = n_angle + max(h1 - n_angle, -M_FRAC_PI_2);
                    float h2p = n_angle + min(h2 - n_angle, M_FRAC_PI_2);
                    float inv_ao =
                        integrate_half_arc(h1p, n_angle) -
                        integrate_half_arc(h2p, n_angle);
                    lighting *= inv_ao;
                    // Zero the contribution unless the sampled surface faces the center pixel.
                    lighting *= step(0.0, dot(-normalize(sample_vs_offset), sample_normal_vs));
                }
                color_accum += float4(lighting, 1.0);
            }
        }
        prev_sample_vs = sample_vs;
    } else {
        // Sky; assume no occlusion
        // With cur = -1 and any theta_cos_max >= -1 this leaves the horizon unchanged.
        theta_cos_max = update_horizion_angle(theta_cos_max, -1, 1);
    }
    return theta_cos_max;
}
// Compute entry point: one thread per output pixel, 8x8 threads per group.
// For each pixel it picks a single slice direction (varied per pixel and per
// frame), marches SSGI_HALF_SAMPLE_COUNT samples to either side of the pixel
// along that slice, tracks the highest horizon on each side via
// process_sample(), and integrates the visible arc into a visibility term.
// Output: rgb = visibility (USE_AO_ONLY) or accumulated lighting, a = visibility,
// both multiplied by the length of the normal projected onto the slice plane.
[numthreads(8, 8, 1)]
void main(in uint2 px : SV_DispatchThreadID) {
float2 uv = get_uv(px, output_tex_size);
const float depth = half_depth_tex[px];
// A depth of exactly 0 is treated as "nothing to shade": write zero colour
// with alpha 1 and stop.
// NOTE(review): in USE_AO_ONLY mode the regular path writes rgb == a, whereas
// this early-out writes rgb = 0 with a = 1 — confirm consumers only rely on
// alpha (or ignore these pixels).
if (0.0 == depth) {
output_tex[px] = float4(0, 0, 0, 1);
return;
}
// The G-buffer is addressed at twice the dispatch coordinate
// (presumably it is full resolution while this pass runs at half — TODO confirm).
float4 gbuffer_packed = gbuffer_tex[px * 2];
GbufferData gbuffer = GbufferDataPacked::from_uint4(asuint(gbuffer_packed)).unpack();
// Rotate the G-buffer normal into view space (w = 0, so no translation) and renormalize.
const float3 normal_vs = normalize(mul(frame_constants.view_constants.world_to_view, float4(gbuffer.normal, 0)).xyz);
float4 col = 0.0.xxxx;
float kernel_radius = SSGI_KERNEL_RADIUS;
const ViewRayContext view_ray_context = ViewRayContext::from_uv_and_depth(uv, depth);
// View vector: the negated, normalized view-space ray direction.
float3 v_vs = -normalize(view_ray_context.ray_dir_vs());
float4 ray_hit_cs = view_ray_context.ray_hit_cs;
float3 ray_hit_vs = view_ray_context.ray_hit_vs();
// Spatial direction noise: one of 16 values k/16, repeating every 4x4 pixels.
float spatial_direction_noise = 1.0 / 16.0 * ((((px.x + px.y) & 3) << 2) + (px.x & 3));
// Temporal direction noise: cycles through 6 rotations, expressed as a fraction of a full turn.
float temporal_direction_noise = temporal_rotations[frame_constants.frame_index % 6] / 360.0;
// Spatial offset noise: one of 4 values k/4 (unsigned subtraction wraps; only the low 2 bits are kept).
float spatial_offset_noise = (1.0 / 4.0) * ((px.y - px.x) & 3);
// Temporal offset noise: advances once every 6 frames, cycling through 4 values.
float temporal_offset_noise = temporal_offsets[frame_constants.frame_index / 6 % 4];
#if USE_RANDOM_JITTER
uint seed0 = hash3(uint3(frame_constants.frame_index, px.x, px.y));
spatial_direction_noise += uint_to_u01_float(seed0) * 0.1;
#endif
// Slice angle in [0, pi): the two march directions cover the opposite half.
float ss_angle = frac(spatial_direction_noise + temporal_direction_noise) * M_PI;
// Fractional starting offset along the slice, in units of one sample step.
float rand_offset = frac(spatial_offset_noise + temporal_offset_noise);
// Slice direction; x is scaled by height / width of the input size.
float2 cs_slice_dir = float2(cos(ss_angle) * input_tex_size.y / input_tex_size.x, sin(ss_angle));
float kernel_radius_shrinkage;
{
// Convert AO radius into world scale
#if USE_KERNEL_DISTANCE_SCALING
const float cs_kernel_radius_scaled = kernel_radius * frame_constants.view_constants.view_to_clip[1][1] / -ray_hit_vs.z;
#else
const float cs_kernel_radius_scaled = kernel_radius;
#endif
cs_slice_dir *= cs_kernel_radius_scaled;
// Calculate AO radius shrinkage (if camera is too close to a surface)
float max_kernel_radius_cs = MAX_KERNEL_RADIUS_CS;
//float max_kernel_radius_cs = 100;
kernel_radius_shrinkage = min(1.0, max_kernel_radius_cs / cs_kernel_radius_scaled);
}
// Shrink the AO radius
cs_slice_dir *= kernel_radius_shrinkage;
kernel_radius *= kernel_radius_shrinkage;
float3 center_vs = ray_hit_vs.xyz;
// From here on cs_slice_dir is the step between consecutive samples.
cs_slice_dir *= 1.0 / float(SSGI_HALF_SAMPLE_COUNT);
// NOTE(review): the vector is the LEFT operand of mul() here, while the other
// transforms in this file use mul(matrix, vector) — confirm this is intended.
float2 vs_slice_dir = mul(float4(cs_slice_dir, 0, 0), frame_constants.view_constants.sample_to_view).xy;
// Normal of the slice plane spanned by the view vector and the slice direction.
float3 slice_normal_vs = normalize(cross(v_vs, float3(vs_slice_dir, 0)));
// Surface normal projected onto the slice plane; its length weights this slice's contribution.
float3 proj_normal_vs = normal_vs - slice_normal_vs * dot(slice_normal_vs, normal_vs);
float slice_contrib_weight = length(proj_normal_vs);
// NOTE(review): no guard against slice_contrib_weight == 0 (normal parallel to
// the slice normal); the division would then produce non-finite values.
proj_normal_vs /= slice_contrib_weight;
// Signed angle between the projected normal and the view vector.
float n_angle = fast_acos(clamp(dot(proj_normal_vs, v_vs), -1.0, 1.0)) * sign(dot(vs_slice_dir, proj_normal_vs.xy - v_vs.xy));
// Initial horizon cosines for the two sides: the directions at n_angle -+ pi/2.
float theta_cos_max1 = cos(n_angle - M_FRAC_PI_2);
float theta_cos_max2 = cos(n_angle + M_FRAC_PI_2);
float4 color_accum = 0.0.xxxx;
// Previous-sample state per side; process_sample() only consults it for i > 0.
float3 prev_sample0_vs = v_vs;
float3 prev_sample1_vs = v_vs;
// Last pixel sampled on each side, used to skip repeated fetches of the same pixel
// (starts at the center pixel, so the center itself is never sampled).
int2 prev_sample_coord0 = px;
int2 prev_sample_coord1 = px;
for (uint i = 0; i < SSGI_HALF_SAMPLE_COUNT; ++i) {
// Side 1: step in the negative slice direction.
{
float t = float(i) + rand_offset;
float4 sample_cs = float4(ray_hit_cs.xy - cs_slice_dir * t, 0, 1);
int2 sample_px = int2(output_tex_size.xy * cs_to_uv(sample_cs.xy));
// TODO: check if this is beneficial, or needs to be flattened
if (any(sample_px != prev_sample_coord0)) {
prev_sample_coord0 = sample_px;
sample_cs.z = half_depth_tex[sample_px];
theta_cos_max1 = process_sample(i, 1, n_angle, prev_sample0_vs, sample_cs, center_vs, normal_vs, v_vs, kernel_radius, theta_cos_max1, color_accum);
}
}
// Side 2: step in the positive slice direction, using the complementary offset.
{
float t = float(i) + (1.0 - rand_offset);
float4 sample_cs = float4(ray_hit_cs.xy + cs_slice_dir * t, 0, 1);
int2 sample_px = int2(output_tex_size.xy * cs_to_uv(sample_cs.xy));
// TODO: check if this is beneficial, or needs to be flattened
if (any(sample_px != prev_sample_coord1)) {
prev_sample_coord1 = sample_px;
sample_cs.z = half_depth_tex[sample_px];
theta_cos_max2 = process_sample(i, -1, n_angle, prev_sample1_vs, sample_cs, center_vs, normal_vs, v_vs, kernel_radius, theta_cos_max2, color_accum);
}
}
}
// Convert the horizon cosines to signed angles (side 1 negative, side 2 positive).
float h1 = -fast_acos(theta_cos_max1);
float h2 = +fast_acos(theta_cos_max2);
// Keep both horizons within +-pi/2 of the projected normal angle.
float h1p = n_angle + max(h1 - n_angle, -M_FRAC_PI_2);
float h2p = n_angle + min(h2 - n_angle, M_FRAC_PI_2);
// Visibility of the arc between the two horizons.
float inv_ao = integrate_arc(h1p, h2p, n_angle);
col.a = max(0.0, inv_ao);
#if USE_AO_ONLY
col.rgb = col.a;
#else
col.rgb = color_accum.rgb;
#endif
// Weight the slice by the length of the projected normal.
col *= slice_contrib_weight;
/*float bent_normal_angle = h1p + h2p - n_angle * 2;
float3 bent_normal_dir = sin(bent_normal_angle) * cross(slice_normal_vs, normal_vs) + cos(bent_normal_angle) * normal_vs;
bent_normal_dir = bent_normal_dir;*/
output_tex[px] = max(0.0, col);
//bent_normal_out_tex[px] = float4(bent_normal_dir, 0);// / slice_contrib_weight;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment