Skip to content

Instantly share code, notes, and snippets.

@futureengine2
Last active May 14, 2024 13:26
Show Gist options
  • Save futureengine2/7c8fbc6fefce1818ff1edcd4d7e7bfcf to your computer and use it in GitHub Desktop.
Save futureengine2/7c8fbc6fefce1818ff1edcd4d7e7bfcf to your computer and use it in GitHub Desktop.
Radiance Cascades 2d GI implementation
/*
 * Runs one frame of 2D radiance-cascades global illumination on the GPU.
 *
 * in_bitmap: pixel data uploaded every call into the w x h rgb8 input texture
 *            that the fragment shader ray-marches through.
 * w, h:      size of the input texture in pixels.
 *
 * All GPU resources (bindgroup layouts, pipeline, uniform buffer, textures,
 * bindgroups) are created on the first call only and kept in function-local
 * statics. The uniforms and texture sizes are therefore baked from the w/h of
 * that first call; later calls only re-upload in_bitmap.
 *
 * Per call the function:
 *   1. uploads in_bitmap,
 *   2. clears the texture the top cascade will sample as "previous",
 *   3. draws cascades cn-1 .. 0, ping-ponging between textures[0] and textures[1],
 *   4. issues a final draw with do_render = 1 that reads cascade 0.
 */
static void gi_on_gpu(u8* in_bitmap, int w, int h) {
#define num_cascades 7
    static bool initialized;
    static gpu_bindgroup_t texture_bindgroup[2];
    static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
    static gpu_bindgroup_t render_uniform_bindgroup;
    static gpu_buffer_t uniform_buffer;
    static gpu_pipeline_t pipeline;
    static gpu_bindgroup_layout_t uniform_bindgroup_layout;
    static gpu_bindgroup_layout_t texture_bindgroup_layout;
    static gpu_texture_t textures[2];
    static gpu_texture_t input_texture;

    lifetime_t* lifetime = g_platform->lifetime;

    f32 d0 = 1.f;                  // distance between probes in cascade 0
    int r0 = 4;                    // number of rays in cascade 0
    int n0 = (int)floorf(2*w/d0);  // number of probes in cascade 0 per dimension
    int cn = num_cascades;

    // Host-side copy of the shader's `Uniform` block; the member order here
    // must stay identical to the GLSL declaration.
    typedef struct {
        f32 d0;
        int r0;
        int n0;
        int ci;
        int cn;
        int do_render;
        int add_sky_light;
        int padding;
        v2 resolution;
        v2 padding2;
    } uniform_t;

    if (!initialized) {
        // Scratch lifetime that only has to outlive the shader source read below.
        lifetime_t temp_lifetime = {0};
        initialized = true;

        // create bindgroup layouts
        uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi uniform bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_buffer,
                    .buffer.type = gpu_buffer_binding_type_uniform,
                },
            },
        });
        texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi texture bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
            },
        });

        // create pipeline
        pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
            .name = "gi render shader",
            .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
            .bgls = {
                uniform_bindgroup_layout,
                texture_bindgroup_layout,
            },
        });

        // create uniform buffer (we pack all our different uniforms in one buffer),
        // one slot per cascade and one for rendering
        {
            gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
            uniform_buffer = p.handle;

            // set cascade uniforms: identical except for the cascade index `ci`
            for (int i = 0; i < num_cascades; ++i) {
                *(uniform_t*)p.data = (uniform_t){
                    .d0 = d0,
                    .r0 = r0,
                    .n0 = n0,
                    .ci = i,
                    .cn = num_cascades,
                    .add_sky_light = 1,
                    .resolution = {(f32)w,(f32)h},
                };
                cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                    .name = "gi",
                    .layout = uniform_bindgroup_layout,
                    .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
                });
                gpu_uniform_packer_next(&p);
            }

            // set render uniform: do_render = 1 switches the shader to its display path
            *(uniform_t*)p.data = (uniform_t){
                .d0 = d0,
                .r0 = r0,
                .n0 = n0,
                .ci = 0,
                .cn = num_cascades,
                .do_render = 1,
                .resolution = {(f32)w,(f32)h},
            };
            render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                .name = "gi",
                .layout = uniform_bindgroup_layout,
                .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
            });
            gpu_uniform_packer_end(&p);
        }

        // create textures
        input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
        // The shader treats pure white as "empty"; presumably this border colour
        // makes samples outside the input read as empty too -- confirm against
        // gpu_texture_set_border.
        gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
        // Two equally sized cascade targets: r0*n0 texels wide, n0 texels high.
        textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        // texture_bindgroup[k] pairs the input texture with textures[k] as the
        // "previous cascade" sampler.
        texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[0]}},
            },
        });
        texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[1]}},
            },
        });

        lifetime_destroy(&temp_lifetime);
    }

    // update input texture
    gpu_texture_set_data(input_texture, in_bitmap);

    // clear texture for pingponging: this is the texture the top cascade
    // (i == cn-1) is bound to as its "previous" input
    gpu_texture_clear(textures[(cn-1)%2], (color_t){0});

    // build cascades from the top one down; cascade i samples textures[i%2]
    // and writes textures[(i+1)%2]
    for (int i = cn-1; i >= 0; --i) {
        drawcall_render(&(drawcall_t){
            .pipeline = pipeline,
            .last_vertex = 6,
            .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
            .outputs = {textures[(i+1)%2]},
        });
    }

    // render: cascade 0 was written to textures[(0+1)%2] == textures[1], so
    // the display pass must always sample texture_bindgroup[1]. (The previous
    // index `cn%2` only equals 1 when the cascade count is odd.)
    drawcall_render(&(drawcall_t){
        .pipeline = pipeline,
        .last_vertex = 6,
        .bindgroups = {render_uniform_bindgroup, texture_bindgroup[1]},
    });
#undef num_cascades
}
#ifdef VERTEX_SHADER
out vec2 fuv;
// Vertex stage: produces a quad covering all of clip space as two triangles.
// Draw with vertex count = 6; the corner is selected from gl_VertexID alone.
void main(void) {
    const vec2 corners[6] = vec2[6](
        vec2(-1, -1), vec2(1, -1), vec2(1, 1),
        vec2(-1, -1), vec2(1, 1), vec2(-1, 1)
    );
    vec2 ndc = corners[gl_VertexID % 6];
    // Map [-1,1] to [0,1], then flip vertically so fuv.y = 0 is the top edge.
    vec2 uv = ndc*0.5 + 0.5;
    fuv = vec2(uv.x, 1 - uv.y);
    gl_Position = vec4(ndc, 0, 1);
}
#endif /* VERTEX_SHADER */
#ifdef FRAGMENT_SHADER
// Per-draw parameters. The member order mirrors the `uniform_t` struct that
// the host code (gi_on_gpu) writes into the uniform buffer, so the two
// declarations must be changed together.
layout (std140, binding = 0) uniform Uniform
{
float d0; // distance between probes in cascade 0
int r0; // number of rays in cascade 0
int n0; // number of probes in cascade 0 (per dimension)
int ci; // cascade number
int cn; // total number of cascades
int should_do_render; // we switch on this to render instead of building the cascades
int add_sky_light; // set to 1 to add sky lighting to uppermost cascade
int padding; // unused; presumably keeps the host struct in step with std140 offsets -- confirm
vec2 u_resolution; // resolution of the input texture
vec2 padding4; // unused; presumably rounds the block up to a 16-byte multiple -- confirm
};
layout(binding = 1) uniform sampler2D u_input; // world data that we raytrace through
layout(binding = 2) uniform sampler2D u_prev; // previous cascade (ping-pong this and the output texture)
// Interpolated quad coordinate written by the vertex stage.
in vec2 fuv;
// Colour written to the bound output (a cascade texture, or the display target when rendering).
layout(location = 0) out vec4 ocolor;
const float PI = 3.1415927;
// raymarch2d: grid traversal state for the Amanatides & Woo voxel marching algorithm.
// Filled in by raymarch2d_make and advanced one cell at a time by raymarch2d_next.
struct raymarch2d_t {
    int x, y;       // cell currently being visited
    int sx, sy;     // per-axis step: +1, -1, or 0 when the ray does not move on that axis
    int ex, ey;     // per-axis sentinel cell; traversal stops when a step lands on it
    float tmx, tmy; // per-axis running ray parameter; the smaller one decides the next step
    float tdx, tdy; // per-axis increment added to tmx / tmy after a step on that axis
};
// Prepares a grid traversal of the segment (x0,y0) -> (x1,y1), with unit-sized
// cells addressed by integer coordinates (cell i spans [i, i+1)).
//
// Returns a state whose current cell is the one containing the start point.
// The caller advances it with raymarch2d_next().
raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
    raymarch2d_t res;
    res.x = int(floor(x0));
    res.y = int(floor(y0));
    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
    // Sentinels sit two cells past the end cell in the direction of travel.
    res.ex = int(floor(x1)) + 2*res.sx;
    res.ey = int(floor(y1)) + 2*res.sy;

    // Unit direction. A zero-length segment keeps a zero direction instead of
    // dividing by zero and filling the state with NaNs.
    float dx = x1 - x0;
    float dy = y1 - y0;
    float len2 = dx*dx + dy*dy;
    float l = len2 > 0.0 ? 1.0/sqrt(len2) : 0.0;
    dx *= l;
    dy *= l;

    // Distance along each axis from the start point to the first cell boundary
    // that lies AHEAD of the ray: the upper edge of the cell when stepping in
    // the positive direction, the lower edge when stepping in the negative one.
    // (Measuring to the lower edge in both cases, as before, yields the
    // boundary behind the ray for sx > 0 and a negative parameter for sx < 0.)
    float bx = res.sx > 0 ? float(res.x + 1) - x0 : x0 - float(res.x);
    float by = res.sy > 0 ? float(res.y + 1) - y0 : y0 - float(res.y);

    // tmx/tmy: ray parameter at which that first boundary is crossed. An axis
    // the ray does not move along gets a huge value so it is never chosen.
    res.tmx = dx == 0.0 ? 10000000.0 : bx/abs(dx);
    res.tmy = dy == 0.0 ? 10000000.0 : by/abs(dy);
    // tdx/tdy: ray parameter needed to cross one whole cell on that axis
    // (sx and dx share a sign, so this is positive).
    res.tdx = dx == 0.0 ? 0.0 : float(res.sx)/dx;
    res.tdy = dy == 0.0 ? 0.0 : float(res.sy)/dy;
    return res;
}
// Moves the traversal one cell along whichever axis currently has the smaller
// running parameter (ties go to y). Returns false once the axis that was just
// stepped has landed on its sentinel cell, true otherwise.
bool raymarch2d_next(inout raymarch2d_t r) {
    bool step_along_x = r.tmx < r.tmy;
    if (!step_along_x) {
        r.y += r.sy;
        r.tmy += r.tdy;
        return r.y != r.ey;
    }
    r.x += r.sx;
    r.tmx += r.tdx;
    return r.x != r.ex;
}
// Tone-maps an RGB colour into [0,1] with a rational curve, then blends the
// result towards the tone-mapped value of a weighted RGB sum as that sum grows.
vec3 tonemap_aces(vec3 color) {
    // Coefficients of the rational curve x*(a*x + b) / (x*(c*x + d) + e).
    const float a = 2.51f;
    const float b = 0.03f;
    const float c = 2.43f;
    const float d = 0.59f;
    const float e = 0.14f;
    const float slope = 12.0;

    // Weighted channel sum, carried through the curve in the fourth component.
    float luma = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
    vec4 x = vec4(color.r, color.g, color.b, luma);

    vec4 numer = x * (a * x + b);
    vec4 denom = x * (c * x + d) + e;
    vec4 mapped = clamp(numer / denom, 0.0, 1.0);

    // Blend factor: 0 for black input, increasing with the weighted sum.
    float t = luma * luma / (slope + luma);
    return mix(mapped.rgb, vec3(mapped.a), t);
}
// Closed-form sky + sun radiance accumulated between the two directions
// angle[0] (start) and angle[1] (end), given in radians.
// Sky integral formula taken from
// Analytic Direct Illumination - Mathis
// https://www.shadertoy.com/view/NttSW7
vec3 sky_(vec2 angle) {
    const vec3 SkyColor = vec3(0.2,0.5,1.);
    const vec3 SunColor = vec3(1.,0.7,0.1)*10.;
    const float SunA = 2.0;
    const float SunS = 64.0;
    const float SSunS = sqrt(SunS);
    const float ISSunS = 1./SSunS;

    float from = angle.x;
    float to = angle.y;

    // Sky contribution over [from, to].
    vec3 sky_part = SkyColor*(to-from-0.5*(cos(to)-cos(from)));
    // Sun contribution: an arctangent-shaped lobe positioned by SunA and scaled by SunS.
    vec3 sun_part = SunColor*(atan(SSunS*(SunA-from))-atan(SSunS*(SunA-to)))*ISSunS;

    return (sky_part + sun_part) / 6.0;
}
// Integrates the sky radiance over the direction interval [angle[0], angle[1]].
// If the interval's end reaches or passes a full turn, it is split at 2*PI
// and the part beyond it is evaluated starting again from angle 0.
vec3 sky(vec2 angle) {
    float full_turn = 2.0 * PI;
    bool ends_before_wrap = angle.y < full_turn;
    if (!ends_before_wrap) {
        vec3 up_to_wrap = sky_(vec2(angle.x, full_turn));
        vec3 past_wrap = sky_(vec2(0.0, angle.y - full_turn));
        return up_to_wrap + past_wrap;
    }
    return sky_(angle);
}
// Fragment stage with two modes, selected by the should_do_render uniform:
//  - render: look up the cascade-0 probe nearest to this fragment in u_prev,
//    average its r0 ray texels and tone-map the result;
//  - build: treat this output texel as one ray of one probe of cascade ci,
//    march that ray through u_input and, if nothing is hit, take the radiance
//    from the next-higher cascade stored in u_prev (or from the sky for the
//    top cascade).
void main(void) {
if (should_do_render == 1) {
// sample probe in cascade 0
// position of this fragment in input-texture pixels
float x = fuv.x * u_resolution.x;
float y = fuv.y * u_resolution.y;
// index of the nearest cascade-0 probe
float xi = round(x/d0);
float yi = round(y/d0);
vec3 c = vec3(0,0,0);
// a probe's r0 rays are stored as consecutive texels in one row; sum them at texel centres
for (int r = 0; r < r0; ++r) {
vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
c += texture(u_prev, pixelcoord / textureSize(u_prev, 0)).rgb;
}
// average over the rays, then tone-map
ocolor = vec4(tonemap_aces(c/r0),1);
}
else {
// build cascade
// integer texel coordinate of this fragment in the cascade texture
int u = int(gl_FragCoord.x);
int v = int(gl_FragCoord.y);
int lm = 2;// ray distance branching factor. ray distance = 2^(lm*ci)
int rm = 1;// ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
int n = n0 >> ci; // number of probes in one dimension
float d = d0*(1 << ci); // distance between probes
int rn = r0 << (rm*ci); // number of pixels/rays per probe
// texel layout: row = probe y index, column = probe x index * rn + ray index
int yi = v; // probe index
int xi = u/rn; // probe index
int r = u - xi*rn; // ray index
// half the probe spacing: offsets each probe to the centre of its grid cell
float dx = d0*0.5f*(1 << ci);
float x = xi * d + dx; // probe pos
float y = yi * d + dx; // probe pos
float l = 0.5 * d0; // length of ray
// multiplier applied to the colour of whatever the ray hits
float intensity = 1.0;
// texels that do not map to a probe of this cascade are written as zero
if (xi >= n || xi < 0 || yi >= n || yi < 0) {
ocolor = vec4(0,0,0,0);
return;
}
float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
float rb = l*(1 << (ci*lm)); // end of ray length interval
// direction of this ray: centre of the r-th of rn equal angular slices
float alpha = 2*PI*(float(r)+0.5)/rn;
vec2 rot = vec2(cos(alpha), sin(alpha));
vec2 a = vec2(x,y) + rot*ra; // start of ray
vec2 b = vec2(x,y) + rot*rb; // end of ray
raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
// col.a doubles as the "hit" flag: 0 = nothing hit yet
vec4 col = vec4(0,0,0,0);
// raymarch2d_next steps before each sample, so the cell containing the ray start is not sampled
while (raymarch2d_next(raym)) {
// sample the input at the centre of the current cell
vec3 v = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
// exactly white means empty; any other colour is a hit and is taken as the ray's radiance
if (v != vec3(1,1,1)) {
col = vec4(v*intensity,1);
break;
}
}
// if no hit, get from upper cascade
// TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
if (col.a == 0) {
if (ci == cn-1) {
// top cascade: there is no upper cascade to fall back on
if (add_sky_light != 0)
// sky radiance integrated over an interval of width 2*PI/rn starting at alpha, divided by that width
col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
else
col = vec4(0,0,0,0);
}
else {
int xi2 = (xi+1)/2; // probe index in upper
int yi2 = (yi+1)/2; // probe index in upper
int r2 = r << rm; // ray index in upper
int rn2 = rn << rm; // num rays in upper
int n2 = n >> 1; // num probes in upper
float tx = 0.75 - 0.5*float(xi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
float ty = 0.75 - 0.5*float(yi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
// loop through all the nearby rays in the upper cascade
// TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
// we should choose a better weighting than just treating them all equally
vec4 upper = vec4(0,0,0,0);
// each of the (1 << rm) upper rays contributes an equal share
float frac = 1.0 / (1 << rm);
for (int ri = 0; ri < (1 << rm); ++ri) {
// the four surrounding upper probes (xi2-1 / xi2 by yi2-1 / yi2), with indices clamped to the upper grid
vec2 pc1 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc2 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc3 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc4 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
// bilinear blend of the four probes: tx across x first, then ty across y
vec4 c = mix(
mix(texture(u_prev, pc1 / textureSize(u_prev, 0)), texture(u_prev, pc2 / textureSize(u_prev, 0)), tx),
mix(texture(u_prev, pc3 / textureSize(u_prev, 0)), texture(u_prev, pc4 / textureSize(u_prev, 0)), tx),
ty
);
upper += c*frac;
}
col = upper;
}
}
// the stored alpha is always 1; col.a was only used as the hit flag above
ocolor = vec4(col.rgb, 1);
}
}
#endif /* FRAGMENT_SHADER */
@futureengine2
Copy link
Author

futureengine2 commented Mar 24, 2024

I'm not super familiar with hddi, but if I understand correctly it's a datastructure that allows for efficient raytracing. So yeah, you could use this method to decide which rays to sample and how to combine the results, and then use hddi to trace those rays through the scene.

@octanejohn
Copy link

Does this idea have to be screen space, or can it work in world space so that there aren't visual artifacts when merging?

@futureengine2
Copy link
Author

futureengine2 commented Mar 24, 2024

Yes, you can definitely do this in 3D world space. I haven't done it myself, but Alexander has a world-space demo on his YouTube channel.

I can't think of any reason artefacts should be bad (in fact, I believe some artefacts, like ringing in 2D, don't appear in 3D). I don't know of anyone implementing a production-ready 3D version, though.

There's still more research going on to reduce artefacts on the Graphics Programming Discord; I recommend you check it out.

@octanejohn
Copy link

I saw it, but to me it looks like it can't produce data on its own outside the camera view (turning the camera away from the light loses the data on the wall). That's why I am trying to see if it can be merged with HDDAGI to help it with world space, like AMD Brixelizer's caching idea does.

@futureengine2
Copy link
Author

Looks like he's got two 3D videos, one in screen space and one in world space.
Here's an example of the world space, you can see that he gets light from the models to the left and right outside of the view frustum:

https://youtu.be/5Ua-h1pg6yM?si=c6wdsT-LzlQTPC_l&t=37

There are some other artifacts going on that probably come from things like the number of cascades being low or a low ray multiplication factor; it probably could have used more parameter tweaking.
This is a neat website tmpvar made that lets you play with some of the parameters (screenspace only though) https://tmpvar.com/poc/radiance-cascades/#flatland-2d

@futureengine2
Copy link
Author

futureengine2 commented Mar 25, 2024

Btw this method is literally just a cleverer way of laying out and combining the results of your probes. How you calculate the value of your rays is entirely up to you.

@octanejohn
Copy link

awesome thanks for chatting

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment