menduz/gi.c

## gi.c

// Runs one frame of the 2D radiance-cascades GI pass on the GPU.
//
// in_bitmap: pixel data uploaded into the input texture on every call
//            (handed straight to gpu_texture_set_data).
// w, h:      size of the input bitmap.
//
// The first call lazily creates every GPU resource (bindgroup layouts,
// pipeline, uniform buffer, textures, bindgroups) on g_platform->lifetime;
// later calls reuse them. NOTE: textures and uniforms are sized from the w/h
// seen on that first call; a later call with different dimensions is not
// handled here.
//
// Per call: upload the bitmap, build the cascades from index cn-1 down to 0
// while ping-ponging between textures[0] and textures[1], then issue the
// final draw with the "render" uniforms and no .outputs entry (presumably
// the default target - confirm against drawcall_render).
static void gi_on_gpu(u8* in_bitmap, int w, int h) {
    #define num_cascades 7
    static bool initialized;
    static gpu_bindgroup_t texture_bindgroup[2];
    static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
    static gpu_bindgroup_t render_uniform_bindgroup;
    static gpu_buffer_t uniform_buffer;
    static gpu_pipeline_t pipeline;
    static gpu_bindgroup_layout_t uniform_bindgroup_layout;
    static gpu_bindgroup_layout_t texture_bindgroup_layout;
    static gpu_texture_t textures[2];
    static gpu_texture_t input_texture;
    lifetime_t* lifetime = g_platform->lifetime;

    f32 d0 = 1.f; // distance between probes in cascade 0
    int r0 = 4; // number of rays in cascade 0
    int n0 = (int)floorf(2*w/d0); // number of probes in cascade 0 per dimension
    int cn = num_cascades;

    // CPU-side mirror of the std140 `Uniform` block in gi.glsl: the field
    // order and the explicit padding members must stay in sync with the shader.
    typedef struct {
        f32 d0;
        int r0;
        int n0;
        int ci;

        int cn;
        int do_render;
        int add_sky_light;
        int padding;

        v2 resolution;
        v2 padding2;
    } uniform_t;

    if (!initialized) {
        // scratch lifetime: only holds the shader source read from disk
        lifetime_t temp_lifetime = {0};
        initialized = true;

        // create bindgroup layouts
        uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi uniform bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_buffer,
                    .buffer.type = gpu_buffer_binding_type_uniform,
                },
            },
        });

        // two samplers: the input bitmap and the previously written cascade
        texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi texture bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
            },
        });

        // create pipeline
        pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
            .name = "gi render shader",
            .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
            .bgls = {
                uniform_bindgroup_layout,
                texture_bindgroup_layout,
            },
        });

        // create uniform buffer (we pack all our different uniforms in one buffer), one per cascade and one for rendering
        {
            gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
            uniform_buffer = p.handle;
            // set cascade uniforms
            for (int i = 0; i < num_cascades; ++i) {
                *(uniform_t*)p.data = (uniform_t){
                    .d0 = d0,
                    .r0 = r0,
                    .n0 = n0,
                    .ci = i,
                    .cn = num_cascades,
                    .add_sky_light = 1,
                    .resolution = {(f32)w,(f32)h},
                };
                cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                    .name = "gi",
                    .layout = uniform_bindgroup_layout,
                    .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
                });
                gpu_uniform_packer_next(&p);
            }

            // set render uniform (slot num_cascades of the packed buffer)
            *(uniform_t*)p.data = (uniform_t){
                .d0 = d0,
                .r0 = r0,
                .n0 = n0,
                .ci = 0,
                .cn = num_cascades,
                .do_render = 1,
                .resolution = {(f32)w,(f32)h},
            };
            render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                .name = "gi",
                .layout = uniform_bindgroup_layout,
                .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
            });

            gpu_uniform_packer_end(&p);
        }

        // create textures: each cascade texture has one row per probe row and
        // r0 texels per probe along x
        input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
        gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
        textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);

        // texture_bindgroup[k] samples the input bitmap plus textures[k]
        texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[0]}},
            },
        });

        texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[1]}},
            },
        });

        lifetime_destroy(&temp_lifetime);
    }

    // update input texture
    gpu_texture_set_data(input_texture, in_bitmap);

    // clear texture for pingponging (the one bound as "previous" for the top cascade)
    gpu_texture_clear(textures[(cn-1)%2], (color_t){0});

    // build cascades, top down: cascade i samples textures[i%2] and writes textures[(i+1)%2]
    for (int i = cn-1; i >= 0; --i) {
        drawcall_render(&(drawcall_t){
            .pipeline = pipeline,
            .last_vertex = 6,
            .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
            .outputs = {textures[(i+1)%2]},
        });
    }

    // render: cascade 0 is built last and always writes textures[(0+1)%2],
    // i.e. textures[1], so sample that one regardless of the parity of cn
    // (indexing with cn%2 was only right for an odd cascade count)
    drawcall_render(&(drawcall_t){
        .pipeline = pipeline,
        .last_vertex = 6,
        .bindgroups = {render_uniform_bindgroup, texture_bindgroup[1]},
    });

    #undef num_cascades
}

## gi.glsl
#ifdef VERTEX_SHADER

    out vec2 fuv;

    // Vertex stage: emits a screen-covering quad built from two triangles.
    // The shader declares no vertex inputs; draw it with a vertex count of 6.
    void main(void) {
        const vec2 corners[6] = vec2[6](
            vec2(-1, -1), vec2(1, -1), vec2(1, 1),
            vec2(-1, -1), vec2(1, 1), vec2(-1, 1)
        );
        vec2 ndc = corners[gl_VertexID % 6];

        // remap clip space [-1,1] to [0,1], then flip vertically so that
        // fuv.y is 0 where clip-space y is +1
        vec2 uv = ndc*0.5 + 0.5;
        fuv = vec2(uv.x, 1 - uv.y);

        gl_Position = vec4(ndc, 0, 1);
    }

#endif /* VERTEX_SHADER */

#ifdef FRAGMENT_SHADER

    // Per-draw parameters. With std140 rules the members fill three 16-byte
    // rows (4 scalars, 4 scalars, 2 vec2); the explicit padding members keep
    // the block in step with the uniform_t struct that gi.c writes.
    layout (std140, binding = 0) uniform Uniform
    {
        float d0; // distance between probes in cascade 0
        int   r0; // number of rays in cascade 0
        int   n0; // number of probes in cascade 0 (per dimension)
        int   ci; // index of the cascade being built (0 = finest)

        int   cn; // total number of cascades
        int   should_do_render; // we switch on this to render instead of building the cascades
        int   add_sky_light; // set to 1 to add sky lighting to uppermost cascade
        int   padding; // unused, completes the second 16-byte row

        vec2   u_resolution; // resolution of the input texture
        vec2   padding4; // unused, completes the third 16-byte row
    };
    layout(binding = 1) uniform sampler2D u_input; // world data that we raytrace through
    layout(binding = 2) uniform sampler2D u_prev; // previous cascade (ping-pong this and the output texture)

    in vec2 fuv;

    layout(location = 0) out vec4 ocolor;

    const float PI = 3.1415927;

    // raymarch2d: Implementation of Amanatides & Woo voxel marching algo
    // State of one grid traversal: filled in by raymarch2d_make, advanced one
    // cell at a time by raymarch2d_next.
    struct raymarch2d_t {
        int x;      // current cell index along x
        int y;      // current cell index along y
        int sx;     // step applied to x: -1, 0 or +1
        int sy;     // step applied to y: -1, 0 or +1
        int ex;     // x index that ends the traversal: floor(x1) + 2*sx
        int ey;     // y index that ends the traversal: floor(y1) + 2*sy
        float tmx;  // compared with tmy to choose the axis of the next step
        float tmy;
        float tdx;  // added to tmx after every step along x
        float tdy;  // added to tmy after every step along y
    };

    // Prepares an Amanatides & Woo traversal of the unit grid from (x0,y0)
    // towards (x1,y1). The two points must differ, since the direction is
    // normalised by its length.
    //
    // tmx/tmy start at the distance along the ray to the first cell boundary
    // crossed on each axis, so raymarch2d_next visits cells in the order the
    // ray really crosses them. (Previously they were (x0 - floor(x0))/dx,
    // i.e. measured to the wrong edge for +x/+y and negative for -x/-y.)
    raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
        raymarch2d_t res;
        res.x = int(floor(x0));
        res.y = int(floor(y0));
        res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
        res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
        // stop sentinel: two cells past the end point's cell in the travel direction
        res.ex = int(floor(x1)) + 2*res.sx;
        res.ey = int(floor(y1)) + 2*res.sy;
        // unit direction of the ray
        float dx = x1 - x0;
        float dy = y1 - y0;
        float l = 1.f/sqrt(dx*dx + dy*dy);
        dx *= l;
        dy *= l;
        // first boundary crossed: far edge of the start cell when moving in
        // the positive direction, near edge when moving in the negative one
        float bx = float(res.sx > 0 ? res.x + 1 : res.x);
        float by = float(res.sy > 0 ? res.y + 1 : res.y);
        // an axis the ray never moves along gets a huge t so it is never chosen
        res.tmx = dx == 0 ? 10000000 : (bx - x0)/dx;
        res.tmy = dy == 0 ? 10000000 : (by - y0)/dy;
        // distance along the ray between two consecutive boundaries of one axis
        res.tdx = dx == 0 ? 0 : res.sx/dx;
        res.tdy = dy == 0 ? 0 : res.sy/dy;
        return res;
    }

    // Moves the traversal one cell along the axis whose accumulated t is
    // smaller (a tie steps along y). Returns false once the coordinate that
    // was just stepped equals its end sentinel (ex / ey), true otherwise.
    bool raymarch2d_next(inout raymarch2d_t r) {
        bool step_along_x = r.tmx < r.tmy;
        if (!step_along_x) {
            r.y += r.sy;
            r.tmy += r.tdy;
            return r.y != r.ey;
        }
        r.x += r.sx;
        r.tmx += r.tdx;
        return r.x != r.ex;
    }

    // ACES-style tone mapping. The rational curve is applied to r, g, b and to
    // a 0.299/0.587/0.114 weighted luminance; the colour is then blended
    // towards the tone-mapped luminance (grey) by a factor that grows with
    // the input luminance.
    vec3 tonemap_aces(vec3 color) {
        const float A = 2.51f, B = 0.03f, C = 2.43f, D = 0.59f, E = 0.14f;
        const float SLOPE = 12.0;

        float luma = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
        vec4 v = vec4(color.r, color.g, color.b, luma);

        vec4 numer = v * (A * v + B);
        vec4 denom = v * (C * v + D) + E;
        vec4 mapped = clamp(numer / denom, 0.0, 1.0);

        float grey_mix = luma * luma / (SLOPE + luma);
        return mix(mapped.rgb, mapped.aaa, grey_mix);
    }

    // Sky radiance integrated over the direction interval
    // [angle[0], angle[1]] (radians). Intervals that wrap past 2*PI are split
    // by sky() before they reach this function.
    //
    // Sky integral formula taken from
    // Analytic Direct Illumination - Mathis
    // https://www.shadertoy.com/view/NttSW7
    vec3 sky_(vec2 angle) {
        const vec3 sky_color = vec3(0.2,0.5,1.);
        const vec3 sun_color = vec3(1.,0.7,0.1)*10.;
        const float sun_a = 2.0;
        const float sun_s = 64.0;
        const float sqrt_sun_s = sqrt(sun_s);
        const float inv_sqrt_sun_s = 1./sqrt_sun_s;

        float lo = angle[0];
        float hi = angle[1];

        float sky_term = hi-lo-0.5*(cos(hi)-cos(lo));
        float sun_term = atan(sqrt_sun_s*(sun_a-lo))-atan(sqrt_sun_s*(sun_a-hi));

        vec3 total = sky_color*sky_term;
        total += sun_color*sun_term*inv_sqrt_sun_s;
        return total / 6.0;
    }

    // Integrates the sky radiance over the direction interval
    // [angle[0], angle[1]]. When the end of the interval is not below 2*PI
    // the interval is split at 2*PI and the remainder is integrated from 0.
    vec3 sky(vec2 angle) {
        bool wraps = !(angle[1] < 2.0 * PI);
        if (wraps) {
            vec3 up_to_full_turn = sky_(vec2(angle[0], 2.0 * PI));
            vec3 past_full_turn = sky_(vec2(0.0, angle[1] - 2.0 * PI));
            return up_to_full_turn + past_full_turn;
        }
        return sky_(angle);
    }

    // Fragment stage. One shader serves both passes, selected by should_do_render:
    //  - render pass: averages the r0 rays of the cascade-0 probe with index
    //    round(pixel/d0), read from u_prev, and tone-maps the result;
    //  - cascade pass: computes one texel (one ray of one probe) of cascade ci
    //    by marching through u_input; a ray that hits nothing takes its value
    //    from the cascade above (u_prev) or, in the top cascade, from the sky.
    void main(void) {
        if (should_do_render == 1) {
            // sample probe in cascade 0
            float x = fuv.x * u_resolution.x;
            float y = fuv.y * u_resolution.y;
            float xi = round(x/d0);
            float yi = round(y/d0);
            vec3 c = vec3(0,0,0);
            // the r0 rays of one probe are adjacent texels on a single row
            for (int r = 0; r < r0; ++r) {
                vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
                c += texture(u_prev, pixelcoord / textureSize(u_prev, 0)).rgb;
            }
            ocolor = vec4(tonemap_aces(c/r0),1);
        }
        else {
            // build cascade
            int u = int(gl_FragCoord.x);
            int v = int(gl_FragCoord.y);

            int lm = 2;// ray distance branching factor. ray distance = 2^(lm*ci)
            int rm = 1;// ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
            int n = n0 >> ci; // number of probes in one dimension
            float d = d0*(1 << ci); // distance between probes
            int rn = r0 << (rm*ci); // number of pixels/rays per probe
            int yi = v; // probe index
            int xi = u/rn; // probe index
            int r = u - xi*rn; // ray index
            float dx = d0*0.5f*(1 << ci); // half the probe spacing: probes sit at cell centres
            float x = xi * d + dx; // probe pos
            float y = yi * d + dx; // probe pos
            float l = 0.5 * d0; // length of ray
            float intensity = 1.0;

            // texels outside the probe grid of this cascade are left empty
            if (xi >= n || xi < 0 || yi >= n || yi < 0) {
                ocolor = vec4(0,0,0,0);
                return;
            }

            float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
            float rb = l*(1 << (ci*lm)); // end of ray length interval

            float alpha = 2*PI*(float(r)+0.5)/rn; // direction at the centre of ray r's angular slice
            vec2 rot = vec2(cos(alpha), sin(alpha));
            vec2 a = vec2(x,y) + rot*ra; // start of ray
            vec2 b = vec2(x,y) + rot*rb; // end of ray
            raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
            vec4 col = vec4(0,0,0,0);
            // raymarch2d_next steps before the first sample, so the cell the
            // ray starts in is never tested
            while (raymarch2d_next(raym)) {
                // this vec3 v shadows the int v declared above
                vec3 v = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
                // pure white marks empty space; any other colour counts as a hit
                if (v != vec3(1,1,1)) {
                    col = vec4(v*intensity,1);
                    break;
                }
            }

            // if no hit, get from upper cascade
            // TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
            if (col.a == 0) {
                if (ci == cn-1) {
                    // NOTE(review): alpha is the centre of this ray's slice, yet the
                    // sky is integrated over [alpha, alpha + slice width] - confirm
                    // that the half-slice offset is intended
                    if (add_sky_light != 0)
                        col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
                    else
                        col = vec4(0,0,0,0);
                }
                else {
                    int xi2 = (xi+1)/2; // probe index in upper
                    int yi2 = (yi+1)/2; // probe index in upper
                    int r2 = r << rm; // ray index in upper
                    int rn2 = rn << rm; // num rays in upper
                    int n2 = n >> 1; // num probes in upper
                    float tx = 0.75 - 0.5*float(xi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
                    float ty = 0.75 - 0.5*float(yi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid

                    // loop through all the nearby rays in the upper cascade
                    // TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
                    //       we should choose a better weighting than just treating them all equally
                    vec4 upper = vec4(0,0,0,0);
                    float frac = 1.0 / (1 << rm);
                    for (int ri = 0; ri < (1 << rm); ++ri) {
                        // the four upper probes (xi2-1|xi2, yi2-1|yi2), clamped to the upper grid
                        vec2 pc1 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
                        vec2 pc2 = floor(vec2(clamp(xi2,   0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
                        vec2 pc3 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2,   0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
                        vec2 pc4 = floor(vec2(clamp(xi2,   0, n2-1)*rn2 + r2 + ri, clamp(yi2,   0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
                        // manual bilinear blend of the four probes
                        vec4 c = mix(
                            mix(texture(u_prev, pc1 / textureSize(u_prev, 0)), texture(u_prev, pc2 / textureSize(u_prev, 0)), tx),
                            mix(texture(u_prev, pc3 / textureSize(u_prev, 0)), texture(u_prev, pc4 / textureSize(u_prev, 0)), tx),
                            ty
                        );
                        upper += c*frac;
                    }
                    col = upper;
                }
            }

            // alpha is always written as 1, whatever col.a was
            ocolor = vec4(col.rgb, 1);
        }
    }

#endif /* FRAGMENT_SHADER */

	// Runs one frame of the 2D radiance-cascades GI pass on the GPU.
	//
	// in_bitmap: pixel data uploaded into the input texture on every call
	//            (handed straight to gpu_texture_set_data).
	// w, h:      size of the input bitmap.
	//
	// The first call lazily creates every GPU resource (bindgroup layouts,
	// pipeline, uniform buffer, textures, bindgroups) on g_platform->lifetime;
	// later calls reuse them. NOTE: textures and uniforms are sized from the w/h
	// seen on that first call; a later call with different dimensions is not
	// handled here.
	//
	// Per call: upload the bitmap, build the cascades from index cn-1 down to 0
	// while ping-ponging between textures[0] and textures[1], then issue the
	// final draw with the "render" uniforms and no .outputs entry (presumably
	// the default target - confirm against drawcall_render).
	static void gi_on_gpu(u8* in_bitmap, int w, int h) {
	    #define num_cascades 7
	    static bool initialized;
	    static gpu_bindgroup_t texture_bindgroup[2];
	    static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
	    static gpu_bindgroup_t render_uniform_bindgroup;
	    static gpu_buffer_t uniform_buffer;
	    static gpu_pipeline_t pipeline;
	    static gpu_bindgroup_layout_t uniform_bindgroup_layout;
	    static gpu_bindgroup_layout_t texture_bindgroup_layout;
	    static gpu_texture_t textures[2];
	    static gpu_texture_t input_texture;
	    lifetime_t* lifetime = g_platform->lifetime;

	    f32 d0 = 1.f; // distance between probes in cascade 0
	    int r0 = 4; // number of rays in cascade 0
	    int n0 = (int)floorf(2*w/d0); // number of probes in cascade 0 per dimension
	    int cn = num_cascades;

	    // CPU-side mirror of the std140 `Uniform` block in the shader: the field
	    // order and the explicit padding members must stay in sync with it.
	    typedef struct {
	        f32 d0;
	        int r0;
	        int n0;
	        int ci;

	        int cn;
	        int do_render;
	        int add_sky_light;
	        int padding;

	        v2 resolution;
	        v2 padding2;
	    } uniform_t;

	    if (!initialized) {
	        // scratch lifetime: only holds the shader source read from disk
	        lifetime_t temp_lifetime = {0};
	        initialized = true;

	        // create bindgroup layouts
	        uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
	            .name = "gi uniform bgl",
	            .entries = {
	                {
	                    .visibility = gpu_visibility_fragment,
	                    .type = gpu_binding_type_buffer,
	                    .buffer.type = gpu_buffer_binding_type_uniform,
	                },
	            },
	        });

	        // two samplers: the input bitmap and the previously written cascade
	        texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
	            .name = "gi texture bgl",
	            .entries = {
	                {
	                    .visibility = gpu_visibility_fragment,
	                    .type = gpu_binding_type_sampler,
	                },
	                {
	                    .visibility = gpu_visibility_fragment,
	                    .type = gpu_binding_type_sampler,
	                },
	            },
	        });

	        // create pipeline
	        pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
	            .name = "gi render shader",
	            .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
	            .bgls = {
	                uniform_bindgroup_layout,
	                texture_bindgroup_layout,
	            },
	        });

	        // create uniform buffer (we pack all our different uniforms in one buffer), one per cascade and one for rendering
	        {
	            gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
	            uniform_buffer = p.handle;
	            // set cascade uniforms
	            for (int i = 0; i < num_cascades; ++i) {
	                // write through p.data (the dereference had been lost in this copy)
	                *(uniform_t*)p.data = (uniform_t){
	                    .d0 = d0,
	                    .r0 = r0,
	                    .n0 = n0,
	                    .ci = i,
	                    .cn = num_cascades,
	                    .add_sky_light = 1,
	                    .resolution = {(f32)w,(f32)h},
	                };
	                cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
	                    .name = "gi",
	                    .layout = uniform_bindgroup_layout,
	                    .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
	                });
	                gpu_uniform_packer_next(&p);
	            }

	            // set render uniform (slot num_cascades of the packed buffer)
	            *(uniform_t*)p.data = (uniform_t){
	                .d0 = d0,
	                .r0 = r0,
	                .n0 = n0,
	                .ci = 0,
	                .cn = num_cascades,
	                .do_render = 1,
	                .resolution = {(f32)w,(f32)h},
	            };
	            render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
	                .name = "gi",
	                .layout = uniform_bindgroup_layout,
	                .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
	            });

	            gpu_uniform_packer_end(&p);
	        }

	        // create textures: each cascade texture has one row per probe row and
	        // r0 texels per probe along x
	        input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
	        gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
	        textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
	        textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);

	        // texture_bindgroup[k] samples the input bitmap plus textures[k]
	        texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
	            .name = "gi",
	            .layout = texture_bindgroup_layout,
	            .entries = {
	                {.sampler = {input_texture}},
	                {.sampler = {textures[0]}},
	            },
	        });

	        texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
	            .name = "gi",
	            .layout = texture_bindgroup_layout,
	            .entries = {
	                {.sampler = {input_texture}},
	                {.sampler = {textures[1]}},
	            },
	        });

	        lifetime_destroy(&temp_lifetime);
	    }

	    // update input texture
	    gpu_texture_set_data(input_texture, in_bitmap);

	    // clear texture for pingponging (the one bound as "previous" for the top cascade)
	    gpu_texture_clear(textures[(cn-1)%2], (color_t){0});

	    // build cascades, top down: cascade i samples textures[i%2] and writes textures[(i+1)%2]
	    for (int i = cn-1; i >= 0; --i) {
	        drawcall_render(&(drawcall_t){
	            .pipeline = pipeline,
	            .last_vertex = 6,
	            .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
	            .outputs = {textures[(i+1)%2]},
	        });
	    }

	    // render: cascade 0 is built last and always writes textures[(0+1)%2],
	    // i.e. textures[1], so sample that one regardless of the parity of cn
	    // (indexing with cn%2 was only right for an odd cascade count)
	    drawcall_render(&(drawcall_t){
	        .pipeline = pipeline,
	        .last_vertex = 6,
	        .bindgroups = {render_uniform_bindgroup, texture_bindgroup[1]},
	    });

	    #undef num_cascades
	}
	#ifdef VERTEX_SHADER

	out vec2 fuv;

	// Vertex stage: emits a screen-covering quad built from two triangles.
	// The shader declares no vertex inputs; draw it with a vertex count of 6.
	void main(void) {
	    const vec2 corners[6] = vec2[6](
	        vec2(-1, -1), vec2(1, -1), vec2(1, 1),
	        vec2(-1, -1), vec2(1, 1), vec2(-1, 1)
	    );
	    vec2 ndc = corners[gl_VertexID % 6];

	    // remap clip space [-1,1] to [0,1], then flip vertically so that
	    // fuv.y is 0 where clip-space y is +1
	    vec2 uv = ndc*0.5 + 0.5;
	    fuv = vec2(uv.x, 1 - uv.y);

	    gl_Position = vec4(ndc, 0, 1);
	}

	#endif /* VERTEX_SHADER */

	#ifdef FRAGMENT_SHADER

	// Per-draw parameters. With std140 rules the members fill three 16-byte
	// rows (4 scalars, 4 scalars, 2 vec2); the explicit padding members keep
	// the block in step with the uniform_t struct written by gi_on_gpu.
	layout (std140, binding = 0) uniform Uniform
	{
	float d0; // distance between probes in cascade 0
	int r0; // number of rays in cascade 0
	int n0; // number of probes in cascade 0 (per dimension)
	int ci; // index of the cascade being built (0 = finest)

	int cn; // total number of cascades
	int should_do_render; // we switch on this to render instead of building the cascades
	int add_sky_light; // set to 1 to add sky lighting to uppermost cascade
	int padding; // unused, completes the second 16-byte row

	vec2 u_resolution; // resolution of the input texture
	vec2 padding4; // unused, completes the third 16-byte row
	};
	layout(binding = 1) uniform sampler2D u_input; // world data that we raytrace through
	layout(binding = 2) uniform sampler2D u_prev; // previous cascade (ping-pong this and the output texture)

	in vec2 fuv;

	layout(location = 0) out vec4 ocolor;

	const float PI = 3.1415927;

	// raymarch2d: Implementation of Amanatides & Woo voxel marching algo
	// State of one grid traversal: filled in by raymarch2d_make, advanced one
	// cell at a time by raymarch2d_next.
	struct raymarch2d_t {
	int x; // current cell index along x
	int y; // current cell index along y
	int sx; // step applied to x: -1, 0 or +1
	int sy; // step applied to y: -1, 0 or +1
	int ex; // x index that ends the traversal: floor(x1) + 2*sx
	int ey; // y index that ends the traversal: floor(y1) + 2*sy
	float tmx; // compared with tmy to choose the axis of the next step
	float tmy;
	float tdx; // added to tmx after every step along x
	float tdy; // added to tmy after every step along y
	};

	// Prepares an Amanatides & Woo traversal of the unit grid from (x0,y0)
	// towards (x1,y1). The two points must differ, since the direction is
	// normalised by its length.
	//
	// tmx/tmy start at the distance along the ray to the first cell boundary
	// crossed on each axis, so raymarch2d_next visits cells in the order the
	// ray really crosses them. (Previously they were (x0 - floor(x0))/dx,
	// i.e. measured to the wrong edge for +x/+y and negative for -x/-y.)
	raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
	    raymarch2d_t res;
	    res.x = int(floor(x0));
	    res.y = int(floor(y0));
	    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
	    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
	    // stop sentinel: two cells past the end point's cell in the travel direction
	    res.ex = int(floor(x1)) + 2*res.sx;
	    res.ey = int(floor(y1)) + 2*res.sy;
	    // unit direction of the ray (the multiplications had been lost in this copy)
	    float dx = x1 - x0;
	    float dy = y1 - y0;
	    float l = 1.f/sqrt(dx*dx + dy*dy);
	    dx *= l;
	    dy *= l;
	    // first boundary crossed: far edge of the start cell when moving in
	    // the positive direction, near edge when moving in the negative one
	    float bx = float(res.sx > 0 ? res.x + 1 : res.x);
	    float by = float(res.sy > 0 ? res.y + 1 : res.y);
	    // an axis the ray never moves along gets a huge t so it is never chosen
	    res.tmx = dx == 0 ? 10000000 : (bx - x0)/dx;
	    res.tmy = dy == 0 ? 10000000 : (by - y0)/dy;
	    // distance along the ray between two consecutive boundaries of one axis
	    res.tdx = dx == 0 ? 0 : res.sx/dx;
	    res.tdy = dy == 0 ? 0 : res.sy/dy;
	    return res;
	}

	// Moves the traversal one cell along the axis whose accumulated t is
	// smaller (a tie steps along y). Returns false once the coordinate that
	// was just stepped equals its end sentinel (ex / ey), true otherwise.
	bool raymarch2d_next(inout raymarch2d_t r) {
	    bool step_along_x = r.tmx < r.tmy;
	    if (!step_along_x) {
	        r.y += r.sy;
	        r.tmy += r.tdy;
	        return r.y != r.ey;
	    }
	    r.x += r.sx;
	    r.tmx += r.tdx;
	    return r.x != r.ex;
	}

	// ACES-style tone mapping. The rational curve is applied to r, g, b and to
	// a 0.299/0.587/0.114 weighted luminance; the colour is then blended
	// towards the tone-mapped luminance (grey) by a factor that grows with
	// the input luminance.
	vec3 tonemap_aces(vec3 color) {
	    const float A = 2.51f, B = 0.03f, C = 2.43f, D = 0.59f, E = 0.14f;
	    const float SLOPE = 12.0;

	    float luma = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
	    vec4 v = vec4(color.r, color.g, color.b, luma);

	    vec4 numer = v * (A * v + B);
	    vec4 denom = v * (C * v + D) + E;
	    vec4 mapped = clamp(numer / denom, 0.0, 1.0);

	    float grey_mix = luma * luma / (SLOPE + luma);
	    return mix(mapped.rgb, mapped.aaa, grey_mix);
	}

	// Sky radiance integrated over the direction interval
	// [angle[0], angle[1]] (radians). Intervals that wrap past 2*PI are split
	// by sky() before they reach this function.
	//
	// Sky integral formula taken from
	// Analytic Direct Illumination - Mathis
	// https://www.shadertoy.com/view/NttSW7
	vec3 sky_(vec2 angle) {
	    float a1 = angle[1];
	    float a0 = angle[0];
	    const vec3 SkyColor = vec3(0.2,0.5,1.);
	    const vec3 SunColor = vec3(1.,0.7,0.1)*10.;
	    const float SunA = 2.0;
	    const float SunS = 64.0;
	    const float SSunS = sqrt(SunS);
	    const float ISSunS = 1./SSunS;
	    // the multiplication operators on the next two statements had been
	    // stripped from this copy, turning them into invalid call expressions
	    vec3 SI = SkyColor*(a1-a0-0.5*(cos(a1)-cos(a0)));
	    SI += SunColor*(atan(SSunS*(SunA-a0))-atan(SSunS*(SunA-a1)))*ISSunS;
	    return SI / 6.0;
	}

	// Integrates the sky radiance over the direction interval
	// [angle[0], angle[1]]. When the end of the interval is not below 2*PI
	// the interval is split at 2*PI and the remainder is integrated from 0.
	vec3 sky(vec2 angle) {
	    bool wraps = !(angle[1] < 2.0 * PI);
	    if (wraps) {
	        vec3 up_to_full_turn = sky_(vec2(angle[0], 2.0 * PI));
	        vec3 past_full_turn = sky_(vec2(0.0, angle[1] - 2.0 * PI));
	        return up_to_full_turn + past_full_turn;
	    }
	    return sky_(angle);
	}

	// Fragment stage. One shader serves both passes, selected by should_do_render:
	//  - render pass: averages the r0 rays of the cascade-0 probe with index
	//    round(pixel/d0), read from u_prev, and tone-maps the result;
	//  - cascade pass: computes one texel (one ray of one probe) of cascade ci
	//    by marching through u_input; a ray that hits nothing takes its value
	//    from the cascade above (u_prev) or, in the top cascade, from the sky.
	//
	// This copy had lost several `*` operators and had its `||` operators
	// escaped; they are restored below.
	void main(void) {
	    if (should_do_render == 1) {
	        // sample probe in cascade 0
	        float x = fuv.x * u_resolution.x;
	        float y = fuv.y * u_resolution.y;
	        float xi = round(x/d0);
	        float yi = round(y/d0);
	        vec3 c = vec3(0,0,0);
	        // the r0 rays of one probe are adjacent texels on a single row
	        for (int r = 0; r < r0; ++r) {
	            vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
	            c += texture(u_prev, pixelcoord / textureSize(u_prev, 0)).rgb;
	        }
	        ocolor = vec4(tonemap_aces(c/r0),1);
	    }
	    else {
	        // build cascade
	        int u = int(gl_FragCoord.x);
	        int v = int(gl_FragCoord.y);

	        int lm = 2;// ray distance branching factor. ray distance = 2^(lm*ci)
	        int rm = 1;// ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
	        int n = n0 >> ci; // number of probes in one dimension
	        float d = d0*(1 << ci); // distance between probes
	        int rn = r0 << (rm*ci); // number of pixels/rays per probe
	        int yi = v; // probe index
	        int xi = u/rn; // probe index
	        int r = u - xi*rn; // ray index
	        float dx = d0*0.5f*(1 << ci); // half the probe spacing: probes sit at cell centres
	        float x = xi * d + dx; // probe pos
	        float y = yi * d + dx; // probe pos
	        float l = 0.5 * d0; // length of ray
	        float intensity = 1.0;

	        // texels outside the probe grid of this cascade are left empty
	        if (xi >= n || xi < 0 || yi >= n || yi < 0) {
	            ocolor = vec4(0,0,0,0);
	            return;
	        }

	        float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
	        float rb = l*(1 << (ci*lm)); // end of ray length interval

	        float alpha = 2*PI*(float(r)+0.5)/rn; // direction at the centre of ray r's angular slice
	        vec2 rot = vec2(cos(alpha), sin(alpha));
	        vec2 a = vec2(x,y) + rot*ra; // start of ray
	        vec2 b = vec2(x,y) + rot*rb; // end of ray
	        raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
	        vec4 col = vec4(0,0,0,0);
	        // raymarch2d_next steps before the first sample, so the cell the
	        // ray starts in is never tested
	        while (raymarch2d_next(raym)) {
	            // this vec3 v shadows the int v declared above
	            vec3 v = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
	            // pure white marks empty space; any other colour counts as a hit
	            if (v != vec3(1,1,1)) {
	                col = vec4(v*intensity,1);
	                break;
	            }
	        }

	        // if no hit, get from upper cascade
	        // TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
	        if (col.a == 0) {
	            if (ci == cn-1) {
	                // NOTE(review): alpha is the centre of this ray's slice, yet the
	                // sky is integrated over [alpha, alpha + slice width] - confirm
	                // that the half-slice offset is intended
	                if (add_sky_light != 0)
	                    col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
	                else
	                    col = vec4(0,0,0,0);
	            }
	            else {
	                int xi2 = (xi+1)/2; // probe index in upper
	                int yi2 = (yi+1)/2; // probe index in upper
	                int r2 = r << rm; // ray index in upper
	                int rn2 = rn << rm; // num rays in upper
	                int n2 = n >> 1; // num probes in upper
	                float tx = 0.75 - 0.5*float(xi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
	                float ty = 0.75 - 0.5*float(yi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid

	                // loop through all the nearby rays in the upper cascade
	                // TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
	                //       we should choose a better weighting than just treating them all equally
	                vec4 upper = vec4(0,0,0,0);
	                float frac = 1.0 / (1 << rm);
	                for (int ri = 0; ri < (1 << rm); ++ri) {
	                    // the four upper probes (xi2-1|xi2, yi2-1|yi2), clamped to the upper grid
	                    vec2 pc1 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
	                    vec2 pc2 = floor(vec2(clamp(xi2,   0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
	                    vec2 pc3 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2,   0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
	                    vec2 pc4 = floor(vec2(clamp(xi2,   0, n2-1)*rn2 + r2 + ri, clamp(yi2,   0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
	                    // manual bilinear blend of the four probes
	                    vec4 c = mix(
	                        mix(texture(u_prev, pc1 / textureSize(u_prev, 0)), texture(u_prev, pc2 / textureSize(u_prev, 0)), tx),
	                        mix(texture(u_prev, pc3 / textureSize(u_prev, 0)), texture(u_prev, pc4 / textureSize(u_prev, 0)), tx),
	                        ty
	                    );
	                    upper += c*frac;
	                }
	                col = upper;
	            }
	        }

	        // alpha is always written as 1, whatever col.a was
	        ocolor = vec4(col.rgb, 1);
	    }
	}

	#endif /* FRAGMENT_SHADER */