fp64/triangle_rasterizer.h

## triangle_rasterizer.h
// Public Domain under http://unlicense.org, see link for details.

// Simple self-contained software triangle rasterizer, largerly based on
// https://fgiesen.wordpress.com/2013/02/10/optimizing-the-basic-rasterizer/
// See also https://gist.github.com/rygorous/9b793cd21d876da928bf4c7f3e625908

// Not optimized for speed, but should be practical for
// less speed-intensive purposes.

#if 0
// Example usage:
// Render a 3D triangle defined by homogeneous coordinates of
// its vertices, supplying perspective-correct coordinates to
// shader.
// WARNING: does not handle vertices behind the camera (w<0),
// and may fail for vertices too close to w=0 due to overflow.
// For that you may need to use 3D clipping (see e.g.
// https://www.khronos.org/opengl/wiki/Vertex_Post-Processing#Clipping)
// or switch to an altogether clipless solution (see e.g.
// http://threadlocalmutex.com/?p=144 and
// https://redirect.cs.umbc.edu/~olano/papers/2dh-tri/), which
// rasterize_triangle() does not do.
template<typename F>
static void render_triangle(
    const float *v0,
    const float *v1,
    const float *v2,
    int width,int height,
    void *context,
    const F &shader)
{
    float inv_w0=1.0f/v0[3];
    float inv_w1=1.0f/v1[3];
    float inv_w2=1.0f/v2[3];
    rasterize_triangle(
        v0[0]*inv_w0,v0[1]*inv_w0,
        v1[0]*inv_w1,v1[1]*inv_w1,
        v2[0]*inv_w2,v2[1]*inv_w2,
        0,0,width,height,
        context,
        [&](int x,int y,float a0,float a1,float a2,void *userdata)->void
        {
            // Convert area coordinates that rasterize_triangle produces
            // to perspective-corrected barycentric coordinates.
            float b0=a0*inv_w0;
            float b1=a1*inv_w1;
            float b2=a2*inv_w2;
            float w=1.0f/(b0+b1+b2);
            shader(x,y,b0*w,b1*w,b2*w,userdata);
        });
}
#endif

#ifndef TRIANGLE_RASTERIZER_H
#define TRIANGLE_RASTERIZER_H

#include <algorithm> // for min/max.

// Rasterize a triangle and invoke f() for every covered pixel.
// Clockwise triangles (e.g. (0,0)->(0,1)->(1,0)) are discarded.
// Sampling points are at pixel centers. Uses top-left fill
// rule (see https://learn.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-rasterizer-stage-rules).
// Only pixels in scissor region [x_lo;x_hi)x[y_lo;y_hi) are
// considered.
// The callback f() takes pixel coordinates, area coordinates
// (i.e. normalized screen-space barycentric coordinates such
// that a0+a1+a2=1), and an arbitrary context (i.e. userdata;
// less necessary in C++, but helpful for porting/interfacing
// pure C).
// WARNING: to avoid integer overflow, the triangle's AABB should be
// small enough. E.g. for subpixel_bits=4 and narrow=int32_t the
// triangle needs to fit in 8192x8192.
// NOTE: subpixel_bits=4 is typical for OpenGL 1.0. DirectX 11
// uses subpixel_bits=8.
template<int subpixel_bits=4,typename narrow=int,typename wide=long long,typename F>
void rasterize_triangle(
    float x0,float y0,
    float x1,float y1,
    float x2,float y2,
    narrow x_lo,narrow y_lo,narrow x_hi,narrow y_hi,
    void *userdata,
    const F &f)
{
    const int subpixel_size=1<<subpixel_bits;

    // Round to integer (subpixel) vertex coordinates.
    // Can use lrintf() or something instead.
#define ROUND(t) narrow(float(subpixel_size)*(t)+((t)<0.0f?-0.5f:+0.5f))
    narrow ix0=ROUND(x0);
    narrow iy0=ROUND(y0);
    narrow ix1=ROUND(x1);
    narrow iy1=ROUND(y1);
    narrow ix2=ROUND(x2);
    narrow iy2=ROUND(y2);
#undef ROUND

    // Find bounding box.
    x_lo=std::max(x_lo, std::min(ix0,std::min(ix1,ix2))>>subpixel_bits);
    y_lo=std::max(y_lo, std::min(iy0,std::min(iy1,iy2))>>subpixel_bits);
    x_hi=std::min(x_hi,(std::max(ix0,std::max(ix1,ix2))>>subpixel_bits)+1);
    y_hi=std::min(y_hi,(std::max(iy0,std::max(iy1,iy2))>>subpixel_bits)+1);

    if(x_lo>=x_hi||y_lo>=y_hi) return; // Early-out.

    //  Pixel center of pixel (x_lo,y_lo).
    narrow cx=x_lo*subpixel_size+subpixel_size/2;
    narrow cy=y_lo*subpixel_size+subpixel_size/2;

    // Biases, to incorporate the fill rules.
#define TOPLEFT(xA,yA,xB,yB) (((yA)==(yB)&&(xB)<(xA))||(yB)<(yA))
    narrow b0=TOPLEFT(ix1,iy1,ix2,iy2)?0:-1;
    narrow b1=TOPLEFT(ix2,iy2,ix0,iy0)?0:-1;
    narrow b2=TOPLEFT(ix0,iy0,ix1,iy1)?0:-1;
#undef TOPLEFT

    // Edge functions.
#define EDGE(xA,yA,xB,yB) (wide(xB-xA)*wide(cy-yA)-wide(yB-yA)*wide(cx-xA))
    wide E0=EDGE(ix1,iy1,ix2,iy2);
    wide E1=EDGE(ix2,iy2,ix0,iy0);
    wide E2=EDGE(ix0,iy0,ix1,iy1);
    if(E0+E1+E2<0) return; // Early-out.
    float inv_sum=float(subpixel_size)/float(E0+E1+E2);
#undef EDGE

    // Since we step full pixels, the lower
    // bits of E* stay the same, so we can shift
    // them out, effectively using wider type,
    // raising overflow-proof threshold.
    // This part only makes sense if
    // sizeof(narrow)<sizeof(wide).
    narrow e0=narrow((E0+b0)>>subpixel_bits);
    narrow e1=narrow((E1+b1)>>subpixel_bits);
    narrow e2=narrow((E2+b2)>>subpixel_bits);
    // We also need compute adjustments to barycentric
    // coordinates (regardless of the trick above, since
    // we need to account for the biases).
    float fb0=float(E0-wide(e0)*subpixel_size)*inv_sum/float(subpixel_size);
    float fb1=float(E1-wide(e1)*subpixel_size)*inv_sum/float(subpixel_size);
    float fb2=float(E2-wide(e2)*subpixel_size)*inv_sum/float(subpixel_size);

    // Increments.
    narrow d0x=-(iy2-iy1), d0y=+(ix2-ix1);
    narrow d1x=-(iy0-iy2), d1y=+(ix0-ix2);
    narrow d2x=-(iy1-iy0), d2y=+(ix1-ix0);

    // Process pixels in bounding box.
    for(narrow y=y_lo;y<y_hi;++y)
    {
        narrow e0x=e0;
        narrow e1x=e1;
        narrow e2x=e2;
        for(narrow x=x_lo;x<x_hi;++x)
        {
            if((e0x|e1x|e2x)>=0)
            {
                // Screen-space barycentric coordinates (sometimes called "area coordinates").
                float a0=float(e0x)*inv_sum+fb0;
                float a1=float(e1x)*inv_sum+fb1;
                float a2=float(e2x)*inv_sum+fb2;
                f(x,y,a0,a1,a2,userdata);
            }
            e0x+=d0x;
            e1x+=d1x;
            e2x+=d2x;
        }
        e0+=d0y;
        e1+=d1y;
        e2+=d2y;
    }
}

#endif // TRIANGLE_RASTERIZER_H
	// Public Domain under http://unlicense.org, see link for details.

	// Simple self-contained software triangle rasterizer, largerly based on
	// https://fgiesen.wordpress.com/2013/02/10/optimizing-the-basic-rasterizer/
	// See also https://gist.github.com/rygorous/9b793cd21d876da928bf4c7f3e625908

	// Not optimized for speed, but should be practical for
	// less speed-intensive purposes.

	#if 0
	// Example usage:
	// Render a 3D triangle defined by homogeneous coordinates of
	// its vertices, supplying perspective-correct coordinates to
	// shader.
	// WARNING: does not handle vertices behind the camera (w<0),
	// and may fail for vertices too close to w=0 due to overflow.
	// For that you may need to use 3D clipping (see e.g.
	// https://www.khronos.org/opengl/wiki/Vertex_Post-Processing#Clipping)
	// or switch to an altogether clipless solution (see e.g.
	// http://threadlocalmutex.com/?p=144 and
	// https://redirect.cs.umbc.edu/~olano/papers/2dh-tri/), which
	// rasterize_triangle() does not do.
	template<typename F>
	static void render_triangle(
	const float *v0,
	const float *v1,
	const float *v2,
	int width,int height,
	void *context,
	const F &shader)
	{
	float inv_w0=1.0f/v0[3];
	float inv_w1=1.0f/v1[3];
	float inv_w2=1.0f/v2[3];
	rasterize_triangle(
	v0[0]inv_w0,v0[1]inv_w0,
	v1[0]inv_w1,v1[1]inv_w1,
	v2[0]inv_w2,v2[1]inv_w2,
	0,0,width,height,
	context,
	[&](int x,int y,float a0,float a1,float a2,void *userdata)->void
	{
	// Convert area coordinates that rasterize_triangle produces
	// to perspective-corrected barycentric coordinates.
	float b0=a0*inv_w0;
	float b1=a1*inv_w1;
	float b2=a2*inv_w2;
	float w=1.0f/(b0+b1+b2);
	shader(x,y,b0w,b1w,b2*w,userdata);
	});
	}
	#endif

	#ifndef TRIANGLE_RASTERIZER_H
	#define TRIANGLE_RASTERIZER_H

	#include <algorithm> // for min/max.

	// Rasterize a triangle and invoke f() for every covered pixel.
	// Clockwise triangles (e.g. (0,0)->(0,1)->(1,0)) are discarded.
	// Sampling points are at pixel centers. Uses top-left fill
	// rule (see https://learn.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-rasterizer-stage-rules).
	// Only pixels in scissor region [x_lo;x_hi)x[y_lo;y_hi) are
	// considered.
	// The callback f() takes pixel coordinates, area coordinates
	// (i.e. normalized screen-space barycentric coordinates such
	// that a0+a1+a2=1), and an arbitrary context (i.e. userdata;
	// less necessary in C++, but helpful for porting/interfacing
	// pure C).
	// WARNING: to avoid integer overflow, the triangle's AABB should be
	// small enough. E.g. for subpixel_bits=4 and narrow=int32_t the
	// triangle needs to fit in 8192x8192.
	// NOTE: subpixel_bits=4 is typical for OpenGL 1.0. DirectX 11
	// uses subpixel_bits=8.
	template<int subpixel_bits=4,typename narrow=int,typename wide=long long,typename F>
	void rasterize_triangle(
	float x0,float y0,
	float x1,float y1,
	float x2,float y2,
	narrow x_lo,narrow y_lo,narrow x_hi,narrow y_hi,
	void *userdata,
	const F &f)
	{
	const int subpixel_size=1<<subpixel_bits;

	// Round to integer (subpixel) vertex coordinates.
	// Can use lrintf() or something instead.
	#define ROUND(t) narrow(float(subpixel_size)*(t)+((t)<0.0f?-0.5f:+0.5f))
	narrow ix0=ROUND(x0);
	narrow iy0=ROUND(y0);
	narrow ix1=ROUND(x1);
	narrow iy1=ROUND(y1);
	narrow ix2=ROUND(x2);
	narrow iy2=ROUND(y2);
	#undef ROUND

	// Find bounding box.
	x_lo=std::max(x_lo, std::min(ix0,std::min(ix1,ix2))>>subpixel_bits);
	y_lo=std::max(y_lo, std::min(iy0,std::min(iy1,iy2))>>subpixel_bits);
	x_hi=std::min(x_hi,(std::max(ix0,std::max(ix1,ix2))>>subpixel_bits)+1);
	y_hi=std::min(y_hi,(std::max(iy0,std::max(iy1,iy2))>>subpixel_bits)+1);

	if(x_lo>=x_hi\|\|y_lo>=y_hi) return; // Early-out.

	// Pixel center of pixel (x_lo,y_lo).
	narrow cx=x_lo*subpixel_size+subpixel_size/2;
	narrow cy=y_lo*subpixel_size+subpixel_size/2;

	// Biases, to incorporate the fill rules.
	#define TOPLEFT(xA,yA,xB,yB) (((yA)==(yB)&&(xB)<(xA))\|\|(yB)<(yA))
	narrow b0=TOPLEFT(ix1,iy1,ix2,iy2)?0:-1;
	narrow b1=TOPLEFT(ix2,iy2,ix0,iy0)?0:-1;
	narrow b2=TOPLEFT(ix0,iy0,ix1,iy1)?0:-1;
	#undef TOPLEFT

	// Edge functions.
	#define EDGE(xA,yA,xB,yB) (wide(xB-xA)wide(cy-yA)-wide(yB-yA)wide(cx-xA))
	wide E0=EDGE(ix1,iy1,ix2,iy2);
	wide E1=EDGE(ix2,iy2,ix0,iy0);
	wide E2=EDGE(ix0,iy0,ix1,iy1);
	if(E0+E1+E2<0) return; // Early-out.
	float inv_sum=float(subpixel_size)/float(E0+E1+E2);
	#undef EDGE

	// Since we step full pixels, the lower
	// bits of E* stay the same, so we can shift
	// them out, effectively using wider type,
	// raising overflow-proof threshold.
	// This part only makes sense if
	// sizeof(narrow)<sizeof(wide).
	narrow e0=narrow((E0+b0)>>subpixel_bits);
	narrow e1=narrow((E1+b1)>>subpixel_bits);
	narrow e2=narrow((E2+b2)>>subpixel_bits);
	// We also need compute adjustments to barycentric
	// coordinates (regardless of the trick above, since
	// we need to account for the biases).
	float fb0=float(E0-wide(e0)subpixel_size)inv_sum/float(subpixel_size);
	float fb1=float(E1-wide(e1)subpixel_size)inv_sum/float(subpixel_size);
	float fb2=float(E2-wide(e2)subpixel_size)inv_sum/float(subpixel_size);

	// Increments.
	narrow d0x=-(iy2-iy1), d0y=+(ix2-ix1);
	narrow d1x=-(iy0-iy2), d1y=+(ix0-ix2);
	narrow d2x=-(iy1-iy0), d2y=+(ix1-ix0);

	// Process pixels in bounding box.
	for(narrow y=y_lo;y<y_hi;++y)
	{
	narrow e0x=e0;
	narrow e1x=e1;
	narrow e2x=e2;
	for(narrow x=x_lo;x<x_hi;++x)
	{
	if((e0x\|e1x\|e2x)>=0)
	{
	// Screen-space barycentric coordinates (sometimes called "area coordinates").
	float a0=float(e0x)*inv_sum+fb0;
	float a1=float(e1x)*inv_sum+fb1;
	float a2=float(e2x)*inv_sum+fb2;
	f(x,y,a0,a1,a2,userdata);
	}
	e0x+=d0x;
	e1x+=d1x;
	e2x+=d2x;
	}
	e0+=d0y;
	e1+=d1y;
	e2+=d2y;
	}
	}

	#endif // TRIANGLE_RASTERIZER_H