r-lyeh-archived/quant.cc

## quant.cc
#include <stdint.h>
// r-lyeh, public domain - quantize quaternions to 32-bits & numbers to 8/10-bits {
//
// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
// D3D10, GL2: for *_UNORM formats of n-bit length, the decode function is decode(x) = x / (2^n - 1)
// Unsigned quantization: input: [0..1] float; output: [0..255] integer
// D3D10: for *_SNORM formats of n-bit length the decode function is decode(x) = clamp(x / (2^(n-1) - 1), -1, 1)
// Signed quantization for D3D10 rules: input: [-1..1] float; output: [-127..127] integer
static inline uint8_t   encode8_unorm(float    x) { return uint8_t( int (x * 255.f + 0.5f) ); }
static inline float     decode8_unorm(uint8_t  x) { return x / 255.f; }
static inline uint8_t   encode8_snorm(float    x) { return uint8_t( int (x * 127.f + (x > 0 ? 0.5f : -0.5f)) ); }
static inline float     decode8_snorm(uint8_t  x) { float f = x / 127.f; return f <= -1 ? -1.f : (f >= 1 ? 1.f : f); }
//
// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
// OpenGL2: same decoding function for unsigned numbers, but a different one for signed: decode(x) = (2x + 1) / (2^n - 1)
// Signed quantization for OpenGL rules: input: [-1..1] float; output: [-128..127] integer
// Warning: This has slightly better precision (all numbers encode distinct values), but can't represent 0 exactly.
static inline uint8_t   encode8_snorm_gl2(float   x) { return uint8_t( int (x * 127.5f + (x >= 0.f ? 0.f : -1.f)) ); }
static inline float     decode8_snorm_gl2(uint8_t x) { return (2*x + 1) / 255.f; }
//
// Custom: 10-bit based on D3D10_UNORM
// Used to quantize quaternions (see functions below)
static inline uint16_t  encode10_unorm(float    x) { return uint16_t( int (x * 1023.f + 0.5f) ); }
static inline float     decode10_unorm(uint16_t x) { return x / 1023.f; }
static inline uint16_t  encode10_snorm(float    x) { return encode10_unorm( ( x + 1 ) * 0.5f ); }
static inline float     decode10_snorm(uint16_t x) { return ( decode10_unorm( x ) * 2 ) - 1 ; }

template<typename real>
static inline void encode32_quat( uint32_t &out, real a, real b, real c, real d ) {
    // [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
    // "For quaternions we use 2 bits to store the index of the largest component,
    // then 10 bits each to store the value of the remaining three components. We use the knowledge
    // that 1 = x^2 + y^2 + z^2 + w^2 to restore the largest component, so we don't actually have to store its value.
    // Since we don't store the largest component we know that the remaining ones must be in the range (-1/sqrt(2), 1/sqrt(2))
    // (otherwise, one of them would be largest). So we use the 10 bits to quantize a value in that range, giving us a precision of 0.0014.
    // The quaternions (x, y, z, w) and (-x, -y, -z, -w) represent the same rotation,
    // so I flip the signs so that the largest component is always positive." - Niklas Frykholm / bitsquid.se
    real aa = a*a, bb = b*b, cc = c*c, dd = d*d, sgn;
    /**/ if( aa >= bb && aa >= cc && aa >= dd ) {
        out = a >= 0 ? uint32_t( (0<<30) | (encode10_snorm( b)<<20) | (encode10_snorm( c)<<10) | (encode10_snorm( d)<<0) )
                     : uint32_t( (0<<30) | (encode10_snorm(-b)<<20) | (encode10_snorm(-c)<<10) | (encode10_snorm(-d)<<0) );
    }
    else if( bb >= aa && bb >= cc && bb >= dd ) {
        out = b >= 0 ? uint32_t( (1<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( c)<<10) | (encode10_snorm( d)<<0) )
                     : uint32_t( (1<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-c)<<10) | (encode10_snorm(-d)<<0) );
    }
    else if( cc >= aa && cc >= bb && cc >= dd ) {
        out = c >= 0 ? uint32_t( (2<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( b)<<10) | (encode10_snorm( d)<<0) )
                     : uint32_t( (2<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-b)<<10) | (encode10_snorm(-d)<<0) );
    }
    else {
        out = d >= 0 ? uint32_t( (3<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( b)<<10) | (encode10_snorm( c)<<0) )
                     : uint32_t( (3<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-b)<<10) | (encode10_snorm(-c)<<0) );
    }
}
template<typename real>
static inline void decode32_quat( real &a, real &b, real &c, real &d, const uint32_t &in ) {
    // [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
    // [ref] http://en.wikipedia.org/wiki/Fast_inverse_square_root
    auto rsqrt = []( float number ) -> float {
        long i;
        float x2, y;
        const float threehalfs = 1.5F;
        x2 = number * 0.5F;
        y  = number;
        i  = * ( long * ) &y;                       // evil floating point bit level hacking
        i  = 0x5f3759df - ( i >> 1 );               // what the fuck?
        y  = * ( float * ) &i;
        y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
        y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed
        return y;
    };
    switch( in >> 30 ) {
        default: case 0:
            b = decode10_snorm( ( in >> 20 ) & 0x3FF );
            c = decode10_snorm( ( in >> 10 ) & 0x3FF );
            d = decode10_snorm( ( in >>  0 ) & 0x3FF );
            a = 1 / rsqrt( 1 - a * a - b * b - c * c );
        break; case 1:
            a = decode10_snorm( ( in >> 20 ) & 0x3FF );
            c = decode10_snorm( ( in >> 10 ) & 0x3FF );
            d = decode10_snorm( ( in >>  0 ) & 0x3FF );
            b = 1 / rsqrt( 1 - a * a - b * b - c * c );
        break; case 2:
            a = decode10_snorm( ( in >> 20 ) & 0x3FF );
            b = decode10_snorm( ( in >> 10 ) & 0x3FF );
            d = decode10_snorm( ( in >>  0 ) & 0x3FF );
            c = 1 / rsqrt( 1 - a * a - b * b - c * c );
        break; case 3:
            a = decode10_snorm( ( in >> 20 ) & 0x3FF );
            b = decode10_snorm( ( in >> 10 ) & 0x3FF );
            c = decode10_snorm( ( in >>  0 ) & 0x3FF );
            d = 1 / rsqrt( 1 - a * a - b * b - c * c );
    }
}
// } quant.cc

#ifdef QUANTIZE_TESTS

#include <iostream>

float acc[3] = {0,0,0};
float hit[3] = {0,0,0};

void verify( float x ) {
    const int y1 = encode8_unorm(x);     const float x1 = decode8_unorm(y1);
    const int y2 = encode8_snorm(x);     const float x2 = decode8_snorm(y2);
    const int y3 = encode8_snorm_gl2(x); const float x3 = decode8_snorm_gl2(y3);

    float err;
    err = std::abs(x - x1); acc[0] += err; hit[0]++; std::cout << "v1 " << (err > 0 ? "failed! " : "ok! ") << x1 << " vs " << x << " (error: " << err << ")" << std::endl;
    err = std::abs(x - x2); acc[1] += err; hit[1]++; std::cout << "v2 " << (err > 0 ? "failed! " : "ok! ") << x2 << " vs " << x << " (error: " << err << ")" << std::endl;
    err = std::abs(x - x3); acc[2] += err; hit[2]++; std::cout << "v3 " << (err > 0 ? "failed! " : "ok! ") << x3 << " vs " << x << " (error: " << err << ")" << std::endl;
}

// run 'sample | sort'
int main() {
    for( auto x = 0, e = 1000; x <= e; ++x ) {
        verify( x * 0.001f );
    }
    std::cout << "average error (lower is better)" << std::endl;
    std::cout << "avg-1:" << (acc[0] / hit[0]) << std::endl;
    std::cout << "avg-2:" << (acc[1] / hit[1]) << std::endl;
    std::cout << "avg-3:" << (acc[2] / hit[2]) << std::endl;
}

#endif
	#include <stdint.h>
	// r-lyeh, public domain - quantize quaternions to 32-bits & numbers to 8/10-bits {
	//
	// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
	// D3D10, GL2: for *_UNORM formats of n-bit length, the decode function is decode(x) = x / (2^n - 1)
	// Unsigned quantization: input: [0..1] float; output: [0..255] integer
	// D3D10: for *_SNORM formats of n-bit length the decode function is decode(x) = clamp(x / (2^(n-1) - 1), -1, 1)
	// Signed quantization for D3D10 rules: input: [-1..1] float; output: [-127..127] integer
	static inline uint8_t encode8_unorm(float x) { return uint8_t( int (x * 255.f + 0.5f) ); }
	static inline float decode8_unorm(uint8_t x) { return x / 255.f; }
	static inline uint8_t encode8_snorm(float x) { return uint8_t( int (x * 127.f + (x > 0 ? 0.5f : -0.5f)) ); }
	static inline float decode8_snorm(uint8_t x) { float f = x / 127.f; return f <= -1 ? -1.f : (f >= 1 ? 1.f : f); }
	//
	// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
	// OpenGL2: same decoding function for unsigned numbers, but a different one for signed: decode(x) = (2x + 1) / (2^n - 1)
	// Signed quantization for OpenGL rules: input: [-1..1] float; output: [-128..127] integer
	// Warning: This has slightly better precision (all numbers encode distinct values), but can't represent 0 exactly.
	static inline uint8_t encode8_snorm_gl2(float x) { return uint8_t( int (x * 127.5f + (x >= 0.f ? 0.f : -1.f)) ); }
	static inline float decode8_snorm_gl2(uint8_t x) { return (2*x + 1) / 255.f; }
	//
	// Custom: 10-bit based on D3D10_UNORM
	// Used to quantize quaternions (see functions below)
	static inline uint16_t encode10_unorm(float x) { return uint16_t( int (x * 1023.f + 0.5f) ); }
	static inline float decode10_unorm(uint16_t x) { return x / 1023.f; }
	static inline uint16_t encode10_snorm(float x) { return encode10_unorm( ( x + 1 ) * 0.5f ); }
	static inline float decode10_snorm(uint16_t x) { return ( decode10_unorm( x ) * 2 ) - 1 ; }

	template<typename real>
	static inline void encode32_quat( uint32_t &out, real a, real b, real c, real d ) {
	// [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
	// "For quaternions we use 2 bits to store the index of the largest component,
	// then 10 bits each to store the value of the remaining three components. We use the knowledge
	// that 1 = x^2 + y^2 + z^2 + w^2 to restore the largest component, so we don't actually have to store its value.
	// Since we don't store the largest component we know that the remaining ones must be in the range (-1/sqrt(2), 1/sqrt(2))
	// (otherwise, one of them would be largest). So we use the 10 bits to quantize a value in that range, giving us a precision of 0.0014.
	// The quaternions (x, y, z, w) and (-x, -y, -z, -w) represent the same rotation,
	// so I flip the signs so that the largest component is always positive." - Niklas Frykholm / bitsquid.se
	real aa = aa, bb = bb, cc = cc, dd = dd, sgn;
	/**/ if( aa >= bb && aa >= cc && aa >= dd ) {
	out = a >= 0 ? uint32_t( (0<<30) \| (encode10_snorm( b)<<20) \| (encode10_snorm( c)<<10) \| (encode10_snorm( d)<<0) )
	: uint32_t( (0<<30) \| (encode10_snorm(-b)<<20) \| (encode10_snorm(-c)<<10) \| (encode10_snorm(-d)<<0) );
	}
	else if( bb >= aa && bb >= cc && bb >= dd ) {
	out = b >= 0 ? uint32_t( (1<<30) \| (encode10_snorm( a)<<20) \| (encode10_snorm( c)<<10) \| (encode10_snorm( d)<<0) )
	: uint32_t( (1<<30) \| (encode10_snorm(-a)<<20) \| (encode10_snorm(-c)<<10) \| (encode10_snorm(-d)<<0) );
	}
	else if( cc >= aa && cc >= bb && cc >= dd ) {
	out = c >= 0 ? uint32_t( (2<<30) \| (encode10_snorm( a)<<20) \| (encode10_snorm( b)<<10) \| (encode10_snorm( d)<<0) )
	: uint32_t( (2<<30) \| (encode10_snorm(-a)<<20) \| (encode10_snorm(-b)<<10) \| (encode10_snorm(-d)<<0) );
	}
	else {
	out = d >= 0 ? uint32_t( (3<<30) \| (encode10_snorm( a)<<20) \| (encode10_snorm( b)<<10) \| (encode10_snorm( c)<<0) )
	: uint32_t( (3<<30) \| (encode10_snorm(-a)<<20) \| (encode10_snorm(-b)<<10) \| (encode10_snorm(-c)<<0) );
	}
	}
	template<typename real>
	static inline void decode32_quat( real &a, real &b, real &c, real &d, const uint32_t &in ) {
	// [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
	// [ref] http://en.wikipedia.org/wiki/Fast_inverse_square_root
	auto rsqrt = []( float number ) -> float {
	long i;
	float x2, y;
	const float threehalfs = 1.5F;
	x2 = number * 0.5F;
	y = number;
	i = * ( long * ) &y; // evil floating point bit level hacking
	i = 0x5f3759df - ( i >> 1 ); // what the fuck?
	y = * ( float * ) &i;
	y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
	y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed
	return y;
	};
	switch( in >> 30 ) {
	default: case 0:
	b = decode10_snorm( ( in >> 20 ) & 0x3FF );
	c = decode10_snorm( ( in >> 10 ) & 0x3FF );
	d = decode10_snorm( ( in >> 0 ) & 0x3FF );
	a = 1 / rsqrt( 1 - a * a - b * b - c * c );
	break; case 1:
	a = decode10_snorm( ( in >> 20 ) & 0x3FF );
	c = decode10_snorm( ( in >> 10 ) & 0x3FF );
	d = decode10_snorm( ( in >> 0 ) & 0x3FF );
	b = 1 / rsqrt( 1 - a * a - b * b - c * c );
	break; case 2:
	a = decode10_snorm( ( in >> 20 ) & 0x3FF );
	b = decode10_snorm( ( in >> 10 ) & 0x3FF );
	d = decode10_snorm( ( in >> 0 ) & 0x3FF );
	c = 1 / rsqrt( 1 - a * a - b * b - c * c );
	break; case 3:
	a = decode10_snorm( ( in >> 20 ) & 0x3FF );
	b = decode10_snorm( ( in >> 10 ) & 0x3FF );
	c = decode10_snorm( ( in >> 0 ) & 0x3FF );
	d = 1 / rsqrt( 1 - a * a - b * b - c * c );
	}
	}
	// } quant.cc

	#ifdef QUANTIZE_TESTS

	#include <iostream>

	float acc[3] = {0,0,0};
	float hit[3] = {0,0,0};

	void verify( float x ) {
	const int y1 = encode8_unorm(x); const float x1 = decode8_unorm(y1);
	const int y2 = encode8_snorm(x); const float x2 = decode8_snorm(y2);
	const int y3 = encode8_snorm_gl2(x); const float x3 = decode8_snorm_gl2(y3);

	float err;
	err = std::abs(x - x1); acc[0] += err; hit[0]++; std::cout << "v1 " << (err > 0 ? "failed! " : "ok! ") << x1 << " vs " << x << " (error: " << err << ")" << std::endl;
	err = std::abs(x - x2); acc[1] += err; hit[1]++; std::cout << "v2 " << (err > 0 ? "failed! " : "ok! ") << x2 << " vs " << x << " (error: " << err << ")" << std::endl;
	err = std::abs(x - x3); acc[2] += err; hit[2]++; std::cout << "v3 " << (err > 0 ? "failed! " : "ok! ") << x3 << " vs " << x << " (error: " << err << ")" << std::endl;
	}

	// run 'sample \| sort'
	int main() {
	for( auto x = 0, e = 1000; x <= e; ++x ) {
	verify( x * 0.001f );
	}
	std::cout << "average error (lower is better)" << std::endl;
	std::cout << "avg-1:" << (acc[0] / hit[0]) << std::endl;
	std::cout << "avg-2:" << (acc[1] / hit[1]) << std::endl;
	std::cout << "avg-3:" << (acc[2] / hit[2]) << std::endl;
	}

	#endif