Skip to content

Instantly share code, notes, and snippets.

@r-lyeh-archived
Last active March 27, 2023 03:39
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save r-lyeh-archived/1bc2097b8ed4fe36259f to your computer and use it in GitHub Desktop.
Save r-lyeh-archived/1bc2097b8ed4fe36259f to your computer and use it in GitHub Desktop.
suite of de/quantizers (s/unorm direct3d, s/unorm opengl and quaternions)
#include <stdint.h>
// r-lyeh, public domain - quantize quaternions to 32-bits & numbers to 8/10-bits {
//
// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
// D3D10, GL2: for *_UNORM formats of n-bit length, the decode function is decode(x) = x / (2^n - 1)
// Unsigned quantization: input: [0..1] float; output: [0..255] integer
// D3D10: for *_SNORM formats of n-bit length the decode function is decode(x) = clamp(x / (2^(n-1) - 1), -1, 1)
// Signed quantization for D3D10 rules: input: [-1..1] float; output: [-127..127] integer
static inline uint8_t encode8_unorm(float x) { return uint8_t( int (x * 255.f + 0.5f) ); }
static inline float decode8_unorm(uint8_t x) { return x / 255.f; }
static inline uint8_t encode8_snorm(float x) { return uint8_t( int (x * 127.f + (x > 0 ? 0.5f : -0.5f)) ); }
static inline float decode8_snorm(uint8_t x) { float f = x / 127.f; return f <= -1 ? -1.f : (f >= 1 ? 1.f : f); }
//
// [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
// OpenGL2: same decoding function for unsigned numbers, but a different one for signed: decode(x) = (2x + 1) / (2^n - 1)
// Signed quantization for OpenGL rules: input: [-1..1] float; output: [-128..127] integer
// Warning: This has slightly better precision (all numbers encode distinct values), but can't represent 0 exactly.
static inline uint8_t encode8_snorm_gl2(float x) { return uint8_t( int (x * 127.5f + (x >= 0.f ? 0.f : -1.f)) ); }
static inline float decode8_snorm_gl2(uint8_t x) { return (2*x + 1) / 255.f; }
//
// Custom: 10-bit based on D3D10_UNORM
// Used to quantize quaternions (see functions below)
static inline uint16_t encode10_unorm(float x) { return uint16_t( int (x * 1023.f + 0.5f) ); }
static inline float decode10_unorm(uint16_t x) { return x / 1023.f; }
static inline uint16_t encode10_snorm(float x) { return encode10_unorm( ( x + 1 ) * 0.5f ); }
static inline float decode10_snorm(uint16_t x) { return ( decode10_unorm( x ) * 2 ) - 1 ; }
template<typename real>
static inline void encode32_quat( uint32_t &out, real a, real b, real c, real d ) {
// [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
// "For quaternions we use 2 bits to store the index of the largest component,
// then 10 bits each to store the value of the remaining three components. We use the knowledge
// that 1 = x^2 + y^2 + z^2 + w^2 to restore the largest component, so we don't actually have to store its value.
// Since we don't store the largest component we know that the remaining ones must be in the range (-1/sqrt(2), 1/sqrt(2))
// (otherwise, one of them would be largest). So we use the 10 bits to quantize a value in that range, giving us a precision of 0.0014.
// The quaternions (x, y, z, w) and (-x, -y, -z, -w) represent the same rotation,
// so I flip the signs so that the largest component is always positive." - Niklas Frykholm / bitsquid.se
real aa = a*a, bb = b*b, cc = c*c, dd = d*d, sgn;
/**/ if( aa >= bb && aa >= cc && aa >= dd ) {
out = a >= 0 ? uint32_t( (0<<30) | (encode10_snorm( b)<<20) | (encode10_snorm( c)<<10) | (encode10_snorm( d)<<0) )
: uint32_t( (0<<30) | (encode10_snorm(-b)<<20) | (encode10_snorm(-c)<<10) | (encode10_snorm(-d)<<0) );
}
else if( bb >= aa && bb >= cc && bb >= dd ) {
out = b >= 0 ? uint32_t( (1<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( c)<<10) | (encode10_snorm( d)<<0) )
: uint32_t( (1<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-c)<<10) | (encode10_snorm(-d)<<0) );
}
else if( cc >= aa && cc >= bb && cc >= dd ) {
out = c >= 0 ? uint32_t( (2<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( b)<<10) | (encode10_snorm( d)<<0) )
: uint32_t( (2<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-b)<<10) | (encode10_snorm(-d)<<0) );
}
else {
out = d >= 0 ? uint32_t( (3<<30) | (encode10_snorm( a)<<20) | (encode10_snorm( b)<<10) | (encode10_snorm( c)<<0) )
: uint32_t( (3<<30) | (encode10_snorm(-a)<<20) | (encode10_snorm(-b)<<10) | (encode10_snorm(-c)<<0) );
}
}
template<typename real>
static inline void decode32_quat( real &a, real &b, real &c, real &d, const uint32_t &in ) {
// [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
// [ref] http://en.wikipedia.org/wiki/Fast_inverse_square_root
auto rsqrt = []( float number ) -> float {
long i;
float x2, y;
const float threehalfs = 1.5F;
x2 = number * 0.5F;
y = number;
i = * ( long * ) &y; // evil floating point bit level hacking
i = 0x5f3759df - ( i >> 1 ); // what the fuck?
y = * ( float * ) &i;
y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed
return y;
};
switch( in >> 30 ) {
default: case 0:
b = decode10_snorm( ( in >> 20 ) & 0x3FF );
c = decode10_snorm( ( in >> 10 ) & 0x3FF );
d = decode10_snorm( ( in >> 0 ) & 0x3FF );
a = 1 / rsqrt( 1 - a * a - b * b - c * c );
break; case 1:
a = decode10_snorm( ( in >> 20 ) & 0x3FF );
c = decode10_snorm( ( in >> 10 ) & 0x3FF );
d = decode10_snorm( ( in >> 0 ) & 0x3FF );
b = 1 / rsqrt( 1 - a * a - b * b - c * c );
break; case 2:
a = decode10_snorm( ( in >> 20 ) & 0x3FF );
b = decode10_snorm( ( in >> 10 ) & 0x3FF );
d = decode10_snorm( ( in >> 0 ) & 0x3FF );
c = 1 / rsqrt( 1 - a * a - b * b - c * c );
break; case 3:
a = decode10_snorm( ( in >> 20 ) & 0x3FF );
b = decode10_snorm( ( in >> 10 ) & 0x3FF );
c = decode10_snorm( ( in >> 0 ) & 0x3FF );
d = 1 / rsqrt( 1 - a * a - b * b - c * c );
}
}
// } quant.cc
#ifdef QUANTIZE_TESTS
#include <iostream>
float acc[3] = {0,0,0};
float hit[3] = {0,0,0};
void verify( float x ) {
const int y1 = encode8_unorm(x); const float x1 = decode8_unorm(y1);
const int y2 = encode8_snorm(x); const float x2 = decode8_snorm(y2);
const int y3 = encode8_snorm_gl2(x); const float x3 = decode8_snorm_gl2(y3);
float err;
err = std::abs(x - x1); acc[0] += err; hit[0]++; std::cout << "v1 " << (err > 0 ? "failed! " : "ok! ") << x1 << " vs " << x << " (error: " << err << ")" << std::endl;
err = std::abs(x - x2); acc[1] += err; hit[1]++; std::cout << "v2 " << (err > 0 ? "failed! " : "ok! ") << x2 << " vs " << x << " (error: " << err << ")" << std::endl;
err = std::abs(x - x3); acc[2] += err; hit[2]++; std::cout << "v3 " << (err > 0 ? "failed! " : "ok! ") << x3 << " vs " << x << " (error: " << err << ")" << std::endl;
}
// run 'sample | sort'
int main() {
for( auto x = 0, e = 1000; x <= e; ++x ) {
verify( x * 0.001f );
}
std::cout << "average error (lower is better)" << std::endl;
std::cout << "avg-1:" << (acc[0] / hit[0]) << std::endl;
std::cout << "avg-2:" << (acc[1] / hit[1]) << std::endl;
std::cout << "avg-3:" << (acc[2] / hit[2]) << std::endl;
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment