|
// Public Domain under http://unlicense.org, see link for details. |
|
// Except individual functions being tested are |
|
// copyright by their respective authors. |
|
|
|
// Comparison of several sRGB<->Linear approximations. |
|
|
|
#include <stdio.h> |
|
#include <math.h> |
|
#include <time.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
|
|
// Reference versions, in double precision. |
|
// The accuracy of the other versions is |
|
// measured against these. |
|
// NOTE: these functions (straight from |
|
// https://en.wikipedia.org/wiki/SRGB) have |
|
// discontinuities larger than |
|
// machine epsilon of float32. |
|
// Specifically: |
|
// linear->srgb: -2.9e-08 ( -7.7 float32 ULP) |
|
// srgb->linear: +2.3e-09 (+10.0 float32 ULP) |
|
static inline double linear2srgb_ref(double x) |
|
{ |
|
if(x<=0.0031308) return 12.92*x; |
|
else return 1.055*pow(x,1.0f/2.4)-0.055; |
|
} |
|
|
|
static inline double srgb2linear_ref(double x) |
|
{ |
|
if(x<=0.04045) return x/12.92; |
|
else return pow((x+0.055)/1.055,2.4); |
|
} |
|
|
|
//============================================================================== |
|
|
|
// Reference single-precision versions. |
|
static inline float l2s_ref(float x) |
|
{ |
|
if(x<=0.0031308f) return 12.92f*x; |
|
else return 1.055f*powf(x,1.0f/2.4f)-0.055f; |
|
} |
|
|
|
static inline float s2l_ref(float x) |
|
{ |
|
if(x<=0.04045f) return x/12.92f; |
|
else return powf((x+0.055f)/1.055f,2.4f); |
|
} |
|
|
|
// Slightly optimized versions of the above, |
|
// with precomputed constants. |
|
static inline float l2s_refopt(float x) |
|
{ |
|
if(x<=0.0031308f) return 12.92f*x; |
|
else return 1.055f*powf(x,0.41666666f)-0.0549999475f; |
|
} |
|
|
|
static inline float s2l_refopt(float x) |
|
{ |
|
if(x<=0.04045f) return x*0.0773993808f; |
|
else return powf(x*0.94786729857f+0.0521327014f,2.4f); |
|
} |
|
|
|
// Identity function. |
|
static inline float l2s_identity(float x) |
|
{ |
|
return x; |
|
} |
|
|
|
static inline float s2l_identity(float x) |
|
{ |
|
return x; |
|
} |
|
|
|
// Square and square root. |
|
static inline float l2s_sqrt(float x) |
|
{ |
|
return sqrtf(x); |
|
} |
|
|
|
static inline float s2l_sqr(float x) |
|
{ |
|
return x*x; |
|
} |
|
|
|
// Gamma 2.2 approximation. |
|
static inline float l2s_22(float x) |
|
{ |
|
return powf(x,1.0f/2.2f); |
|
} |
|
|
|
static inline float s2l_22(float x) |
|
{ |
|
return powf(x,2.2f); |
|
} |
|
|
|
// From http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html |
|
// Still the first google hit for "srgb approximation" as |
|
// of this writing. |
|
static inline float l2s_tayloia_1(float x) |
|
{ |
|
float S1=sqrtf(x); |
|
float S2=sqrtf(S1); |
|
float S3=sqrtf(S2); |
|
return 0.585122381f*S1+0.783140355f*S2-0.368262736f*S3; |
|
} |
|
|
|
static inline float l2s_tayloia_2(float x) |
|
{ |
|
float S1=sqrtf(x); |
|
float S2=sqrtf(S1); |
|
float S3=sqrtf(S2); |
|
return 0.662002687f*S1+0.684122060f*S2-0.323583601f*S3-0.0225411470f*x; |
|
} |
|
|
|
static inline float s2l_tayloia(float x) |
|
{ |
|
return x*(x*(x*0.305306011f+0.682171111f)+0.012522878f); |
|
} |
|
|
|
// From https://mimosa-pudica.net/fast-gamma/ |
|
// NOTE: only provides linear->sRGB. |
|
static inline float l2s_yfujii_1(float x) |
|
{ |
|
return 1.138f*sqrtf(x)-0.138f*x; |
|
} |
|
|
|
#if defined(__GNUC__) && defined(__SSE2__) |
|
#include <emmintrin.h> |
|
static inline float rsqrt(float x) |
|
{ |
|
return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); |
|
} |
|
#else |
|
static inline float rsqrt(float x) |
|
{ |
|
return 1.0f/sqrtf(x); |
|
} |
|
#endif |
|
|
|
static inline float l2s_yfujii_2(float x) |
|
{ |
|
float a=0.00279491f; |
|
float b=1.15907984f; |
|
float c=b*rsqrt(1.0f+a)-1.0f; // Lets hope compiler computes it only once. |
|
return (b*rsqrt(x+a)-c)*x; |
|
} |
|
|
|
// From https://www.shadertoy.com/view/WlG3zG |
|
static inline float s2l_iq(float x) |
|
{ |
|
#if __STDC_IEC_60559_FUNCS__>=201506L |
|
#define pow2m1f(x) exp2m1f(x); |
|
#else |
|
#define pow2m1f(x) (exp2f(x)-1.0f) |
|
#endif |
|
return (pow2m1f(x)-x*0.693147f)*3.258891f; |
|
} |
|
|
|
// From http://marc-b-reynolds.github.io/math/2019/12/10/GammaRamp.html |
|
static inline float s2l_mbr(float x) |
|
{ |
|
return (0.5f*x*x)*(sqrtf(x)+1.0f); |
|
} |
|
|
|
/* |
|
static inline float s2l_mbr_2(float x) |
|
{ |
|
//return (x*x)*(0.17758834362030029296875f+0.82241165637969970703125f*sqrtf(x)); |
|
//return (x*x)*(0.564024388790130615234375f+0.435975611209869384765625f*sqrtf(x)); |
|
return x*x*(0.75f+0.25f*x); |
|
} |
|
|
|
static inline float s2l_mbr_3(float x) |
|
{ |
|
return 0x1.117542p-12f+x*(-0x1.6479a8p-6f+x*(0x1.01ea1cp-1f+x*(0x1.554462p-1f+x*(-0x1.313c38p-3f)))); |
|
} |
|
*/ |
|
|
|
// Suggested by https://github.com/MrSmile |
|
// in https://gamedev.ru/flame/forum/?id=226959 |
|
// Are analytic inverse of each other. |
|
static inline float l2s_mrsmile(float x) |
|
{ |
|
return sqrtf(0.0034312649f+x*(2.3263988f+x*1.530326f))-0.906151f*x-0.058577f; |
|
} |
|
|
|
static inline float s2l_mrsmile(float x) |
|
{ |
|
// NOTE: in the original version the constant term was |
|
// 1.5652767f, spoiling 0->0 and 1->1. |
|
return sqrtf(2.450091f+x*(-3.8346549f+x*3.0424711f))+1.2776792f*x-1.5652766f; |
|
} |
|
|
|
// Developed for use in https://github.com/fp64/dbc_blit |
|
// Public domain. |
|
static inline float l2s_p3(float x) |
|
{ |
|
return x*(3.68945826f+x*(-5.87001922f+x*3.18056096f)); |
|
} |
|
|
|
static inline float l2s_p2q2(float x) |
|
{ |
|
float p=x*(13.044282f+x*65.4774821f); |
|
float q=1.0f+x*(42.6165181f+x*34.905246f); |
|
return p/q; |
|
} |
|
|
|
static inline float l2s_p3q2(float x) |
|
{ |
|
float p=x*(15.2012147f+x*(192.311612f+x*67.1571239f)); |
|
float q=1.0f+x*(70.6761146f+x*202.993836f); |
|
return p/q; |
|
} |
|
|
|
static inline float l2s_p4q4(float x) |
|
{ |
|
float p=x*(8.51112614f+x*(21407.7188f+x*(569310.187f+x*(915832.184f)))); |
|
float q=1.0f+x*(1249.83141f+x*(135537.797f+x*(989102.054f+x*(380667.918f)))); |
|
return p/q; |
|
} |
|
|
|
static inline float s2l_p3(float x) |
|
{ |
|
return x*(0.0181434661f+x*(0.667441605f+x*0.314414929f)); |
|
} |
|
|
|
static inline float s2l_p5(float x) |
|
{ |
|
return x*(0.0553287698+x*(0.378519491+x*(1.00770205+x*(-0.657329466+x*0.215779156)))); |
|
} |
|
|
|
static inline float s2l_p4q2(float x) |
|
{ |
|
float p=x*(0.0893605858f+x*(0.894345379f+x*(14.8331415f+x*15.3162396f))); |
|
float q=1.0f+x*(23.4190713f+x*6.71401579f); |
|
return p/q; |
|
} |
|
|
|
//============================================================================== |
|
|
|
// Tests and benchmarks. |
|
static double test_accuracy(float (*f)(float),double (*ref)(double)) |
|
{ |
|
double e=0.0; |
|
const int N=100000; |
|
for(int i=0;i<=N;++i) |
|
{ |
|
float t=float(i)/float(N); |
|
double x=ref(double(t)); |
|
double y=double(f(t)); |
|
double d=fabs(x-y); |
|
e=fmax(e,d); |
|
} |
|
return e; |
|
} |
|
|
|
template<float (*f)(float)> |
|
static double test_speed() |
|
{ |
|
static const int M=2500; |
|
static const int N=1024; |
|
double t=double(clock()); |
|
float buf[M]; |
|
for(int i=0;i<N;++i) |
|
{ |
|
float x=0.0f; |
|
float dx=1.0f/float(M); |
|
for(int j=0;j<M;++j) |
|
{ |
|
buf[j]=f(x); |
|
x+=dx; |
|
} |
|
// Make sure compiler does not optimize out the loop. |
|
volatile float y=buf[i%M]; |
|
(void)y; |
|
} |
|
t=double(clock())-t; |
|
return t/double(CLOCKS_PER_SEC)/double(M)/double(N); |
|
} |
|
|
|
static void roundtrip(float (*linear2srgb)(float),float (*srgb2linear)(float),float *ret) |
|
{ |
|
float lsl=0.0f,sls=0.0f; |
|
for(int i=0;i<256;++i) |
|
{ |
|
float x=float(i)/255.0f; |
|
lsl=fmaxf(lsl,fabsf(srgb2linear(linear2srgb(x))-x)); |
|
sls=fmaxf(sls,fabsf(linear2srgb(srgb2linear(x))-x)); |
|
} |
|
ret[0]=lsl; |
|
ret[1]=sls; |
|
} |
|
|
|
static void test_properties(float (*f)(float),float *ret) |
|
{ |
|
const int N=100000; |
|
float v=0.0f; |
|
ret[0]=f(0.0f); |
|
ret[1]=f(1.0f); |
|
ret[2]=1.0f; |
|
ret[3]=0.0f; |
|
ret[4]=1.0f; |
|
for(int i=0;i<=N;++i) |
|
{ |
|
float t=float(i)/float(N); |
|
float x=f(t); |
|
ret[2]=fmin(ret[2],x); |
|
ret[3]=fmax(ret[3],x); |
|
if(i>0&&x<v) ret[4]=0.0f; |
|
v=x; |
|
} |
|
} |
|
|
|
#define TEST(f) do {\ |
|
double (*ref)(double)=/*HACK*/(#f[0]=='l'?linear2srgb_ref:srgb2linear_ref);\ |
|
printf("%-20s | %17.11f | %11.3f\n",#f,test_accuracy(f,ref),1.0e+9*test_speed<f>());\ |
|
} while (0) |
|
|
|
#define ROUNDTRIP(linear2srgb,srgb2linear) do {\ |
|
float x[2];\ |
|
roundtrip(linear2srgb,srgb2linear,x);\ |
|
printf("%-17s |%-17s | %18.11f | %18.11f\n",#linear2srgb,#srgb2linear,x[0],x[1]);\ |
|
} while (0) |
|
|
|
#define PROPERTIES(f) do {\ |
|
float x[5];\ |
|
test_properties(f,x);\ |
|
printf("%-13s|%7.3g|%15.9g|%15.9g|%15.9g| %s\n"\ |
|
,#f\ |
|
,x[0],x[1],x[2],x[3],(x[4]==1.0f?"Yes":"No"));\ |
|
} while (0) |
|
|
|
struct entry |
|
{ |
|
const char *name; |
|
float (*function)(float); |
|
}; |
|
|
|
#define ENTRY(f) {#f,f} |
|
static entry table[]= |
|
{ |
|
ENTRY(l2s_ref), |
|
ENTRY(l2s_refopt), |
|
ENTRY(l2s_identity), |
|
ENTRY(l2s_sqrt), |
|
ENTRY(l2s_22), |
|
ENTRY(l2s_tayloia_1), |
|
ENTRY(l2s_tayloia_2), |
|
ENTRY(l2s_yfujii_1), |
|
ENTRY(l2s_yfujii_2), |
|
ENTRY(l2s_mrsmile), |
|
ENTRY(l2s_p3), |
|
ENTRY(l2s_p2q2), |
|
ENTRY(l2s_p3q2), |
|
ENTRY(l2s_p4q4), |
|
ENTRY(s2l_ref), |
|
ENTRY(s2l_refopt), |
|
ENTRY(s2l_identity), |
|
ENTRY(s2l_sqr), |
|
ENTRY(s2l_22), |
|
ENTRY(s2l_tayloia), |
|
ENTRY(s2l_iq), |
|
ENTRY(s2l_mbr), |
|
ENTRY(s2l_mrsmile), |
|
ENTRY(s2l_p3), |
|
ENTRY(s2l_p5), |
|
ENTRY(s2l_p4q2), |
|
}; |
|
#undef ENTRY |
|
|
|
static const int NUM_ENTRIES=int(sizeof(table)/sizeof(table[0])); |
|
|
|
int main(int argc,char **argv) |
|
{ |
|
if(argc>1) |
|
{ |
|
int N=1<<13; |
|
const char *s=argv[1]; |
|
float (*f)(float)=0; |
|
double (*ref)(double)=(s[0]=='s'?srgb2linear_ref:linear2srgb_ref); |
|
if(strcmp(s,"list")==0) |
|
{ |
|
for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i) |
|
printf("%s\n",table[i].name); |
|
return 0; |
|
} |
|
for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i) |
|
if(strcmp(s,table[i].name)==0) {f=table[i].function;break;} |
|
if(f==0) return 0; |
|
printf("x %s\n",s); |
|
for(int i=0;i<=N;++i) |
|
{ |
|
float t=float(i)/float(N); |
|
t=t*t; |
|
printf("%.17f %.17f\n",t,double(f(t))-ref(double(t))); |
|
} |
|
return 0; |
|
} |
|
#if defined(DETECT_CPU) && defined(__SSE2__) && defined(__GNUC__) |
|
// Makeshift GCC-only x86 CPU detection. |
|
// Output similar to system("cat /proc/cpuinfo | grep 'model name' | uniq"); |
|
printf("CPU: "); |
|
for(int i=2;i<5;++i) |
|
{ |
|
unsigned reg[4]={0x80000000u+i,0,0,0}; |
|
char s[17]={0}; |
|
__asm__ volatile ("cpuid":"+a"(reg[0]),"+b"(reg[1]),"+c"(reg[2]),"+d"(reg[3])); |
|
memcpy(s,reg,sizeof(reg)); |
|
printf("%s",s); |
|
} |
|
printf("\n"); |
|
#endif |
|
const char *head="---------------------------------------------------"; |
|
printf("Testing Linear -> sRGB:\n"); |
|
printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns"); |
|
printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head); |
|
TEST(l2s_ref); |
|
TEST(l2s_refopt); |
|
TEST(l2s_identity); |
|
TEST(l2s_sqrt); |
|
TEST(l2s_22); |
|
TEST(l2s_tayloia_1); |
|
TEST(l2s_tayloia_2); |
|
TEST(l2s_yfujii_1); |
|
TEST(l2s_yfujii_2); |
|
TEST(l2s_mrsmile); |
|
TEST(l2s_p3); |
|
TEST(l2s_p2q2); |
|
TEST(l2s_p3q2); |
|
TEST(l2s_p4q4); |
|
printf("\n"); |
|
printf("Testing sRGB -> Linear:\n"); |
|
printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns"); |
|
printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head); |
|
TEST(s2l_ref); |
|
TEST(s2l_refopt); |
|
TEST(s2l_identity); |
|
TEST(s2l_sqr); |
|
TEST(s2l_22); |
|
TEST(s2l_tayloia); |
|
TEST(s2l_iq); |
|
TEST(s2l_mbr); |
|
TEST(s2l_mrsmile); |
|
TEST(s2l_p3); |
|
TEST(s2l_p5); |
|
TEST(s2l_p4q2); |
|
printf("\n"); |
|
printf("Testing round-trip:\n"); |
|
printf("%-17s |%-17s |%-20s|%-20s\n","Linear->sRGB","sRGB->Linear","Linear->sRGB->Linear","sRGB->Linear->sRGB"); |
|
printf("%-17.17s-|%-17.17s-|%-20.20s|%-20.20s\n",head,head,head,head); |
|
ROUNDTRIP(l2s_ref ,s2l_ref); |
|
ROUNDTRIP(l2s_sqrt ,s2l_sqr); |
|
ROUNDTRIP(l2s_tayloia_1 ,s2l_tayloia); |
|
ROUNDTRIP(l2s_tayloia_2 ,s2l_tayloia); |
|
ROUNDTRIP(l2s_mrsmile ,s2l_mrsmile); |
|
ROUNDTRIP(l2s_p3 ,s2l_p3); |
|
ROUNDTRIP(l2s_p2q2 ,s2l_p3); |
|
ROUNDTRIP(l2s_p3q2 ,s2l_p5); |
|
ROUNDTRIP(l2s_p4q4 ,s2l_p4q2); |
|
printf("\n"); |
|
|
|
// NOTE: do take these with a grain of salt. |
|
// They can depend, among other things, on |
|
// * Computing in x87 vs SSE |
|
// * Rounding direction |
|
// * FP_CONTRACT |
|
printf("Testing properties:\n"); |
|
printf("%-13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n", |
|
"Function","f(0)","f(1)","min(f)","max(f)","Monotone"); |
|
printf("%-13.13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n", |
|
head,head,head,head,head,"--------"); |
|
PROPERTIES(l2s_ref); |
|
PROPERTIES(l2s_refopt); |
|
PROPERTIES(l2s_identity); |
|
PROPERTIES(l2s_sqrt); |
|
PROPERTIES(l2s_22); |
|
PROPERTIES(l2s_tayloia_1); |
|
PROPERTIES(l2s_tayloia_2); |
|
PROPERTIES(l2s_yfujii_1); |
|
PROPERTIES(l2s_yfujii_2); |
|
PROPERTIES(l2s_mrsmile); |
|
PROPERTIES(l2s_p3); |
|
PROPERTIES(l2s_p2q2); |
|
PROPERTIES(l2s_p3q2); |
|
PROPERTIES(l2s_p4q4); |
|
PROPERTIES(s2l_ref); |
|
PROPERTIES(s2l_refopt); |
|
PROPERTIES(s2l_identity); |
|
PROPERTIES(s2l_sqr); |
|
PROPERTIES(s2l_22); |
|
PROPERTIES(s2l_tayloia); |
|
PROPERTIES(s2l_iq); |
|
PROPERTIES(s2l_mbr); |
|
PROPERTIES(s2l_mrsmile); |
|
PROPERTIES(s2l_p3); |
|
PROPERTIES(s2l_p5); |
|
PROPERTIES(s2l_p4q2); |
|
return 0; |
|
} |