fp64/results.md

## results.md

      
    Raw
  

              results.md
            
          
    Update: added a few more functions.
Results, on AMD EPYC 7571.
Note: time for _identity functions is essentially pure
overhead of the test setup.
sRGB→Linear:


Function
Abs. error (Linf)
Time, ns


s2l_ref
0.00000023284
18.931


s2l_refopt
0.00000009856
17.465


s2l_identity
0.28713593804
1.223


s2l_sqr
0.04247703407
1.263


s2l_22
0.00852770523
15.938


s2l_tayloia
0.00167073848
1.388


s2l_iq
0.00680385671
7.545


s2l_mbr
0.00416070167
1.974


s2l_mrsmile
0.00724860203
2.865


s2l_p3
0.00133785212
1.697


s2l_p5
0.00030062660
3.570


s2l_p4q2
0.00004142101
3.432


Linear→sRGB:


Function
Abs. error (Linf)
Time, ns


l2s_ref
0.00000014871
18.366


l2s_refopt
0.00000009458
18.145


l2s_identity
0.28713593803
1.227


l2s_sqrt
0.03735434676
1.973


l2s_22
0.03352387373
15.901


l2s_tayloia_1
0.04156852089
5.926


l2s_tayloia_2
0.03629874758
5.920


l2s_yfujii_1
0.02479406936
1.974


l2s_yfujii_2
0.00733280893
1.888


l2s_mrsmile
0.00390320955
2.818


l2s_p3
0.08428821274
1.387


l2s_p2q2
0.00605785567
2.583


l2s_p3q2
0.00161903180
3.011


l2s_p4q4
0.00065633937
4.241


Round-trip:


Linear→sRGB
sRGB→Linear
Linear→sRGB→Linear
sRGB→Linear→sRGB


l2s_ref
s2l_ref
0.00000017881
0.00000005960


l2s_sqrt
s2l_sqr
0.00000005960
0.00000000000


l2s_tayloia_1
s2l_tayloia
0.00224190950
0.03975261003


l2s_tayloia_2
s2l_tayloia
0.00159212621
0.03461691365


l2s_mrsmile
s2l_mrsmile
0.00000041723
0.00000119628


l2s_p3
s2l_p3
0.16314363480
0.08558563888


l2s_p2q2
s2l_p3
0.00919800997
0.01905959100


l2s_p3q2
s2l_p5
0.00318956375
0.00248982012


l2s_p4q4
s2l_p4q2
0.00136017799
0.00067627430


Note: for sRGB→Linear→sRGB the error of sRGB→Linear can be magnified
by up to 12.92 by the subsequent Linear→sRGB transformation.
Note: for 8-bit color channels you might want to keep
sRGB→Linear→sRGB the error to below 0.5/255≈0.00196. This error can
be pretty noticeable in some circumstances (e.g. when blending
fully transparent part of the sprite actually the changes
underlying color).
Numerical properties:


Function
f(0)
f(1)
min(f)
max(f)
Monotone


l2s_ref
0
0.99999994
0
0.99999994
Yes


l2s_refopt
0
1
0
1
Yes


l2s_identity
0
1
0
1
Yes


l2s_sqrt
0
1
0
1
Yes


l2s_22
0
1
0
1
Yes


l2s_tayloia_1
0
1
-0.0414393209
1
No


l2s_tayloia_2
0
1
-0.0361695476
1
No


l2s_yfujii_1
0
1
0
1
Yes


l2s_yfujii_2
0
1
0
1.00003147
No


l2s_mrsmile
0
1
0
1
Yes


l2s_p3
0
1
0
1
Yes


l2s_p2q2
0
1
0
1
Yes


l2s_p3q2
0
1
0
1
Yes


l2s_p4q4
0
1
0
1
Yes


s2l_ref
0
1
0
1
Yes


s2l_refopt
0
1
0
1
Yes


s2l_identity
0
1
0
1
Yes


s2l_sqr
0
1
0
1
Yes


s2l_22
0
1
0
1
Yes


s2l_tayloia
0
1
0
1
Yes


s2l_iq
0
1.00000048
-1.76319119e-07
1.00000048
No


s2l_mbr
0
1
0
1
Yes


s2l_mrsmile
0
1
0
1
Yes


s2l_p3
0
1
0
1
Yes


s2l_p5
0
1
0
1
Yes


s2l_p4q2
0
1
0
1
Yes


Note: do take these with a grain of salt. They can and will change depending
on the numeric environment (e.g. _mrsmile functions fail 0/1 on x87).
The specific conditions of the test were:

IEEE-754
Round-to-nearest-ties-to-even
Evaluated in stated precision (i.e. not on x87)
No FMA (FP_CONTRACT OFF)

Error plots:
sRGB→Linear


Linear→sRGB (also with logscale x, to magnify the linear region)


Linear
Logscale


On the non-linear region only (i.e. [0.0031308;1])


## srgb.cpp
// Public Domain under http://unlicense.org, see link for details.
// Except individual functions being tested are
// copyright by their respective authors.

// Comparison of several sRGB<->Linear approximations.

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

// Reference versions, in double precision.
// The accuracy of the other versions is
// measured against these.
// NOTE: these functions (straight from
// https://en.wikipedia.org/wiki/SRGB) have
// discontinuities larger than
// machine epsilon of float32.
// Specifically:
//   linear->srgb: -2.9e-08 ( -7.7 float32 ULP)
//   srgb->linear: +2.3e-09 (+10.0 float32 ULP)
static inline double linear2srgb_ref(double x)
{
    if(x<=0.0031308) return 12.92*x;
    else return 1.055*pow(x,1.0f/2.4)-0.055;
}

static inline double srgb2linear_ref(double x)
{
    if(x<=0.04045) return x/12.92;
    else return pow((x+0.055)/1.055,2.4);
}

//==============================================================================

// Reference single-precision versions.
static inline float l2s_ref(float x)
{
    if(x<=0.0031308f) return 12.92f*x;
    else return 1.055f*powf(x,1.0f/2.4f)-0.055f;
}

static inline float s2l_ref(float x)
{
    if(x<=0.04045f) return x/12.92f;
    else return powf((x+0.055f)/1.055f,2.4f);
}

// Slightly optimized versions of the above,
// with precomputed constants.
static inline float l2s_refopt(float x)
{
    if(x<=0.0031308f) return 12.92f*x;
    else return 1.055f*powf(x,0.41666666f)-0.0549999475f;
}

static inline float s2l_refopt(float x)
{
    if(x<=0.04045f) return x*0.0773993808f;
    else return powf(x*0.94786729857f+0.0521327014f,2.4f);
}

// Identity function.
static inline float l2s_identity(float x)
{
    return x;
}

static inline float s2l_identity(float x)
{
    return x;
}

// Square and square root.
static inline float l2s_sqrt(float x)
{
    return sqrtf(x);
}

static inline float s2l_sqr(float x)
{
    return x*x;
}

// Gamma 2.2 approximation.
static inline float l2s_22(float x)
{
    return powf(x,1.0f/2.2f);
}

static inline float s2l_22(float x)
{
    return powf(x,2.2f);
}

// From http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
// Still the first google hit for "srgb approximation" as
// of this writing.
static inline float l2s_tayloia_1(float x)
{
    float S1=sqrtf(x);
    float S2=sqrtf(S1);
    float S3=sqrtf(S2);
    return 0.585122381f*S1+0.783140355f*S2-0.368262736f*S3;
}

static inline float l2s_tayloia_2(float x)
{
    float S1=sqrtf(x);
    float S2=sqrtf(S1);
    float S3=sqrtf(S2);
    return 0.662002687f*S1+0.684122060f*S2-0.323583601f*S3-0.0225411470f*x;
}

static inline float s2l_tayloia(float x)
{
    return x*(x*(x*0.305306011f+0.682171111f)+0.012522878f);
}

// From https://mimosa-pudica.net/fast-gamma/
// NOTE: only provides linear->sRGB.
static inline float l2s_yfujii_1(float x)
{
    return 1.138f*sqrtf(x)-0.138f*x;
}

#if defined(__GNUC__) && defined(__SSE2__)
#include <emmintrin.h>
static inline float rsqrt(float x)
{
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
}
#else
static inline float rsqrt(float x)
{
    return 1.0f/sqrtf(x);
}
#endif

static inline float l2s_yfujii_2(float x)
{
    float a=0.00279491f;
    float b=1.15907984f;
    float c=b*rsqrt(1.0f+a)-1.0f; // Lets hope compiler computes it only once.
    return (b*rsqrt(x+a)-c)*x;
}

// From https://www.shadertoy.com/view/WlG3zG
static inline float s2l_iq(float x)
{
#if __STDC_IEC_60559_FUNCS__>=201506L
#define pow2m1f(x) exp2m1f(x);
#else
#define pow2m1f(x) (exp2f(x)-1.0f)
#endif
    return (pow2m1f(x)-x*0.693147f)*3.258891f;
}

// From http://marc-b-reynolds.github.io/math/2019/12/10/GammaRamp.html
static inline float s2l_mbr(float x)
{
    return (0.5f*x*x)*(sqrtf(x)+1.0f);
}

/*
static inline float s2l_mbr_2(float x)
{
    //return (x*x)*(0.17758834362030029296875f+0.82241165637969970703125f*sqrtf(x));
    //return (x*x)*(0.564024388790130615234375f+0.435975611209869384765625f*sqrtf(x));
    return x*x*(0.75f+0.25f*x);
}

static inline float s2l_mbr_3(float x)
{
    return 0x1.117542p-12f+x*(-0x1.6479a8p-6f+x*(0x1.01ea1cp-1f+x*(0x1.554462p-1f+x*(-0x1.313c38p-3f))));
}
*/

// Suggested by https://github.com/MrSmile
// in https://gamedev.ru/flame/forum/?id=226959
// Are analytic inverse of each other.
static inline float l2s_mrsmile(float x)
{
    return sqrtf(0.0034312649f+x*(2.3263988f+x*1.530326f))-0.906151f*x-0.058577f;
}

static inline float s2l_mrsmile(float x)
{
    // NOTE: in the original version the constant term was
    // 1.5652767f, spoiling 0->0 and 1->1.
    return sqrtf(2.450091f+x*(-3.8346549f+x*3.0424711f))+1.2776792f*x-1.5652766f;
}

// Developed for use in https://github.com/fp64/dbc_blit
// Public domain.
static inline float l2s_p3(float x)
{
    return  x*(3.68945826f+x*(-5.87001922f+x*3.18056096f));
}

static inline float l2s_p2q2(float x)
{
    float p=x*(13.044282f+x*65.4774821f);
    float q=1.0f+x*(42.6165181f+x*34.905246f);
    return p/q;
}

static inline float l2s_p3q2(float x)
{
    float p=x*(15.2012147f+x*(192.311612f+x*67.1571239f));
    float q=1.0f+x*(70.6761146f+x*202.993836f);
    return p/q;
}

static inline float l2s_p4q4(float x)
{
    float p=x*(8.51112614f+x*(21407.7188f+x*(569310.187f+x*(915832.184f))));
    float q=1.0f+x*(1249.83141f+x*(135537.797f+x*(989102.054f+x*(380667.918f))));
    return p/q;
}

static inline float s2l_p3(float x)
{
    return x*(0.0181434661f+x*(0.667441605f+x*0.314414929f));
}

static inline float s2l_p5(float x)
{
    return x*(0.0553287698+x*(0.378519491+x*(1.00770205+x*(-0.657329466+x*0.215779156))));
}

static inline float s2l_p4q2(float x)
{
    float p=x*(0.0893605858f+x*(0.894345379f+x*(14.8331415f+x*15.3162396f)));
    float q=1.0f+x*(23.4190713f+x*6.71401579f);
    return p/q;
}

//==============================================================================

// Tests and benchmarks.
static double test_accuracy(float (*f)(float),double (*ref)(double))
{
    double e=0.0;
    const int N=100000;
    for(int i=0;i<=N;++i)
    {
        float t=float(i)/float(N);
        double x=ref(double(t));
        double y=double(f(t));
        double d=fabs(x-y);
        e=fmax(e,d);
    }
    return e;
}

template<float (*f)(float)>
static double test_speed()
{
    static const int M=2500;
    static const int N=1024;
    double t=double(clock());
    float buf[M];
    for(int i=0;i<N;++i)
    {
        float x=0.0f;
        float dx=1.0f/float(M);
        for(int j=0;j<M;++j)
        {
            buf[j]=f(x);
            x+=dx;
        }
        // Make sure compiler does not optimize out the loop.
        volatile float y=buf[i%M];
        (void)y;
    }
    t=double(clock())-t;
    return t/double(CLOCKS_PER_SEC)/double(M)/double(N);
}

static void roundtrip(float (*linear2srgb)(float),float (*srgb2linear)(float),float *ret)
{
    float lsl=0.0f,sls=0.0f;
    for(int i=0;i<256;++i)
    {
        float x=float(i)/255.0f;
        lsl=fmaxf(lsl,fabsf(srgb2linear(linear2srgb(x))-x));
        sls=fmaxf(sls,fabsf(linear2srgb(srgb2linear(x))-x));
    }
    ret[0]=lsl;
    ret[1]=sls;
}

static void test_properties(float (*f)(float),float *ret)
{
    const int N=100000;
    float v=0.0f;
    ret[0]=f(0.0f);
    ret[1]=f(1.0f);
    ret[2]=1.0f;
    ret[3]=0.0f;
    ret[4]=1.0f;
    for(int i=0;i<=N;++i)
    {
        float t=float(i)/float(N);
        float x=f(t);
        ret[2]=fmin(ret[2],x);
        ret[3]=fmax(ret[3],x);
        if(i>0&&x<v) ret[4]=0.0f;
        v=x;
    }
}

#define TEST(f) do {\
        double (*ref)(double)=/*HACK*/(#f[0]=='l'?linear2srgb_ref:srgb2linear_ref);\
        printf("%-20s | %17.11f | %11.3f\n",#f,test_accuracy(f,ref),1.0e+9*test_speed<f>());\
    } while (0)

#define ROUNDTRIP(linear2srgb,srgb2linear) do {\
        float x[2];\
        roundtrip(linear2srgb,srgb2linear,x);\
        printf("%-17s |%-17s | %18.11f | %18.11f\n",#linear2srgb,#srgb2linear,x[0],x[1]);\
    } while (0)

#define PROPERTIES(f) do {\
        float x[5];\
        test_properties(f,x);\
        printf("%-13s|%7.3g|%15.9g|%15.9g|%15.9g| %s\n"\
            ,#f\
            ,x[0],x[1],x[2],x[3],(x[4]==1.0f?"Yes":"No"));\
    } while (0)

struct entry
{
    const char *name;
    float (*function)(float);
};

#define ENTRY(f) {#f,f}
static entry table[]=
{
    ENTRY(l2s_ref),
    ENTRY(l2s_refopt),
    ENTRY(l2s_identity),
    ENTRY(l2s_sqrt),
    ENTRY(l2s_22),
    ENTRY(l2s_tayloia_1),
    ENTRY(l2s_tayloia_2),
    ENTRY(l2s_yfujii_1),
    ENTRY(l2s_yfujii_2),
    ENTRY(l2s_mrsmile),
    ENTRY(l2s_p3),
    ENTRY(l2s_p2q2),
    ENTRY(l2s_p3q2),
    ENTRY(l2s_p4q4),
    ENTRY(s2l_ref),
    ENTRY(s2l_refopt),
    ENTRY(s2l_identity),
    ENTRY(s2l_sqr),
    ENTRY(s2l_22),
    ENTRY(s2l_tayloia),
    ENTRY(s2l_iq),
    ENTRY(s2l_mbr),
    ENTRY(s2l_mrsmile),
    ENTRY(s2l_p3),
    ENTRY(s2l_p5),
    ENTRY(s2l_p4q2),
};
#undef ENTRY

static const int NUM_ENTRIES=int(sizeof(table)/sizeof(table[0]));

int main(int argc,char **argv)
{
    if(argc>1)
    {
        int N=1<<13;
        const char *s=argv[1];
        float (*f)(float)=0;
        double (*ref)(double)=(s[0]=='s'?srgb2linear_ref:linear2srgb_ref);
        if(strcmp(s,"list")==0)
        {
            for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
                printf("%s\n",table[i].name);
            return 0;
        }
        for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
            if(strcmp(s,table[i].name)==0) {f=table[i].function;break;}
        if(f==0) return 0;
        printf("x           %s\n",s);
        for(int i=0;i<=N;++i)
        {
            float t=float(i)/float(N);
            t=t*t;
            printf("%.17f %.17f\n",t,double(f(t))-ref(double(t)));
        }
        return 0;
    }
#if defined(DETECT_CPU) && defined(__SSE2__) && defined(__GNUC__)
    // Makeshift GCC-only x86 CPU detection.
    // Output similar to system("cat /proc/cpuinfo | grep 'model name' | uniq");
    printf("CPU: ");
    for(int i=2;i<5;++i)
    {
        unsigned reg[4]={0x80000000u+i,0,0,0};
        char s[17]={0};
        __asm__ volatile ("cpuid":"+a"(reg[0]),"+b"(reg[1]),"+c"(reg[2]),"+d"(reg[3]));
        memcpy(s,reg,sizeof(reg));
        printf("%s",s);
    }
    printf("\n");
#endif
    const char *head="---------------------------------------------------";
    printf("Testing Linear -> sRGB:\n");
    printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns");
    printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head);
    TEST(l2s_ref);
    TEST(l2s_refopt);
    TEST(l2s_identity);
    TEST(l2s_sqrt);
    TEST(l2s_22);
    TEST(l2s_tayloia_1);
    TEST(l2s_tayloia_2);
    TEST(l2s_yfujii_1);
    TEST(l2s_yfujii_2);
    TEST(l2s_mrsmile);
    TEST(l2s_p3);
    TEST(l2s_p2q2);
    TEST(l2s_p3q2);
    TEST(l2s_p4q4);
    printf("\n");
    printf("Testing sRGB -> Linear:\n");
    printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns");
    printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head);
    TEST(s2l_ref);
    TEST(s2l_refopt);
    TEST(s2l_identity);
    TEST(s2l_sqr);
    TEST(s2l_22);
    TEST(s2l_tayloia);
    TEST(s2l_iq);
    TEST(s2l_mbr);
    TEST(s2l_mrsmile);
    TEST(s2l_p3);
    TEST(s2l_p5);
    TEST(s2l_p4q2);
    printf("\n");
    printf("Testing round-trip:\n");
    printf("%-17s |%-17s |%-20s|%-20s\n","Linear->sRGB","sRGB->Linear","Linear->sRGB->Linear","sRGB->Linear->sRGB");
    printf("%-17.17s-|%-17.17s-|%-20.20s|%-20.20s\n",head,head,head,head);
    ROUNDTRIP(l2s_ref       ,s2l_ref);
    ROUNDTRIP(l2s_sqrt      ,s2l_sqr);
    ROUNDTRIP(l2s_tayloia_1 ,s2l_tayloia);
    ROUNDTRIP(l2s_tayloia_2 ,s2l_tayloia);
    ROUNDTRIP(l2s_mrsmile   ,s2l_mrsmile);
    ROUNDTRIP(l2s_p3        ,s2l_p3);
    ROUNDTRIP(l2s_p2q2      ,s2l_p3);
    ROUNDTRIP(l2s_p3q2      ,s2l_p5);
    ROUNDTRIP(l2s_p4q4      ,s2l_p4q2);
    printf("\n");

    // NOTE: do take these with a grain of salt.
    // They can depend, among other things, on
    //   * Computing in x87 vs SSE
    //   * Rounding direction
    //   * FP_CONTRACT
    printf("Testing properties:\n");
    printf("%-13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n",
        "Function","f(0)","f(1)","min(f)","max(f)","Monotone");
    printf("%-13.13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n",
        head,head,head,head,head,"--------");
    PROPERTIES(l2s_ref);
    PROPERTIES(l2s_refopt);
    PROPERTIES(l2s_identity);
    PROPERTIES(l2s_sqrt);
    PROPERTIES(l2s_22);
    PROPERTIES(l2s_tayloia_1);
    PROPERTIES(l2s_tayloia_2);
    PROPERTIES(l2s_yfujii_1);
    PROPERTIES(l2s_yfujii_2);
    PROPERTIES(l2s_mrsmile);
    PROPERTIES(l2s_p3);
    PROPERTIES(l2s_p2q2);
    PROPERTIES(l2s_p3q2);
    PROPERTIES(l2s_p4q4);
    PROPERTIES(s2l_ref);
    PROPERTIES(s2l_refopt);
    PROPERTIES(s2l_identity);
    PROPERTIES(s2l_sqr);
    PROPERTIES(s2l_22);
    PROPERTIES(s2l_tayloia);
    PROPERTIES(s2l_iq);
    PROPERTIES(s2l_mbr);
    PROPERTIES(s2l_mrsmile);
    PROPERTIES(s2l_p3);
    PROPERTIES(s2l_p5);
    PROPERTIES(s2l_p4q2);
    return 0;
}
Function	Abs. error (Linf)	Time, ns
s2l_ref	0.00000023284	18.931
s2l_refopt	0.00000009856	17.465
s2l_identity	0.28713593804	1.223
s2l_sqr	0.04247703407	1.263
s2l_22	0.00852770523	15.938
s2l_tayloia	0.00167073848	1.388
s2l_iq	0.00680385671	7.545
s2l_mbr	0.00416070167	1.974
s2l_mrsmile	0.00724860203	2.865
s2l_p3	0.00133785212	1.697
s2l_p5	0.00030062660	3.570
s2l_p4q2	0.00004142101	3.432
Function	Abs. error (Linf)	Time, ns
l2s_ref	0.00000014871	18.366
l2s_refopt	0.00000009458	18.145
l2s_identity	0.28713593803	1.227
l2s_sqrt	0.03735434676	1.973
l2s_22	0.03352387373	15.901
l2s_tayloia_1	0.04156852089	5.926
l2s_tayloia_2	0.03629874758	5.920
l2s_yfujii_1	0.02479406936	1.974
l2s_yfujii_2	0.00733280893	1.888
l2s_mrsmile	0.00390320955	2.818
l2s_p3	0.08428821274	1.387
l2s_p2q2	0.00605785567	2.583
l2s_p3q2	0.00161903180	3.011
l2s_p4q4	0.00065633937	4.241
Linear→sRGB	sRGB→Linear	Linear→sRGB→Linear	sRGB→Linear→sRGB
l2s_ref	s2l_ref	0.00000017881	0.00000005960
l2s_sqrt	s2l_sqr	0.00000005960	0.00000000000
l2s_tayloia_1	s2l_tayloia	0.00224190950	0.03975261003
l2s_tayloia_2	s2l_tayloia	0.00159212621	0.03461691365
l2s_mrsmile	s2l_mrsmile	0.00000041723	0.00000119628
l2s_p3	s2l_p3	0.16314363480	0.08558563888
l2s_p2q2	s2l_p3	0.00919800997	0.01905959100
l2s_p3q2	s2l_p5	0.00318956375	0.00248982012
l2s_p4q4	s2l_p4q2	0.00136017799	0.00067627430
Function	f(1)	min(f)	max(f)	Monotone
l2s_ref	0.99999994	0	0.99999994	Yes
l2s_refopt	1	0	1	Yes
l2s_identity	1	0	1	Yes
l2s_sqrt	1	0	1	Yes
l2s_22	1	0	1	Yes
l2s_tayloia_1	1	-0.0414393209	1	No
l2s_tayloia_2	1	-0.0361695476	1	No
l2s_yfujii_1	1	0	1	Yes
l2s_yfujii_2	1	0	1.00003147	No
l2s_mrsmile	1	0	1	Yes
l2s_p3	1	0	1	Yes
l2s_p2q2	1	0	1	Yes
l2s_p3q2	1	0	1	Yes
l2s_p4q4	1	0	1	Yes
s2l_ref	1	0	1	Yes
s2l_refopt	1	0	1	Yes
s2l_identity	1	0	1	Yes
s2l_sqr	1	0	1	Yes
s2l_22	1	0	1	Yes
s2l_tayloia	1	0	1	Yes
s2l_iq	1.00000048	-1.76319119e-07	1.00000048	No
s2l_mbr	1	0	1	Yes
s2l_mrsmile	1	0	1	Yes
s2l_p3	1	0	1	Yes
s2l_p5	1	0	1	Yes
s2l_p4q2	1	0	1	Yes
	// Public Domain under http://unlicense.org, see link for details.
	// Except individual functions being tested are
	// copyright by their respective authors.

	// Comparison of several sRGB<->Linear approximations.

	#include <stdio.h>
	#include <math.h>
	#include <time.h>
	#include <stdlib.h>
	#include <string.h>

	// Reference versions, in double precision.
	// The accuracy of the other versions is
	// measured against these.
	// NOTE: these functions (straight from
	// https://en.wikipedia.org/wiki/SRGB) have
	// discontinuities larger than
	// machine epsilon of float32.
	// Specifically:
	// linear->srgb: -2.9e-08 ( -7.7 float32 ULP)
	// srgb->linear: +2.3e-09 (+10.0 float32 ULP)
	static inline double linear2srgb_ref(double x)
	{
	if(x<=0.0031308) return 12.92*x;
	else return 1.055*pow(x,1.0f/2.4)-0.055;
	}

	static inline double srgb2linear_ref(double x)
	{
	if(x<=0.04045) return x/12.92;
	else return pow((x+0.055)/1.055,2.4);
	}

	//==============================================================================

	// Reference single-precision versions.
	static inline float l2s_ref(float x)
	{
	if(x<=0.0031308f) return 12.92f*x;
	else return 1.055f*powf(x,1.0f/2.4f)-0.055f;
	}

	static inline float s2l_ref(float x)
	{
	if(x<=0.04045f) return x/12.92f;
	else return powf((x+0.055f)/1.055f,2.4f);
	}

	// Slightly optimized versions of the above,
	// with precomputed constants.
	static inline float l2s_refopt(float x)
	{
	if(x<=0.0031308f) return 12.92f*x;
	else return 1.055f*powf(x,0.41666666f)-0.0549999475f;
	}

	static inline float s2l_refopt(float x)
	{
	if(x<=0.04045f) return x*0.0773993808f;
	else return powf(x*0.94786729857f+0.0521327014f,2.4f);
	}

	// Identity function.
	static inline float l2s_identity(float x)
	{
	return x;
	}

	static inline float s2l_identity(float x)
	{
	return x;
	}

	// Square and square root.
	static inline float l2s_sqrt(float x)
	{
	return sqrtf(x);
	}

	static inline float s2l_sqr(float x)
	{
	return x*x;
	}

	// Gamma 2.2 approximation.
	static inline float l2s_22(float x)
	{
	return powf(x,1.0f/2.2f);
	}

	static inline float s2l_22(float x)
	{
	return powf(x,2.2f);
	}

	// From http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
	// Still the first google hit for "srgb approximation" as
	// of this writing.
	static inline float l2s_tayloia_1(float x)
	{
	float S1=sqrtf(x);
	float S2=sqrtf(S1);
	float S3=sqrtf(S2);
	return 0.585122381fS1+0.783140355fS2-0.368262736f*S3;
	}

	static inline float l2s_tayloia_2(float x)
	{
	float S1=sqrtf(x);
	float S2=sqrtf(S1);
	float S3=sqrtf(S2);
	return 0.662002687fS1+0.684122060fS2-0.323583601fS3-0.0225411470fx;
	}

	static inline float s2l_tayloia(float x)
	{
	return x(x(x*0.305306011f+0.682171111f)+0.012522878f);
	}

	// From https://mimosa-pudica.net/fast-gamma/
	// NOTE: only provides linear->sRGB.
	static inline float l2s_yfujii_1(float x)
	{
	return 1.138fsqrtf(x)-0.138fx;
	}

	#if defined(__GNUC__) && defined(__SSE2__)
	#include <emmintrin.h>
	static inline float rsqrt(float x)
	{
	return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
	}
	#else
	static inline float rsqrt(float x)
	{
	return 1.0f/sqrtf(x);
	}
	#endif

	static inline float l2s_yfujii_2(float x)
	{
	float a=0.00279491f;
	float b=1.15907984f;
	float c=b*rsqrt(1.0f+a)-1.0f; // Lets hope compiler computes it only once.
	return (brsqrt(x+a)-c)x;
	}

	// From https://www.shadertoy.com/view/WlG3zG
	static inline float s2l_iq(float x)
	{
	#if __STDC_IEC_60559_FUNCS__>=201506L
	#define pow2m1f(x) exp2m1f(x);
	#else
	#define pow2m1f(x) (exp2f(x)-1.0f)
	#endif
	return (pow2m1f(x)-x0.693147f)3.258891f;
	}

	// From http://marc-b-reynolds.github.io/math/2019/12/10/GammaRamp.html
	static inline float s2l_mbr(float x)
	{
	return (0.5fxx)*(sqrtf(x)+1.0f);
	}

	/*
	static inline float s2l_mbr_2(float x)
	{
	//return (xx)(0.17758834362030029296875f+0.82241165637969970703125f*sqrtf(x));
	//return (xx)(0.564024388790130615234375f+0.435975611209869384765625f*sqrtf(x));
	return xx(0.75f+0.25f*x);
	}

	static inline float s2l_mbr_3(float x)
	{
	return 0x1.117542p-12f+x(-0x1.6479a8p-6f+x(0x1.01ea1cp-1f+x(0x1.554462p-1f+x(-0x1.313c38p-3f))));
	}
	*/

	// Suggested by https://github.com/MrSmile
	// in https://gamedev.ru/flame/forum/?id=226959
	// Are analytic inverse of each other.
	static inline float l2s_mrsmile(float x)
	{
	return sqrtf(0.0034312649f+x(2.3263988f+x1.530326f))-0.906151f*x-0.058577f;
	}

	static inline float s2l_mrsmile(float x)
	{
	// NOTE: in the original version the constant term was
	// 1.5652767f, spoiling 0->0 and 1->1.
	return sqrtf(2.450091f+x(-3.8346549f+x3.0424711f))+1.2776792f*x-1.5652766f;
	}

	// Developed for use in https://github.com/fp64/dbc_blit
	// Public domain.
	static inline float l2s_p3(float x)
	{
	return x(3.68945826f+x(-5.87001922f+x*3.18056096f));
	}

	static inline float l2s_p2q2(float x)
	{
	float p=x(13.044282f+x65.4774821f);
	float q=1.0f+x(42.6165181f+x34.905246f);
	return p/q;
	}

	static inline float l2s_p3q2(float x)
	{
	float p=x(15.2012147f+x(192.311612f+x*67.1571239f));
	float q=1.0f+x(70.6761146f+x202.993836f);
	return p/q;
	}

	static inline float l2s_p4q4(float x)
	{
	float p=x(8.51112614f+x(21407.7188f+x(569310.187f+x(915832.184f))));
	float q=1.0f+x(1249.83141f+x(135537.797f+x(989102.054f+x(380667.918f))));
	return p/q;
	}

	static inline float s2l_p3(float x)
	{
	return x(0.0181434661f+x(0.667441605f+x*0.314414929f));
	}

	static inline float s2l_p5(float x)
	{
	return x(0.0553287698+x(0.378519491+x(1.00770205+x(-0.657329466+x*0.215779156))));
	}

	static inline float s2l_p4q2(float x)
	{
	float p=x(0.0893605858f+x(0.894345379f+x(14.8331415f+x15.3162396f)));
	float q=1.0f+x(23.4190713f+x6.71401579f);
	return p/q;
	}

	//==============================================================================

	// Tests and benchmarks.
	static double test_accuracy(float (f)(float),double (ref)(double))
	{
	double e=0.0;
	const int N=100000;
	for(int i=0;i<=N;++i)
	{
	float t=float(i)/float(N);
	double x=ref(double(t));
	double y=double(f(t));
	double d=fabs(x-y);
	e=fmax(e,d);
	}
	return e;
	}

	template<float (*f)(float)>
	static double test_speed()
	{
	static const int M=2500;
	static const int N=1024;
	double t=double(clock());
	float buf[M];
	for(int i=0;i<N;++i)
	{
	float x=0.0f;
	float dx=1.0f/float(M);
	for(int j=0;j<M;++j)
	{
	buf[j]=f(x);
	x+=dx;
	}
	// Make sure compiler does not optimize out the loop.
	volatile float y=buf[i%M];
	(void)y;
	}
	t=double(clock())-t;
	return t/double(CLOCKS_PER_SEC)/double(M)/double(N);
	}

	static void roundtrip(float (linear2srgb)(float),float (srgb2linear)(float),float *ret)
	{
	float lsl=0.0f,sls=0.0f;
	for(int i=0;i<256;++i)
	{
	float x=float(i)/255.0f;
	lsl=fmaxf(lsl,fabsf(srgb2linear(linear2srgb(x))-x));
	sls=fmaxf(sls,fabsf(linear2srgb(srgb2linear(x))-x));
	}
	ret[0]=lsl;
	ret[1]=sls;
	}

	static void test_properties(float (f)(float),float ret)
	{
	const int N=100000;
	float v=0.0f;
	ret[0]=f(0.0f);
	ret[1]=f(1.0f);
	ret[2]=1.0f;
	ret[3]=0.0f;
	ret[4]=1.0f;
	for(int i=0;i<=N;++i)
	{
	float t=float(i)/float(N);
	float x=f(t);
	ret[2]=fmin(ret[2],x);
	ret[3]=fmax(ret[3],x);
	if(i>0&&x<v) ret[4]=0.0f;
	v=x;
	}
	}

	#define TEST(f) do {\
	double (ref)(double)=/HACK*/(#f[0]=='l'?linear2srgb_ref:srgb2linear_ref);\
	printf("%-20s \| %17.11f \| %11.3f\n",#f,test_accuracy(f,ref),1.0e+9*test_speed<f>());\
	} while (0)

	#define ROUNDTRIP(linear2srgb,srgb2linear) do {\
	float x[2];\
	roundtrip(linear2srgb,srgb2linear,x);\
	printf("%-17s \|%-17s \| %18.11f \| %18.11f\n",#linear2srgb,#srgb2linear,x[0],x[1]);\
	} while (0)

	#define PROPERTIES(f) do {\
	float x[5];\
	test_properties(f,x);\
	printf("%-13s\|%7.3g\|%15.9g\|%15.9g\|%15.9g\| %s\n"\
	,#f\
	,x[0],x[1],x[2],x[3],(x[4]==1.0f?"Yes":"No"));\
	} while (0)

	struct entry
	{
	const char *name;
	float (*function)(float);
	};

	#define ENTRY(f) {#f,f}
	static entry table[]=
	{
	ENTRY(l2s_ref),
	ENTRY(l2s_refopt),
	ENTRY(l2s_identity),
	ENTRY(l2s_sqrt),
	ENTRY(l2s_22),
	ENTRY(l2s_tayloia_1),
	ENTRY(l2s_tayloia_2),
	ENTRY(l2s_yfujii_1),
	ENTRY(l2s_yfujii_2),
	ENTRY(l2s_mrsmile),
	ENTRY(l2s_p3),
	ENTRY(l2s_p2q2),
	ENTRY(l2s_p3q2),
	ENTRY(l2s_p4q4),
	ENTRY(s2l_ref),
	ENTRY(s2l_refopt),
	ENTRY(s2l_identity),
	ENTRY(s2l_sqr),
	ENTRY(s2l_22),
	ENTRY(s2l_tayloia),
	ENTRY(s2l_iq),
	ENTRY(s2l_mbr),
	ENTRY(s2l_mrsmile),
	ENTRY(s2l_p3),
	ENTRY(s2l_p5),
	ENTRY(s2l_p4q2),
	};
	#undef ENTRY

	static const int NUM_ENTRIES=int(sizeof(table)/sizeof(table[0]));

	int main(int argc,char **argv)
	{
	if(argc>1)
	{
	int N=1<<13;
	const char *s=argv[1];
	float (*f)(float)=0;
	double (*ref)(double)=(s[0]=='s'?srgb2linear_ref:linear2srgb_ref);
	if(strcmp(s,"list")==0)
	{
	for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
	printf("%s\n",table[i].name);
	return 0;
	}
	for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
	if(strcmp(s,table[i].name)==0) {f=table[i].function;break;}
	if(f==0) return 0;
	printf("x %s\n",s);
	for(int i=0;i<=N;++i)
	{
	float t=float(i)/float(N);
	t=t*t;
	printf("%.17f %.17f\n",t,double(f(t))-ref(double(t)));
	}
	return 0;
	}
	#if defined(DETECT_CPU) && defined(__SSE2__) && defined(__GNUC__)
	// Makeshift GCC-only x86 CPU detection.
	// Output similar to system("cat /proc/cpuinfo \| grep 'model name' \| uniq");
	printf("CPU: ");
	for(int i=2;i<5;++i)
	{
	unsigned reg[4]={0x80000000u+i,0,0,0};
	char s[17]={0};
	__asm__ volatile ("cpuid":"+a"(reg[0]),"+b"(reg[1]),"+c"(reg[2]),"+d"(reg[3]));
	memcpy(s,reg,sizeof(reg));
	printf("%s",s);
	}
	printf("\n");
	#endif
	const char *head="---------------------------------------------------";
	printf("Testing Linear -> sRGB:\n");
	printf("%-20s \| %17s \| %11s\n","Function","Abs. error (Linf)","Time, ns");
	printf("%20.20s-\|-%17.17s-\|-%11.11s\n",head,head,head);
	TEST(l2s_ref);
	TEST(l2s_refopt);
	TEST(l2s_identity);
	TEST(l2s_sqrt);
	TEST(l2s_22);
	TEST(l2s_tayloia_1);
	TEST(l2s_tayloia_2);
	TEST(l2s_yfujii_1);
	TEST(l2s_yfujii_2);
	TEST(l2s_mrsmile);
	TEST(l2s_p3);
	TEST(l2s_p2q2);
	TEST(l2s_p3q2);
	TEST(l2s_p4q4);
	printf("\n");
	printf("Testing sRGB -> Linear:\n");
	printf("%-20s \| %17s \| %11s\n","Function","Abs. error (Linf)","Time, ns");
	printf("%20.20s-\|-%17.17s-\|-%11.11s\n",head,head,head);
	TEST(s2l_ref);
	TEST(s2l_refopt);
	TEST(s2l_identity);
	TEST(s2l_sqr);
	TEST(s2l_22);
	TEST(s2l_tayloia);
	TEST(s2l_iq);
	TEST(s2l_mbr);
	TEST(s2l_mrsmile);
	TEST(s2l_p3);
	TEST(s2l_p5);
	TEST(s2l_p4q2);
	printf("\n");
	printf("Testing round-trip:\n");
	printf("%-17s \|%-17s \|%-20s\|%-20s\n","Linear->sRGB","sRGB->Linear","Linear->sRGB->Linear","sRGB->Linear->sRGB");
	printf("%-17.17s-\|%-17.17s-\|%-20.20s\|%-20.20s\n",head,head,head,head);
	ROUNDTRIP(l2s_ref ,s2l_ref);
	ROUNDTRIP(l2s_sqrt ,s2l_sqr);
	ROUNDTRIP(l2s_tayloia_1 ,s2l_tayloia);
	ROUNDTRIP(l2s_tayloia_2 ,s2l_tayloia);
	ROUNDTRIP(l2s_mrsmile ,s2l_mrsmile);
	ROUNDTRIP(l2s_p3 ,s2l_p3);
	ROUNDTRIP(l2s_p2q2 ,s2l_p3);
	ROUNDTRIP(l2s_p3q2 ,s2l_p5);
	ROUNDTRIP(l2s_p4q4 ,s2l_p4q2);
	printf("\n");

	// NOTE: do take these with a grain of salt.
	// They can depend, among other things, on
	// * Computing in x87 vs SSE
	// * Rounding direction
	// * FP_CONTRACT
	printf("Testing properties:\n");
	printf("%-13s\|%-7.7s\|%-15.15s\|%-15.15s\|%-15.15s\|%s\n",
	"Function","f(0)","f(1)","min(f)","max(f)","Monotone");
	printf("%-13.13s\|%-7.7s\|%-15.15s\|%-15.15s\|%-15.15s\|%s\n",
	head,head,head,head,head,"--------");
	PROPERTIES(l2s_ref);
	PROPERTIES(l2s_refopt);
	PROPERTIES(l2s_identity);
	PROPERTIES(l2s_sqrt);
	PROPERTIES(l2s_22);
	PROPERTIES(l2s_tayloia_1);
	PROPERTIES(l2s_tayloia_2);
	PROPERTIES(l2s_yfujii_1);
	PROPERTIES(l2s_yfujii_2);
	PROPERTIES(l2s_mrsmile);
	PROPERTIES(l2s_p3);
	PROPERTIES(l2s_p2q2);
	PROPERTIES(l2s_p3q2);
	PROPERTIES(l2s_p4q4);
	PROPERTIES(s2l_ref);
	PROPERTIES(s2l_refopt);
	PROPERTIES(s2l_identity);
	PROPERTIES(s2l_sqr);
	PROPERTIES(s2l_22);
	PROPERTIES(s2l_tayloia);
	PROPERTIES(s2l_iq);
	PROPERTIES(s2l_mbr);
	PROPERTIES(s2l_mrsmile);
	PROPERTIES(s2l_p3);
	PROPERTIES(s2l_p5);
	PROPERTIES(s2l_p4q2);
	return 0;
	}