Skip to content

Instantly share code, notes, and snippets.

@fp64
Last active September 5, 2023 17:43
Show Gist options
  • Save fp64/5997d29b18e33da5c336012cff18e74b to your computer and use it in GitHub Desktop.
Save fp64/5997d29b18e33da5c336012cff18e74b to your computer and use it in GitHub Desktop.
Comparison of several sRGB↔Linear approximations

Update: added a few more functions.

Results, on AMD EPYC 7571.

Note: time for _identity functions is essentially pure overhead of the test setup.

sRGB→Linear:

Function Abs. error (Linf) Time, ns
s2l_ref 0.00000023284 18.931
s2l_refopt 0.00000009856 17.465
s2l_identity 0.28713593804 1.223
s2l_sqr 0.04247703407 1.263
s2l_22 0.00852770523 15.938
s2l_tayloia 0.00167073848 1.388
s2l_iq 0.00680385671 7.545
s2l_mbr 0.00416070167 1.974
s2l_mrsmile 0.00724860203 2.865
s2l_p3 0.00133785212 1.697
s2l_p5 0.00030062660 3.570
s2l_p4q2 0.00004142101 3.432

Linear→sRGB:

Function Abs. error (Linf) Time, ns
l2s_ref 0.00000014871 18.366
l2s_refopt 0.00000009458 18.145
l2s_identity 0.28713593803 1.227
l2s_sqrt 0.03735434676 1.973
l2s_22 0.03352387373 15.901
l2s_tayloia_1 0.04156852089 5.926
l2s_tayloia_2 0.03629874758 5.920
l2s_yfujii_1 0.02479406936 1.974
l2s_yfujii_2 0.00733280893 1.888
l2s_mrsmile 0.00390320955 2.818
l2s_p3 0.08428821274 1.387
l2s_p2q2 0.00605785567 2.583
l2s_p3q2 0.00161903180 3.011
l2s_p4q4 0.00065633937 4.241

Round-trip:

Linear→sRGB sRGB→Linear Linear→sRGB→Linear sRGB→Linear→sRGB
l2s_ref s2l_ref 0.00000017881 0.00000005960
l2s_sqrt s2l_sqr 0.00000005960 0.00000000000
l2s_tayloia_1 s2l_tayloia 0.00224190950 0.03975261003
l2s_tayloia_2 s2l_tayloia 0.00159212621 0.03461691365
l2s_mrsmile s2l_mrsmile 0.00000041723 0.00000119628
l2s_p3 s2l_p3 0.16314363480 0.08558563888
l2s_p2q2 s2l_p3 0.00919800997 0.01905959100
l2s_p3q2 s2l_p5 0.00318956375 0.00248982012
l2s_p4q4 s2l_p4q2 0.00136017799 0.00067627430

Note: for sRGB→Linear→sRGB the error of sRGB→Linear can be magnified by up to 12.92 by the subsequent Linear→sRGB transformation.

Note: for 8-bit color channels you might want to keep sRGB→Linear→sRGB the error to below 0.5/255≈0.00196. This error can be pretty noticeable in some circumstances (e.g. when blending fully transparent part of the sprite actually the changes underlying color).

Numerical properties:

Function f(0) f(1) min(f) max(f) Monotone
l2s_ref 0 0.99999994 0 0.99999994 Yes
l2s_refopt 0 1 0 1 Yes
l2s_identity 0 1 0 1 Yes
l2s_sqrt 0 1 0 1 Yes
l2s_22 0 1 0 1 Yes
l2s_tayloia_1 0 1 -0.0414393209 1 No
l2s_tayloia_2 0 1 -0.0361695476 1 No
l2s_yfujii_1 0 1 0 1 Yes
l2s_yfujii_2 0 1 0 1.00003147 No
l2s_mrsmile 0 1 0 1 Yes
l2s_p3 0 1 0 1 Yes
l2s_p2q2 0 1 0 1 Yes
l2s_p3q2 0 1 0 1 Yes
l2s_p4q4 0 1 0 1 Yes
s2l_ref 0 1 0 1 Yes
s2l_refopt 0 1 0 1 Yes
s2l_identity 0 1 0 1 Yes
s2l_sqr 0 1 0 1 Yes
s2l_22 0 1 0 1 Yes
s2l_tayloia 0 1 0 1 Yes
s2l_iq 0 1.00000048 -1.76319119e-07 1.00000048 No
s2l_mbr 0 1 0 1 Yes
s2l_mrsmile 0 1 0 1 Yes
s2l_p3 0 1 0 1 Yes
s2l_p5 0 1 0 1 Yes
s2l_p4q2 0 1 0 1 Yes

Note: do take these with a grain of salt. They can and will change depending on the numeric environment (e.g. _mrsmile functions fail 0/1 on x87). The specific conditions of the test were:

  • IEEE-754
  • Round-to-nearest-ties-to-even
  • Evaluated in stated precision (i.e. not on x87)
  • No FMA (FP_CONTRACT OFF)

Error plots:

sRGB→Linear

img_s2l_3 img_s2l_4

Linear→sRGB (also with logscale x, to magnify the linear region)

Linear Logscale
img_l2s_2
img_l2s_3 img_l2s_3 log
img_l2s_4 img_l2s_4 log

On the non-linear region only (i.e. [0.0031308;1])

img_l2s_5

// Public Domain under http://unlicense.org, see link for details.
// Except individual functions being tested are
// copyright by their respective authors.
// Comparison of several sRGB<->Linear approximations.
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
// Reference versions, in double precision.
// The accuracy of the other versions is
// measured against these.
// NOTE: these functions (straight from
// https://en.wikipedia.org/wiki/SRGB) have
// discontinuities larger than
// machine epsilon of float32.
// Specifically:
// linear->srgb: -2.9e-08 ( -7.7 float32 ULP)
// srgb->linear: +2.3e-09 (+10.0 float32 ULP)
static inline double linear2srgb_ref(double x)
{
if(x<=0.0031308) return 12.92*x;
else return 1.055*pow(x,1.0f/2.4)-0.055;
}
static inline double srgb2linear_ref(double x)
{
if(x<=0.04045) return x/12.92;
else return pow((x+0.055)/1.055,2.4);
}
//==============================================================================
// Reference single-precision versions.
static inline float l2s_ref(float x)
{
if(x<=0.0031308f) return 12.92f*x;
else return 1.055f*powf(x,1.0f/2.4f)-0.055f;
}
static inline float s2l_ref(float x)
{
if(x<=0.04045f) return x/12.92f;
else return powf((x+0.055f)/1.055f,2.4f);
}
// Slightly optimized versions of the above,
// with precomputed constants.
static inline float l2s_refopt(float x)
{
if(x<=0.0031308f) return 12.92f*x;
else return 1.055f*powf(x,0.41666666f)-0.0549999475f;
}
static inline float s2l_refopt(float x)
{
if(x<=0.04045f) return x*0.0773993808f;
else return powf(x*0.94786729857f+0.0521327014f,2.4f);
}
// Identity function.
static inline float l2s_identity(float x)
{
return x;
}
static inline float s2l_identity(float x)
{
return x;
}
// Square and square root.
static inline float l2s_sqrt(float x)
{
return sqrtf(x);
}
static inline float s2l_sqr(float x)
{
return x*x;
}
// Gamma 2.2 approximation.
static inline float l2s_22(float x)
{
return powf(x,1.0f/2.2f);
}
static inline float s2l_22(float x)
{
return powf(x,2.2f);
}
// From http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
// Still the first google hit for "srgb approximation" as
// of this writing.
static inline float l2s_tayloia_1(float x)
{
float S1=sqrtf(x);
float S2=sqrtf(S1);
float S3=sqrtf(S2);
return 0.585122381f*S1+0.783140355f*S2-0.368262736f*S3;
}
static inline float l2s_tayloia_2(float x)
{
float S1=sqrtf(x);
float S2=sqrtf(S1);
float S3=sqrtf(S2);
return 0.662002687f*S1+0.684122060f*S2-0.323583601f*S3-0.0225411470f*x;
}
static inline float s2l_tayloia(float x)
{
return x*(x*(x*0.305306011f+0.682171111f)+0.012522878f);
}
// From https://mimosa-pudica.net/fast-gamma/
// NOTE: only provides linear->sRGB.
static inline float l2s_yfujii_1(float x)
{
return 1.138f*sqrtf(x)-0.138f*x;
}
#if defined(__GNUC__) && defined(__SSE2__)
#include <emmintrin.h>
static inline float rsqrt(float x)
{
return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
}
#else
static inline float rsqrt(float x)
{
return 1.0f/sqrtf(x);
}
#endif
static inline float l2s_yfujii_2(float x)
{
float a=0.00279491f;
float b=1.15907984f;
float c=b*rsqrt(1.0f+a)-1.0f; // Lets hope compiler computes it only once.
return (b*rsqrt(x+a)-c)*x;
}
// From https://www.shadertoy.com/view/WlG3zG
static inline float s2l_iq(float x)
{
#if __STDC_IEC_60559_FUNCS__>=201506L
#define pow2m1f(x) exp2m1f(x);
#else
#define pow2m1f(x) (exp2f(x)-1.0f)
#endif
return (pow2m1f(x)-x*0.693147f)*3.258891f;
}
// From http://marc-b-reynolds.github.io/math/2019/12/10/GammaRamp.html
static inline float s2l_mbr(float x)
{
return (0.5f*x*x)*(sqrtf(x)+1.0f);
}
/*
static inline float s2l_mbr_2(float x)
{
//return (x*x)*(0.17758834362030029296875f+0.82241165637969970703125f*sqrtf(x));
//return (x*x)*(0.564024388790130615234375f+0.435975611209869384765625f*sqrtf(x));
return x*x*(0.75f+0.25f*x);
}
static inline float s2l_mbr_3(float x)
{
return 0x1.117542p-12f+x*(-0x1.6479a8p-6f+x*(0x1.01ea1cp-1f+x*(0x1.554462p-1f+x*(-0x1.313c38p-3f))));
}
*/
// Suggested by https://github.com/MrSmile
// in https://gamedev.ru/flame/forum/?id=226959
// Are analytic inverse of each other.
static inline float l2s_mrsmile(float x)
{
return sqrtf(0.0034312649f+x*(2.3263988f+x*1.530326f))-0.906151f*x-0.058577f;
}
static inline float s2l_mrsmile(float x)
{
// NOTE: in the original version the constant term was
// 1.5652767f, spoiling 0->0 and 1->1.
return sqrtf(2.450091f+x*(-3.8346549f+x*3.0424711f))+1.2776792f*x-1.5652766f;
}
// Developed for use in https://github.com/fp64/dbc_blit
// Public domain.
static inline float l2s_p3(float x)
{
return x*(3.68945826f+x*(-5.87001922f+x*3.18056096f));
}
static inline float l2s_p2q2(float x)
{
float p=x*(13.044282f+x*65.4774821f);
float q=1.0f+x*(42.6165181f+x*34.905246f);
return p/q;
}
static inline float l2s_p3q2(float x)
{
float p=x*(15.2012147f+x*(192.311612f+x*67.1571239f));
float q=1.0f+x*(70.6761146f+x*202.993836f);
return p/q;
}
static inline float l2s_p4q4(float x)
{
float p=x*(8.51112614f+x*(21407.7188f+x*(569310.187f+x*(915832.184f))));
float q=1.0f+x*(1249.83141f+x*(135537.797f+x*(989102.054f+x*(380667.918f))));
return p/q;
}
static inline float s2l_p3(float x)
{
return x*(0.0181434661f+x*(0.667441605f+x*0.314414929f));
}
static inline float s2l_p5(float x)
{
return x*(0.0553287698+x*(0.378519491+x*(1.00770205+x*(-0.657329466+x*0.215779156))));
}
static inline float s2l_p4q2(float x)
{
float p=x*(0.0893605858f+x*(0.894345379f+x*(14.8331415f+x*15.3162396f)));
float q=1.0f+x*(23.4190713f+x*6.71401579f);
return p/q;
}
//==============================================================================
// Tests and benchmarks.
static double test_accuracy(float (*f)(float),double (*ref)(double))
{
double e=0.0;
const int N=100000;
for(int i=0;i<=N;++i)
{
float t=float(i)/float(N);
double x=ref(double(t));
double y=double(f(t));
double d=fabs(x-y);
e=fmax(e,d);
}
return e;
}
template<float (*f)(float)>
static double test_speed()
{
static const int M=2500;
static const int N=1024;
double t=double(clock());
float buf[M];
for(int i=0;i<N;++i)
{
float x=0.0f;
float dx=1.0f/float(M);
for(int j=0;j<M;++j)
{
buf[j]=f(x);
x+=dx;
}
// Make sure compiler does not optimize out the loop.
volatile float y=buf[i%M];
(void)y;
}
t=double(clock())-t;
return t/double(CLOCKS_PER_SEC)/double(M)/double(N);
}
static void roundtrip(float (*linear2srgb)(float),float (*srgb2linear)(float),float *ret)
{
float lsl=0.0f,sls=0.0f;
for(int i=0;i<256;++i)
{
float x=float(i)/255.0f;
lsl=fmaxf(lsl,fabsf(srgb2linear(linear2srgb(x))-x));
sls=fmaxf(sls,fabsf(linear2srgb(srgb2linear(x))-x));
}
ret[0]=lsl;
ret[1]=sls;
}
static void test_properties(float (*f)(float),float *ret)
{
const int N=100000;
float v=0.0f;
ret[0]=f(0.0f);
ret[1]=f(1.0f);
ret[2]=1.0f;
ret[3]=0.0f;
ret[4]=1.0f;
for(int i=0;i<=N;++i)
{
float t=float(i)/float(N);
float x=f(t);
ret[2]=fmin(ret[2],x);
ret[3]=fmax(ret[3],x);
if(i>0&&x<v) ret[4]=0.0f;
v=x;
}
}
#define TEST(f) do {\
double (*ref)(double)=/*HACK*/(#f[0]=='l'?linear2srgb_ref:srgb2linear_ref);\
printf("%-20s | %17.11f | %11.3f\n",#f,test_accuracy(f,ref),1.0e+9*test_speed<f>());\
} while (0)
#define ROUNDTRIP(linear2srgb,srgb2linear) do {\
float x[2];\
roundtrip(linear2srgb,srgb2linear,x);\
printf("%-17s |%-17s | %18.11f | %18.11f\n",#linear2srgb,#srgb2linear,x[0],x[1]);\
} while (0)
#define PROPERTIES(f) do {\
float x[5];\
test_properties(f,x);\
printf("%-13s|%7.3g|%15.9g|%15.9g|%15.9g| %s\n"\
,#f\
,x[0],x[1],x[2],x[3],(x[4]==1.0f?"Yes":"No"));\
} while (0)
struct entry
{
const char *name;
float (*function)(float);
};
#define ENTRY(f) {#f,f}
static entry table[]=
{
ENTRY(l2s_ref),
ENTRY(l2s_refopt),
ENTRY(l2s_identity),
ENTRY(l2s_sqrt),
ENTRY(l2s_22),
ENTRY(l2s_tayloia_1),
ENTRY(l2s_tayloia_2),
ENTRY(l2s_yfujii_1),
ENTRY(l2s_yfujii_2),
ENTRY(l2s_mrsmile),
ENTRY(l2s_p3),
ENTRY(l2s_p2q2),
ENTRY(l2s_p3q2),
ENTRY(l2s_p4q4),
ENTRY(s2l_ref),
ENTRY(s2l_refopt),
ENTRY(s2l_identity),
ENTRY(s2l_sqr),
ENTRY(s2l_22),
ENTRY(s2l_tayloia),
ENTRY(s2l_iq),
ENTRY(s2l_mbr),
ENTRY(s2l_mrsmile),
ENTRY(s2l_p3),
ENTRY(s2l_p5),
ENTRY(s2l_p4q2),
};
#undef ENTRY
static const int NUM_ENTRIES=int(sizeof(table)/sizeof(table[0]));
int main(int argc,char **argv)
{
if(argc>1)
{
int N=1<<13;
const char *s=argv[1];
float (*f)(float)=0;
double (*ref)(double)=(s[0]=='s'?srgb2linear_ref:linear2srgb_ref);
if(strcmp(s,"list")==0)
{
for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
printf("%s\n",table[i].name);
return 0;
}
for(int i=0;i<int(sizeof(table)/sizeof(table[0]));++i)
if(strcmp(s,table[i].name)==0) {f=table[i].function;break;}
if(f==0) return 0;
printf("x %s\n",s);
for(int i=0;i<=N;++i)
{
float t=float(i)/float(N);
t=t*t;
printf("%.17f %.17f\n",t,double(f(t))-ref(double(t)));
}
return 0;
}
#if defined(DETECT_CPU) && defined(__SSE2__) && defined(__GNUC__)
// Makeshift GCC-only x86 CPU detection.
// Output similar to system("cat /proc/cpuinfo | grep 'model name' | uniq");
printf("CPU: ");
for(int i=2;i<5;++i)
{
unsigned reg[4]={0x80000000u+i,0,0,0};
char s[17]={0};
__asm__ volatile ("cpuid":"+a"(reg[0]),"+b"(reg[1]),"+c"(reg[2]),"+d"(reg[3]));
memcpy(s,reg,sizeof(reg));
printf("%s",s);
}
printf("\n");
#endif
const char *head="---------------------------------------------------";
printf("Testing Linear -> sRGB:\n");
printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns");
printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head);
TEST(l2s_ref);
TEST(l2s_refopt);
TEST(l2s_identity);
TEST(l2s_sqrt);
TEST(l2s_22);
TEST(l2s_tayloia_1);
TEST(l2s_tayloia_2);
TEST(l2s_yfujii_1);
TEST(l2s_yfujii_2);
TEST(l2s_mrsmile);
TEST(l2s_p3);
TEST(l2s_p2q2);
TEST(l2s_p3q2);
TEST(l2s_p4q4);
printf("\n");
printf("Testing sRGB -> Linear:\n");
printf("%-20s | %17s | %11s\n","Function","Abs. error (Linf)","Time, ns");
printf("%20.20s-|-%17.17s-|-%11.11s\n",head,head,head);
TEST(s2l_ref);
TEST(s2l_refopt);
TEST(s2l_identity);
TEST(s2l_sqr);
TEST(s2l_22);
TEST(s2l_tayloia);
TEST(s2l_iq);
TEST(s2l_mbr);
TEST(s2l_mrsmile);
TEST(s2l_p3);
TEST(s2l_p5);
TEST(s2l_p4q2);
printf("\n");
printf("Testing round-trip:\n");
printf("%-17s |%-17s |%-20s|%-20s\n","Linear->sRGB","sRGB->Linear","Linear->sRGB->Linear","sRGB->Linear->sRGB");
printf("%-17.17s-|%-17.17s-|%-20.20s|%-20.20s\n",head,head,head,head);
ROUNDTRIP(l2s_ref ,s2l_ref);
ROUNDTRIP(l2s_sqrt ,s2l_sqr);
ROUNDTRIP(l2s_tayloia_1 ,s2l_tayloia);
ROUNDTRIP(l2s_tayloia_2 ,s2l_tayloia);
ROUNDTRIP(l2s_mrsmile ,s2l_mrsmile);
ROUNDTRIP(l2s_p3 ,s2l_p3);
ROUNDTRIP(l2s_p2q2 ,s2l_p3);
ROUNDTRIP(l2s_p3q2 ,s2l_p5);
ROUNDTRIP(l2s_p4q4 ,s2l_p4q2);
printf("\n");
// NOTE: do take these with a grain of salt.
// They can depend, among other things, on
// * Computing in x87 vs SSE
// * Rounding direction
// * FP_CONTRACT
printf("Testing properties:\n");
printf("%-13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n",
"Function","f(0)","f(1)","min(f)","max(f)","Monotone");
printf("%-13.13s|%-7.7s|%-15.15s|%-15.15s|%-15.15s|%s\n",
head,head,head,head,head,"--------");
PROPERTIES(l2s_ref);
PROPERTIES(l2s_refopt);
PROPERTIES(l2s_identity);
PROPERTIES(l2s_sqrt);
PROPERTIES(l2s_22);
PROPERTIES(l2s_tayloia_1);
PROPERTIES(l2s_tayloia_2);
PROPERTIES(l2s_yfujii_1);
PROPERTIES(l2s_yfujii_2);
PROPERTIES(l2s_mrsmile);
PROPERTIES(l2s_p3);
PROPERTIES(l2s_p2q2);
PROPERTIES(l2s_p3q2);
PROPERTIES(l2s_p4q4);
PROPERTIES(s2l_ref);
PROPERTIES(s2l_refopt);
PROPERTIES(s2l_identity);
PROPERTIES(s2l_sqr);
PROPERTIES(s2l_22);
PROPERTIES(s2l_tayloia);
PROPERTIES(s2l_iq);
PROPERTIES(s2l_mbr);
PROPERTIES(s2l_mrsmile);
PROPERTIES(s2l_p3);
PROPERTIES(s2l_p5);
PROPERTIES(s2l_p4q2);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment