Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active December 1, 2022 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vurtun/6f8d4481efd2a38da11745948448a0bd to your computer and use it in GitHub Desktop.
Save vurtun/6f8d4481efd2a38da11745948448a0bd to your computer and use it in GitHub Desktop.
/*
Enable FMA support:
clang -O2 -Wall -mfma
*/
static inline float
rsqrta(float x) {
/* Exact bits: 13.71 */
/* δ+max = 7.459289×10^−5 */
/* δ−max = −7.450387×10^−5 */
union { float f; int i; } v = {x};
int k = v.i & 0x00800000;
float y = 0.0f;
if (k != 0) {
v.i = 0x5ed9e91f - (v.i >> 1);
y = v.f;
y = 2.33124256f*y*fmaf(-x,y*y, 1.0749737f);
} else {
v.i = 0x5f19e8fc - (v.i >> 1);
y = v.f;
y = 0.824218631f*y*fmaf(-x, y*y, 2.1499474f);
}
return y;
}
static inline float
rsqrt(float x) {
/* Exact bits: 23.62 */
/* δ+max = 7.362378×10^−8 */
/* δ−max = −7.754203×10^−8 */
union { float f; int i; } v = {x};
int k = v.i & 0x00800000;
float y = 0.0f;
if (k != 0) {
v.i = 0x5ed9dbc6 - (v.i >> 1);
y = v.f;
y = 2.33124018f*y*fmaf(-x, y*y, 1.07497406f);
} else {
v.i = 0x5f19d200 - (v.i >> 1);
y = v.f;
y = 0.824212492f*y*fmaf(-x, y*y, 2.14996147f);
}
{
float c = x*y;
float r = fmaf(y, -c, 1.0f);
return fmaf(0.5f*y, r, y);
}
}
static inline float
sqrta(float x) {
/* Exact bits: 13.71 */
/* δ+max = 7.450372×10^−5 */
/* δ−max = −7.451108×10^−5 */
union { float f; int i; } v = {x};
int k = v.i & 0x00800000;
float y = 0.0f, c;
if (k != 0) {
v.i = 0x5ed9e893 - (v.i >> 1);
y = v.f;
c = x*y;
y = 2.33130789f*c*fmaf(y, -c, 1.07495356f);
} else {
v.i = 0x5f19e8fd - (v.i >> 1);
y = v.f;
c = x*y;
y = 0.82421863f*c*fmaf(y, -c, 2.1499474f);
}
return y;
}
static inline float
sqrt(float x) {
/* Exact bits: 23.4 */
/* δ+max = 8.757966×10^−8 */
/* δ−max = −9.037992×10^−8 */
union { float f; int i; } v = {x};
int k = v.i & 0x00800000;
float y, c;
if (k != 0) {
v.i = 0x5ed9d098 - (v.i >> 1);
y = v.f;
c = x*y;
y = 2.33139729f*c*fmaf(y, -c, 1.07492042f);
} else {
v.i = 0x5f19d352 - (v.i >> 1);
y = v.f;
c = x*y;
y = 0.82420468f*c*fmaf(y, -c, 2.14996147f);
}
{
float k = x*y;
float r = fmaf(y, -k, 1.0f);
return fmaf(0.5f*k, r, k);
}
}
static float
rcbrta(float x){
/* 15.18 correct bits */
/* |δ1max| = 2.686·10^−5 */
union {float f; int i; } v = {x};
v.i = 0x548c2b4b - v.i/3;
float y = v.f;
float c = x*y*y*y;
y = y*(1.752319676f - c*(1.2509524245f - 0.5093818292f*c));
c = 1.0f - x*y*y*y;
y = y*(1.0f + 0.333333333333f*c);
return y;
}
static float
rcbrt(float x){
/* 22.84 correct bits */
/* |δ2max| = 1.3301·10^−7 */
union {float f; int i; } v = {x};
v.i = 0x548c2b4b - v.i/3;
float y = v.f;
float c = x*y*y*y;
y = y*(1.752319676f - c*(1.2509524245f - 0.5093818292f*c));
c = 1.0f - x*y*y*y;//fmaf
float d = x*y*y;
c = 1.0f - d*y;//fmaf
y = d*(1.0f + 0.333333333333f*c);//fmaf
return y;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment