Created
June 28, 2019 12:55
-
-
Save Marc-B-Reynolds/b1c6bfeb54c68f93f1a65e66d65cc227 to your computer and use it in GitHub Desktop.
RU(sqrt(x)) & RD(sqrt(x)) in round-to-nearest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <math.h> | |
#include <string.h> | |
inline uint32_t f32_to_bits(float x) | |
{ | |
uint32_t u; memcpy(&u, &x, 4); return u; | |
} | |
inline float f32_from_bits(uint32_t x) | |
{ | |
float f; memcpy(&f, &x, 4); return f; | |
} | |
// next representable FP away from zero | |
// * blindly walks past +/-inf to NaN | |
inline float f32_succ(float a) | |
{ | |
return f32_from_bits(f32_to_bits(a)+1); | |
} | |
// next representable FP toward zero | |
// * blindly walks past +/-zero to NaN | |
inline float f32_pred(float a) | |
{ | |
return f32_from_bits(f32_to_bits(a)-1); | |
} | |
// next representable FP away from zero | |
// * a is normal and not zero | |
inline float f32_nnz_succ(float a) | |
{ | |
const float s = 0x1.000002p-24f; | |
return fmaf(a,s,a); | |
} | |
// next representable FP toward zero | |
// * a is normal and not zero | |
inline float f32_nnz_pred(float a) | |
{ | |
const float s = -0x1.000002p-24f; | |
return fmaf(a,s,a); | |
} | |
// hacky cutoff values | |
const float f32_sqrt_rd_cutoff = 0x1.fffffcp-103f; | |
const float f32_sqrt_ru_cutoff = 0x1.bb882cp-107f; | |
// punt on small input version | |
float f32_sqrt_ru(float x) | |
{ | |
if (x >= f32_sqrt_ru_cutoff) { | |
float r = sqrtf(x); | |
float e = fmaf(r,r,-x); | |
if (e < 0.f) { r = f32_succ(r); } | |
return r; | |
} | |
return 0; // hack: NaN or RU(sqrt(cutoff)) | |
} | |
// punt on small input version | |
float f32_sqrt_rd(float x) | |
{ | |
if (x >= f32_sqrt_rd_cutoff) { | |
float r = sqrtf(x); | |
float e = fmaf(r,r,-x); | |
if (e > 0.f) { r = f32_pred(r); } | |
return r; | |
} | |
return 0; // hack: NaN or zero | |
} | |
// promote to double version: for shorter source | |
float f32_sqrt_ru_promote(float x) | |
{ | |
if (x >= 0.f) { | |
float r = sqrtf(x); | |
double rd = (double)r; | |
double e = fma(rd,rd,-(double)x); | |
if (e < 0.0) { r = f32_succ(r); } | |
return r; | |
} | |
return 0; // hack: should return NaN | |
} | |
// promote to double version: for shorter source | |
float f32_sqrt_rd_promote(float x) | |
{ | |
if (x >= 0.f) { | |
float r = sqrtf(x); | |
double dr = (double)r; | |
double e = fma(dr,dr,-(double)x); | |
if (e > 0.0) { r = f32_pred(r); } | |
return r; | |
} | |
return 0; // hack: should return NaN | |
} | |
float f32_sqrt_ru_no_promote(float x) | |
{ | |
if (x >= f32_sqrt_ru_cutoff) { | |
float r = sqrtf(x); | |
float e = fmaf(r,r,-x); | |
if (e < 0.f) { r = f32_succ(r); } | |
return r; | |
} | |
// rescale input so 'e' doesn't flush to zero. | |
if (x != 0) | |
return 0x1.0p-50f*f32_sqrt_ru_no_promote(0x1.0p100f*x); | |
return 0; // hack: should return NaN | |
} | |
float f32_sqrt_rd_no_promote(float x) | |
{ | |
if (x >= f32_sqrt_rd_cutoff) { | |
float r = sqrtf(x); | |
float e = fmaf(r,r,-x); | |
if (e > 0.f) { r = f32_pred(r); } | |
return r; | |
} | |
// rescale input so 'e' doesn't flush to zero. | |
if (x != 0) | |
return 0x1.0p-50f*f32_sqrt_rd_no_promote(0x1.0p100f*x); | |
return 0; // hack: should return NaN | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment