Skip to content

Instantly share code, notes, and snippets.

@Marc-B-Reynolds
Created June 28, 2019 12:55
Show Gist options
  • Save Marc-B-Reynolds/b1c6bfeb54c68f93f1a65e66d65cc227 to your computer and use it in GitHub Desktop.
Save Marc-B-Reynolds/b1c6bfeb54c68f93f1a65e66d65cc227 to your computer and use it in GitHub Desktop.
RU(sqrt(x)) & RD(sqrt(x)) in round-to-nearest
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
inline uint32_t f32_to_bits(float x)
{
uint32_t u; memcpy(&u, &x, 4); return u;
}
inline float f32_from_bits(uint32_t x)
{
float f; memcpy(&f, &x, 4); return f;
}
// next representable FP away from zero
// * blindly walks past +/-inf to NaN
inline float f32_succ(float a)
{
return f32_from_bits(f32_to_bits(a)+1);
}
// next representable FP toward zero
// * blindly walks past +/-zero to NaN
inline float f32_pred(float a)
{
return f32_from_bits(f32_to_bits(a)-1);
}
// next representable FP away from zero
// * a is normal and not zero
inline float f32_nnz_succ(float a)
{
const float s = 0x1.000002p-24f;
return fmaf(a,s,a);
}
// next representable FP toward zero
// * a is normal and not zero
inline float f32_nnz_pred(float a)
{
const float s = -0x1.000002p-24f;
return fmaf(a,s,a);
}
// hacky cutoff values
const float f32_sqrt_rd_cutoff = 0x1.fffffcp-103f;
const float f32_sqrt_ru_cutoff = 0x1.bb882cp-107f;
// punt on small input version
float f32_sqrt_ru(float x)
{
if (x >= f32_sqrt_ru_cutoff) {
float r = sqrtf(x);
float e = fmaf(r,r,-x);
if (e < 0.f) { r = f32_succ(r); }
return r;
}
return 0; // hack: NaN or RU(sqrt(cutoff))
}
// punt on small input version
float f32_sqrt_rd(float x)
{
if (x >= f32_sqrt_rd_cutoff) {
float r = sqrtf(x);
float e = fmaf(r,r,-x);
if (e > 0.f) { r = f32_pred(r); }
return r;
}
return 0; // hack: NaN or zero
}
// promote to double version: for shorter source
float f32_sqrt_ru_promote(float x)
{
if (x >= 0.f) {
float r = sqrtf(x);
double rd = (double)r;
double e = fma(rd,rd,-(double)x);
if (e < 0.0) { r = f32_succ(r); }
return r;
}
return 0; // hack: should return NaN
}
// promote to double version: for shorter source
float f32_sqrt_rd_promote(float x)
{
if (x >= 0.f) {
float r = sqrtf(x);
double dr = (double)r;
double e = fma(dr,dr,-(double)x);
if (e > 0.0) { r = f32_pred(r); }
return r;
}
return 0; // hack: should return NaN
}
float f32_sqrt_ru_no_promote(float x)
{
if (x >= f32_sqrt_ru_cutoff) {
float r = sqrtf(x);
float e = fmaf(r,r,-x);
if (e < 0.f) { r = f32_succ(r); }
return r;
}
// rescale input so 'e' doesn't flush to zero.
if (x != 0)
return 0x1.0p-50f*f32_sqrt_ru_no_promote(0x1.0p100f*x);
return 0; // hack: should return NaN
}
float f32_sqrt_rd_no_promote(float x)
{
if (x >= f32_sqrt_rd_cutoff) {
float r = sqrtf(x);
float e = fmaf(r,r,-x);
if (e > 0.f) { r = f32_pred(r); }
return r;
}
// rescale input so 'e' doesn't flush to zero.
if (x != 0)
return 0x1.0p-50f*f32_sqrt_rd_no_promote(0x1.0p100f*x);
return 0; // hack: should return NaN
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment