Skip to content

Instantly share code, notes, and snippets.

@Marc-B-Reynolds
Last active December 12, 2022 01:35
Show Gist options
  • Save Marc-B-Reynolds/51ae4fb3342bfde2c54d873c529c3cdd to your computer and use it in GitHub Desktop.
Save Marc-B-Reynolds/51ae4fb3342bfde2c54d873c529c3cdd to your computer and use it in GitHub Desktop.
basic stand-alone test for computing 2 reciprocals or 2 divisions using only 1 divide.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#define RANDOMIZE
#define TEST_LEN 0x7fffffff
#define VARIANT_FMA
uint64_t rng_state[2] = {0xac564b0527d4eb2d,0xd1342543de82ef95};
inline uint64_t rot_64(uint64_t x, uint32_t n) { n &= 0x3f; return (x<<n) | (x>>(-n & 0x3f)); }
inline uint64_t rng_u64(void)
{
uint64_t s0 = rng_state[0];
uint64_t s1 = rng_state[1];
uint64_t r = s0 + s1;
s1 ^= s0;
rng_state[0] = rot_64(s0,55) ^ s1 ^ (s1<<14);
rng_state[1] = rot_64(s1,36);
return r;
}
inline uint32_t rng_u32(void)
{
return (uint32_t)rng_u64();
}
// end of PRNG
inline uint32_t f32_to_bits(float x)
{
uint32_t u; memcpy(&u, &x, 4); return u;
}
inline float f32_from_bits(uint32_t x)
{
float f; memcpy(&f, &x, 4); return f;
}
static inline uint32_t u32_abs(uint32_t x)
{
return (int32_t)x >= 0 ? x : -x;
}
// ulp distance provided a & b are finite
// and have the same sign
static inline uint32_t f32_ulp_dist_ss(float a, float b)
{
uint32_t ua = f32_to_bits(a);
uint32_t ub = f32_to_bits(b);
return u32_abs(ua-ub);
}
void test()
{
uint32_t mra = 0;
uint32_t mrb = 0;
uint32_t mda = 0;
uint32_t mdb = 0;
uint32_t mha = 0;
uint32_t mhb = 0;
uint32_t dump = 0;
uint64_t hr[5] = {0}; // recip
uint64_t hd[5] = {0}; // div
uint64_t hh[5] = {0}; // div via fma
for(uint64_t i=0; i<TEST_LEN; i++) {
// generate 3 values on [1,2) with final bit set
uint64_t u = rng_u64();
uint32_t ia = (uint32_t)(u & 0x00ffffff) | 0x3f800001;
uint32_t ib = (uint32_t)(u >> (64-24)) | 0x3f800001;
uint32_t ic = (rng_u32() >> (32-24)) | 0x3f800001;
// convert to binary32
float a = f32_from_bits(ia);
float b = f32_from_bits(ib);
float c = f32_from_bits(ic);
// correctly rounded reciprocals
float ra = 1.f/a;
float rb = 1.f/b;
// two reciprocals via: one divide, 3 products
float t = a*b;
float rt = 1.f/t;
float aa = b*rt;
float ab = a*rt;
// ulp distances
uint32_t da = f32_ulp_dist_ss(ra,aa);
uint32_t db = f32_ulp_dist_ss(rb,ab);
if (mra < da) { mra = da; dump=1; }
if (mrb < db) { mrb = db; dump=1; }
hr[da]++;
hr[db]++;
if (dump) {
printf("r: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db);
dump=0;
}
// correctly rounded divisions
ra = c/a;
rb = c/b;
// two divides by composition:
// WARNING: I DON'T THINK THIS IS USEFUL. JUST NERDSNIPED
// MYSELF INTO CHECKING ON THE ERRORS.
// underperforms next version. not really usefully IMHO
// because we can do other "things" for reused divisors
// https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html
// aa = c*aa;
// ab = c*ab;
aa = c*b*rt;
ab = c*a*rt;
// ulp distances
da = f32_ulp_dist_ss(ra,aa);
db = f32_ulp_dist_ss(rb,ab);
if (mda < da) { mda = da; dump=1; }
if (mdb < db) { mdb = db; dump=1; }
hd[da]++;
hd[db]++;
if (dump) {
printf("d: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db);
dump=0;
}
// 2 divisions in: 1 div, 4 products, 4 fmas
// -- likewise don't think "useful"
float ta = c*b, tae = fmaf(c,b,-ta);
float tb = c*a, tbe = fmaf(c,a,-tb);
aa = fmaf(ta,rt,tae*rt);
ab = fmaf(tb,rt,tbe*rt);
// ulp distances
da = f32_ulp_dist_ss(ra,aa);
db = f32_ulp_dist_ss(rb,ab);
if (mha < da) { mha = da; dump=1; }
if (mhb < db) { mhb = db; dump=1; }
hh[da]++;
hh[db]++;
if (dump) {
printf("h: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db);
dump=0;
}
}
// dump out stats. two per iteration for each.
printf("\nrecip: ");
for(uint32_t i=0; i<5; i++) {
printf("%f ", 50.0*(double)hr[i]/((double)TEST_LEN));
}
printf("\ndiv: ");
for(uint32_t i=0; i<5; i++) {
printf("%f ", 50.0*(double)hd[i]/((double)TEST_LEN));
}
printf("\ndivfma:");
for(uint32_t i=0; i<5; i++) {
printf("%f ", 50.0*(double)hh[i]/((double)TEST_LEN));
}
printf("\n");
}
int main(void)
{
#ifdef RANDOMIZE
rng_state[0] ^= __rdtsc()*UINT64_C(0x7fb5d329728ea185);
rng_state[1] += __rdtsc();
#endif
test();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment