Last active
December 12, 2022 01:35
-
-
Save Marc-B-Reynolds/51ae4fb3342bfde2c54d873c529c3cdd to your computer and use it in GitHub Desktop.
basic stand-alone test for computing 2 reciprocals or 2 divisions using only 1 divide.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <math.h> | |
#define RANDOMIZE | |
#define TEST_LEN 0x7fffffff | |
#define VARIANT_FMA | |
uint64_t rng_state[2] = {0xac564b0527d4eb2d,0xd1342543de82ef95}; | |
inline uint64_t rot_64(uint64_t x, uint32_t n) { n &= 0x3f; return (x<<n) | (x>>(-n & 0x3f)); } | |
inline uint64_t rng_u64(void) | |
{ | |
uint64_t s0 = rng_state[0]; | |
uint64_t s1 = rng_state[1]; | |
uint64_t r = s0 + s1; | |
s1 ^= s0; | |
rng_state[0] = rot_64(s0,55) ^ s1 ^ (s1<<14); | |
rng_state[1] = rot_64(s1,36); | |
return r; | |
} | |
inline uint32_t rng_u32(void) | |
{ | |
return (uint32_t)rng_u64(); | |
} | |
// end of PRNG | |
inline uint32_t f32_to_bits(float x) | |
{ | |
uint32_t u; memcpy(&u, &x, 4); return u; | |
} | |
inline float f32_from_bits(uint32_t x) | |
{ | |
float f; memcpy(&f, &x, 4); return f; | |
} | |
static inline uint32_t u32_abs(uint32_t x) | |
{ | |
return (int32_t)x >= 0 ? x : -x; | |
} | |
// ulp distance provided a & b are finite | |
// and have the same sign | |
static inline uint32_t f32_ulp_dist_ss(float a, float b) | |
{ | |
uint32_t ua = f32_to_bits(a); | |
uint32_t ub = f32_to_bits(b); | |
return u32_abs(ua-ub); | |
} | |
void test() | |
{ | |
uint32_t mra = 0; | |
uint32_t mrb = 0; | |
uint32_t mda = 0; | |
uint32_t mdb = 0; | |
uint32_t mha = 0; | |
uint32_t mhb = 0; | |
uint32_t dump = 0; | |
uint64_t hr[5] = {0}; // recip | |
uint64_t hd[5] = {0}; // div | |
uint64_t hh[5] = {0}; // div via fma | |
for(uint64_t i=0; i<TEST_LEN; i++) { | |
// generate 3 values on [1,2) with final bit set | |
uint64_t u = rng_u64(); | |
uint32_t ia = (uint32_t)(u & 0x00ffffff) | 0x3f800001; | |
uint32_t ib = (uint32_t)(u >> (64-24)) | 0x3f800001; | |
uint32_t ic = (rng_u32() >> (32-24)) | 0x3f800001; | |
// convert to binary32 | |
float a = f32_from_bits(ia); | |
float b = f32_from_bits(ib); | |
float c = f32_from_bits(ic); | |
// correctly rounded reciprocals | |
float ra = 1.f/a; | |
float rb = 1.f/b; | |
// two reciprocals via: one divide, 3 products | |
float t = a*b; | |
float rt = 1.f/t; | |
float aa = b*rt; | |
float ab = a*rt; | |
// ulp distances | |
uint32_t da = f32_ulp_dist_ss(ra,aa); | |
uint32_t db = f32_ulp_dist_ss(rb,ab); | |
if (mra < da) { mra = da; dump=1; } | |
if (mrb < db) { mrb = db; dump=1; } | |
hr[da]++; | |
hr[db]++; | |
if (dump) { | |
printf("r: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db); | |
dump=0; | |
} | |
// correctly rounded divisions | |
ra = c/a; | |
rb = c/b; | |
// two divides by composition: | |
// WARNING: I DON'T THINK THIS IS USEFUL. JUST NERDSNIPED | |
// MYSELF INTO CHECKING ON THE ERRORS. | |
// underperforms next version. not really usefully IMHO | |
// because we can do other "things" for reused divisors | |
// https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html | |
// aa = c*aa; | |
// ab = c*ab; | |
aa = c*b*rt; | |
ab = c*a*rt; | |
// ulp distances | |
da = f32_ulp_dist_ss(ra,aa); | |
db = f32_ulp_dist_ss(rb,ab); | |
if (mda < da) { mda = da; dump=1; } | |
if (mdb < db) { mdb = db; dump=1; } | |
hd[da]++; | |
hd[db]++; | |
if (dump) { | |
printf("d: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db); | |
dump=0; | |
} | |
// 2 divisions in: 1 div, 4 products, 4 fmas | |
// -- likewise don't think "useful" | |
float ta = c*b, tae = fmaf(c,b,-ta); | |
float tb = c*a, tbe = fmaf(c,a,-tb); | |
aa = fmaf(ta,rt,tae*rt); | |
ab = fmaf(tb,rt,tbe*rt); | |
// ulp distances | |
da = f32_ulp_dist_ss(ra,aa); | |
db = f32_ulp_dist_ss(rb,ab); | |
if (mha < da) { mha = da; dump=1; } | |
if (mhb < db) { mhb = db; dump=1; } | |
hh[da]++; | |
hh[db]++; | |
if (dump) { | |
printf("h: %f %f : %f %f %f %f : %d %d\n", a,b,ra,aa,rb,ab,da,db); | |
dump=0; | |
} | |
} | |
// dump out stats. two per iteration for each. | |
printf("\nrecip: "); | |
for(uint32_t i=0; i<5; i++) { | |
printf("%f ", 50.0*(double)hr[i]/((double)TEST_LEN)); | |
} | |
printf("\ndiv: "); | |
for(uint32_t i=0; i<5; i++) { | |
printf("%f ", 50.0*(double)hd[i]/((double)TEST_LEN)); | |
} | |
printf("\ndivfma:"); | |
for(uint32_t i=0; i<5; i++) { | |
printf("%f ", 50.0*(double)hh[i]/((double)TEST_LEN)); | |
} | |
printf("\n"); | |
} | |
int main(void) | |
{ | |
#ifdef RANDOMIZE | |
rng_state[0] ^= __rdtsc()*UINT64_C(0x7fb5d329728ea185); | |
rng_state[1] += __rdtsc(); | |
#endif | |
test(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment