Marc-B-Reynolds/div_by_recip.c

## div_by_recip.c
// SEE: https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#define RANDOMIZE

#define TEST_LEN UINT64_C(0x0000000100000000)

uint64_t rng_state[2] = {0xac564b0527d4eb2d,0xd1342543de82ef95};

inline uint64_t rot_64(uint64_t x, uint32_t n) { n &= 0x3f; return (x<<n) | (x>>(-n & 0x3f)); }

inline uint64_t rng_u64(void)
{
  uint64_t s0 = rng_state[0];
  uint64_t s1 = rng_state[1];
  uint64_t r  = s0 + s1;

  s1 ^= s0;
  rng_state[0] = rot_64(s0,55) ^ s1 ^ (s1<<14);
  rng_state[1] = rot_64(s1,36);

  return r;
}

inline uint32_t rng_u32(void)
{
  return (uint32_t)rng_u64();
}
// end of PRNG

inline uint32_t f32_to_bits(float x)
{
  uint32_t u; memcpy(&u, &x, 4); return u;
}

inline float f32_from_bits(uint32_t x)
{
  float f; memcpy(&f, &x, 4); return f;
}

inline uint32_t u32_abs(uint32_t x)
{
  return (int32_t)x >= 0 ? x : -x;
}

// ulp distance provided a & b are finite
// and have the same sign
inline uint32_t f32_ulp_dist_ss(float a, float b)
{
  uint32_t ua = f32_to_bits(a);
  uint32_t ub = f32_to_bits(b);
  return u32_abs(ua-ub);
}

void test()
{
  double   md   = 0.0;
  uint32_t dump = 0;

  // histogram of distance
  uint64_t h[5] = {0};

  double   ave = 0.0;

  printf("d is zero for correctly rounded and one for faithfully.\nThe rounding unit error size 'u' is estimated using doubles. The max is 1.5ulp or 3u.\n\n%8s %8s : %13s %13s: %3s : %s\n", "a","b","a/b","a(1/b)","d", "~u (rounding unit)");

  for(uint64_t i=0; i<TEST_LEN; i++) {
    // generate 2 values on [1,2) with final bit set
    uint64_t u  = rng_u64();
    uint32_t ia = (uint32_t)(u & 0x00ffffff) | 0x3f800001;
    uint32_t ib = (uint32_t)(u >> (64-24))   | 0x3f800001;

    // convert to binary32
    float    a  = f32_from_bits(ia);
    float    b  = f32_from_bits(ib);

    // result of division is [1,2)/[1,2) -> [1/2,2)

    float    d = a/b;           // correctly rounded division
    float    r = a * (1.f/b);   // division by mult w/recip

    uint32_t dist = f32_ulp_dist_ss(d,r);

    h[dist]++;

    // estimate error in the size of the rounding unit 'u'
    double dd = (double)a/(double)b;

    // scale result to [1,2) : error free. to normalize the
    // size of the rounding unit in following computations.
    if (dd <= 0.5) { dd *= 2.0; r *= 2.f; }

    dd  = fabs(dd-(double)r);
    ave += dd;
    dd *= 0x1p24;               // multiply by size of rounding unit

    // spew out a line each time we see an increase in 'u' error
    if (md < dd) { md = dd; dump=1; }

    if (dump) {
      printf("%f %f : %13a %13a: %3d : %f\n", a,b,d,r,dist, dd);
      dump=0;
    }
  }

  printf("\nobserved errors (percent): first is correctly round, second faithfully and all others should be zero\n");

  // dump out stats
  for(uint32_t i=0; i<5; i++) {
    printf("%f ", 100.0*(double)h[i]/((double)TEST_LEN));
  }
  printf("\n");

  printf("ave observed error (in u)= %f\n", 0x1p24*ave/(double)TEST_LEN);
}


int main(void)
{
#ifdef RANDOMIZE
  rng_state[0] ^= __rdtsc()*UINT64_C(0x7fb5d329728ea185);
  rng_state[1] += __rdtsc();
#endif

  test();

  return 0;
}
	// SEE: https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <math.h>

	#define RANDOMIZE

	#define TEST_LEN UINT64_C(0x0000000100000000)

	uint64_t rng_state[2] = {0xac564b0527d4eb2d,0xd1342543de82ef95};

	inline uint64_t rot_64(uint64_t x, uint32_t n) { n &= 0x3f; return (x<<n) \| (x>>(-n & 0x3f)); }

	inline uint64_t rng_u64(void)
	{
	uint64_t s0 = rng_state[0];
	uint64_t s1 = rng_state[1];
	uint64_t r = s0 + s1;

	s1 ^= s0;
	rng_state[0] = rot_64(s0,55) ^ s1 ^ (s1<<14);
	rng_state[1] = rot_64(s1,36);

	return r;
	}

	inline uint32_t rng_u32(void)
	{
	return (uint32_t)rng_u64();
	}
	// end of PRNG

	inline uint32_t f32_to_bits(float x)
	{
	uint32_t u; memcpy(&u, &x, 4); return u;
	}

	inline float f32_from_bits(uint32_t x)
	{
	float f; memcpy(&f, &x, 4); return f;
	}

	inline uint32_t u32_abs(uint32_t x)
	{
	return (int32_t)x >= 0 ? x : -x;
	}

	// ulp distance provided a & b are finite
	// and have the same sign
	inline uint32_t f32_ulp_dist_ss(float a, float b)
	{
	uint32_t ua = f32_to_bits(a);
	uint32_t ub = f32_to_bits(b);
	return u32_abs(ua-ub);
	}

	void test()
	{
	double md = 0.0;
	uint32_t dump = 0;

	// histogram of distance
	uint64_t h[5] = {0};

	double ave = 0.0;

	printf("d is zero for correctly rounded and one for faithfully.\nThe rounding unit error size 'u' is estimated using doubles. The max is 1.5ulp or 3u.\n\n%8s %8s : %13s %13s: %3s : %s\n", "a","b","a/b","a(1/b)","d", "~u (rounding unit)");

	for(uint64_t i=0; i<TEST_LEN; i++) {
	// generate 2 values on [1,2) with final bit set
	uint64_t u = rng_u64();
	uint32_t ia = (uint32_t)(u & 0x00ffffff) \| 0x3f800001;
	uint32_t ib = (uint32_t)(u >> (64-24)) \| 0x3f800001;

	// convert to binary32
	float a = f32_from_bits(ia);
	float b = f32_from_bits(ib);

	// result of division is [1,2)/[1,2) -> [1/2,2)

	float d = a/b; // correctly rounded division
	float r = a * (1.f/b); // division by mult w/recip

	uint32_t dist = f32_ulp_dist_ss(d,r);

	h[dist]++;

	// estimate error in the size of the rounding unit 'u'
	double dd = (double)a/(double)b;

	// scale result to [1,2) : error free. to normalize the
	// size of the rounding unit in following computations.
	if (dd <= 0.5) { dd = 2.0; r = 2.f; }

	dd = fabs(dd-(double)r);
	ave += dd;
	dd *= 0x1p24; // multiply by size of rounding unit

	// spew out a line each time we see an increase in 'u' error
	if (md < dd) { md = dd; dump=1; }

	if (dump) {
	printf("%f %f : %13a %13a: %3d : %f\n", a,b,d,r,dist, dd);
	dump=0;
	}
	}

	printf("\nobserved errors (percent): first is correctly round, second faithfully and all others should be zero\n");

	// dump out stats
	for(uint32_t i=0; i<5; i++) {
	printf("%f ", 100.0*(double)h[i]/((double)TEST_LEN));
	}
	printf("\n");

	printf("ave observed error (in u)= %f\n", 0x1p24*ave/(double)TEST_LEN);
	}



	int main(void)
	{
	#ifdef RANDOMIZE
	rng_state[0] ^= __rdtsc()*UINT64_C(0x7fb5d329728ea185);
	rng_state[1] += __rdtsc();
	#endif

	test();

	return 0;
	}