Marc B. Reynolds Marc-B-Reynolds

## f32_quadratic_hq.c

// whatever return type
typedef struct { float h,l; } f32_pair_t;

f32_pair_t f32_quadratic_hq(float A, float B, float C)
{
  double a = (double)A;
  double b = (double)B;
  double c = (double)C;

## quick.c

// if 'f' is the 64-bit input CRC32-C (with 'incoming' 32-bit value of zero) then
// there are 50 bijections (invertiable functions) which are a sum of f
// and a simple xorshift. So the follow two forms:
//
//   f(x) ^ (x ^ (x << S))
//   f(x) ^ (x ^ (x >> S))
//
// and 23 using a rotate instead of a shift:
//

## cbrt.c
// Public Domain under http://unlicense.org, see link for details.
// except:
// * core-math function `cr_cbrtf`               (see license below)
// * musl flavored fdlib function `fdlibm_cbrtf` (see license below)

// code and test driver for cube root and it's reciprocal based on:
// "Fast Calculation of Cube and Inverse Cube Roots Using
// a Magic Constant and Its Implementation on Microcontrollers"
// Moroz, Samotyy, Walczyk, Cieslinski, 2021
// (PDF: https://www.mdpi.com/1996-1073/14/4/1058)

## output.txt
----------------------------------------------------------------------------
starting: vrecpe_f32 on [3f800000, 40000000]
// min/max ulp   -45502 44978
// abs max ulp   45502.375051
// correctly:    0.002611% (219)
// faithfully:   0.005555% (466)
//   2 ulp:      0.005436% (456)
//   3 ulp:      0.005460% (458)
//   4 ulp:      0.005329% (447)
//   5 ulp:      0.005531% (464)

## output.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              9 stars
            
          
                Marc-B-Reynolds
                / output.md
            
            
              Last active
              August 28, 2023 10:07
            
              
                brute force testing of 1/sqrt functions
              
          
    click for range breakdown
checking on [3f800000,40000000] [1.000000e+00,2.000000e+00]


func
e
max ULP
CR
FR
2 ULP
> 2 ULP
CR%
FR%
2 ULP%
> 2 ULP%


vrsqrte_f32
--
4947968
103
225
216
8388065
0.001228
0.002682
0.002575
99.993515


FRSR_Mon0
--
564177
3
8
6
8388592
0.000036
0.000095
0.000072
99.999797


FRSR_Deg0
--
403258
0
0
0
8388609
0.000000
0.000000
0.000000
100.000000


FRSR_Mon1
--
14751
230
464
466
8387449
0.002742
0.005531
0.005555
99.986172


## tanpi_hack.c
// Public Domain under http://unlicense.org, see link for details.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>

#include "xmmintrin.h"
#include "wmmintrin.h"

## div_by_recip.c
// SEE: https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#define RANDOMIZE

#define TEST_LEN UINT64_C(0x0000000100000000)

## two_for_one_div.c
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#define RANDOMIZE

#define TEST_LEN 0x7fffffff

#define VARIANT_FMA

## gist:57509af21e42bc7ec9d2da5f7e55f02e
00 : mean=0.000000,variance=0.000000,std-dev=0.000000, count=67108863 on [0.000000,0.000000]
01 : mean=6.511208,variance=3.084904,std-dev=1.756390, count=67108863 on [2.000000,31.000000]
02 : mean=6.254943,variance=3.523351,std-dev=1.877059, count=67108863 on [2.000000,30.000000]
03 : mean=6.142395,variance=3.876341,std-dev=1.968843, count=67108863 on [2.000000,31.000000]
04 : mean=6.083418,variance=4.125431,std-dev=2.031116, count=67108863 on [2.000000,31.000000]
05 : mean=6.046452,variance=4.320036,std-dev=2.078470, count=67108863 on [2.000000,31.000000]
06 : mean=6.018018,variance=4.485348,std-dev=2.117864, count=67108863 on [2.000000,31.000000]
07 : mean=5.996642,variance=4.607561,std-dev=2.146523, count=67108863 on [2.000000,33.000000]
08 : mean=5.986188,variance=4.664854,std-dev=2.159827, count=67108863 on [2.000000,32.000000]
09 : mean=5.987023,variance=4.672484,std-dev=2.161593, count=67108863 on [2.000000,33.000000]

## gist:53ecc4f9648d9ba4348403b9afd06804
// 1) "Tables of linear congruential generators of different sizes and good lattice structure" (1999), Pierre L'Ecuyer
// 2) Pre-print version of (3)  (https://arxiv.org/abs/2001.05304)
// 3) "Computationally easy, spectrally good multipliers for congruential pseudorandom number generators" (2021), Guy L. Steele Jr. & Sebastiano Vigna (https://onlinelibrary.wiley.com/doi/epdf/10.1002/spe.3030)

// inline comment number is bit width of constant

const uint32_t lcg_mul_k_table_32[] =
{
  // (3) Table 4 LCGs
  0x0000d9f5, // 16

	// whatever return type
	typedef struct { float h,l; } f32_pair_t;

	f32_pair_t f32_quadratic_hq(float A, float B, float C)
	{
	double a = (double)A;
	double b = (double)B;
	double c = (double)C;

	// if 'f' is the 64-bit input CRC32-C (with 'incoming' 32-bit value of zero) then
	// there are 50 bijections (invertiable functions) which are a sum of f
	// and a simple xorshift. So the follow two forms:
	//
	// f(x) ^ (x ^ (x << S))
	// f(x) ^ (x ^ (x >> S))
	//
	// and 23 using a rotate instead of a shift:
	//
	// Public Domain under http://unlicense.org, see link for details.
	// except:
	// * core-math function `cr_cbrtf` (see license below)
	// * musl flavored fdlib function `fdlibm_cbrtf` (see license below)

	// code and test driver for cube root and it's reciprocal based on:
	// "Fast Calculation of Cube and Inverse Cube Roots Using
	// a Magic Constant and Its Implementation on Microcontrollers"
	// Moroz, Samotyy, Walczyk, Cieslinski, 2021
	// (PDF: https://www.mdpi.com/1996-1073/14/4/1058)
	----------------------------------------------------------------------------
	starting: vrecpe_f32 on [3f800000, 40000000]
	// min/max ulp -45502 44978
	// abs max ulp 45502.375051
	// correctly: 0.002611% (219)
	// faithfully: 0.005555% (466)
	// 2 ulp: 0.005436% (456)
	// 3 ulp: 0.005460% (458)
	// 4 ulp: 0.005329% (447)
	// 5 ulp: 0.005531% (464)
func	e	max ULP	CR	FR	2 ULP	> 2 ULP	CR%	FR%	2 ULP%	> 2 ULP%
vrsqrte_f32	--	4947968	103	225	216	8388065	0.001228	0.002682	0.002575	99.993515
FRSR_Mon0	--	564177	3	8	6	8388592	0.000036	0.000095	0.000072	99.999797
FRSR_Deg0	--	403258	0	0	0	8388609	0.000000	0.000000	0.000000	100.000000
FRSR_Mon1	--	14751	230	464	466	8387449	0.002742	0.005531	0.005555	99.986172
	// Public Domain under http://unlicense.org, see link for details.

	#include <stdint.h>
	#include <stdlib.h>
	#include <stdio.h>
	#include <math.h>
	#include <string.h>

	#include "xmmintrin.h"
	#include "wmmintrin.h"
	// SEE: https://marc-b-reynolds.github.io/math/2019/03/12/FpDiv.html

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <math.h>

	#define RANDOMIZE

	#define TEST_LEN UINT64_C(0x0000000100000000)
	00 : mean=0.000000,variance=0.000000,std-dev=0.000000, count=67108863 on [0.000000,0.000000]
	01 : mean=6.511208,variance=3.084904,std-dev=1.756390, count=67108863 on [2.000000,31.000000]
	02 : mean=6.254943,variance=3.523351,std-dev=1.877059, count=67108863 on [2.000000,30.000000]
	03 : mean=6.142395,variance=3.876341,std-dev=1.968843, count=67108863 on [2.000000,31.000000]
	04 : mean=6.083418,variance=4.125431,std-dev=2.031116, count=67108863 on [2.000000,31.000000]
	05 : mean=6.046452,variance=4.320036,std-dev=2.078470, count=67108863 on [2.000000,31.000000]
	06 : mean=6.018018,variance=4.485348,std-dev=2.117864, count=67108863 on [2.000000,31.000000]
	07 : mean=5.996642,variance=4.607561,std-dev=2.146523, count=67108863 on [2.000000,33.000000]
	08 : mean=5.986188,variance=4.664854,std-dev=2.159827, count=67108863 on [2.000000,32.000000]
	09 : mean=5.987023,variance=4.672484,std-dev=2.161593, count=67108863 on [2.000000,33.000000]
	// 1) "Tables of linear congruential generators of different sizes and good lattice structure" (1999), Pierre L'Ecuyer
	// 2) Pre-print version of (3) (https://arxiv.org/abs/2001.05304)
	// 3) "Computationally easy, spectrally good multipliers for congruential pseudorandom number generators" (2021), Guy L. Steele Jr. & Sebastiano Vigna (https://onlinelibrary.wiley.com/doi/epdf/10.1002/spe.3030)

	// inline comment number is bit width of constant

	const uint32_t lcg_mul_k_table_32[] =
	{
	// (3) Table 4 LCGs
	0x0000d9f5, // 16