Protonk/softsqrt.c

## softsqrt.c
/* Kahan and Ng's SoftSqrt
See http://www.netlib.org/fdlibm/e_sqrt.c
or https://adampunk.com/documents/softsqrt.pdf (for the original)
specifically section B. 1 of the comment block labeled "other methods"

Method for operating in a logarithmic domain described in
section 2.3 "A Poor Man's Logarithm" of
J. T. COONEN, "CONTRIBUTIONS TO A PROPOSED STANDARD FOR BINARY
FLOATING-POINT ARITHMETIC (COMPUTER ARITHMETIC). PhD Thesis,
University of California Berkeley, 1984.

Noteworthy: behavior of the errors depends deeply upon
the log-uniform distribution of floating point fraction digits.
Coonen 1984 (p 117) cites Knuth TAOCP Vol 2 (2nd edition) pp. 238-247

Imagine an IEEE-754 double precision floating point number x.
For our purposes x is a positive normal number.

x is made up of 64 bits. The first is a sign bit, the next
11 bits make up the exponent, then the last 52 the fraction:

    1    11		     52				  ...widths
    ------------------------------------------------------
x: |s|	  e     |	      f				|
    ------------------------------------------------------
     msb     lsb msb				      lsb ...order

If we split x into 32 bit halves the more significant half
will contain the sign bit, the exponent, and 20 bits of the
fraction. Call this portion x0. We won't use x1 any more
in the code below, though Kahan and Ng use it for accuracy
checking of roots.

    ------------------------
x0: |s|   e    |    f1     |
    ------------------------
We have constrained x to be a positive normal
number so the sign bit is always 1 and the
exponent bits are at least 00000000001.

The fragment x0 is shorter than a double but is still
"lexicographically ordered up and down the number
system as sign magnitude fixed point numbers" (Jerome Coonen,
personal correspondence) a characteristic preserved if
we cut off the bottom 32 bits of the fraction.

See also a version by Linus which is a complete port.
Alpha linux's glibc had a tuned assembly version for nearly
two decades (link is too long to put here)
http://lkml.iu.edu/hypermail/linux/alpha/9512.2/0027.html
*/
/* Use standard integer types. The original version used
unsigned long longs and so forth. This change does not
impact functionality or internal working of the code. */
#include <stdint.h>
/* Lookup-table is shaped like this over 64 values:
                                                     ▄▓▀▓▄
                                                   ╔█`   ╙█m
                                                  ╫▌      "█▒
                          ╓▄▄æ╗▄╖                ╫▌        └█µ
                       ▄▓▀`      ╙▓▄            ╟█          ║█
                    ,▓▀└           ╙▓▄         ▐█            █▒
                   ▄▓`               ╚█       .█╙            ▐█
                 ╓█╙                  └█ε     ╫▌              ╫▌
                ▄▀                      █▄   ▐█               ╚█
              .▓╩                       `█▄  █╨                █▒
             ╓█`                         └█ε╫█                 ╚█
            ╔█`                           ╙██Γ                 ⌠█ε
           ╔█                              ╙▀                   ╟▌
          ▄█                                                    ║█
         ║█                                                      █⌐
        ▄█                                                       ╟▌
       ▐█                                                        ║█
      ╔█                                                         ]█ε
     ]█⌐                                                          ╫▌
    ,█╙                                                           ╟▌
   .▓╩                                                            ╚█
*/
static uint32_t lookup[64]= {
    0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
    0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
    0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
    0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
    0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
    0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
    0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
    0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd
};

double KahanNgRefactored(double x) {
    double y;
    uint32_t xUPPER, yUPPER;
    uint64_t xINT, yINT;

/* Convert double which is 64 bits wide to a 64 bit unsigned integer
Below is one of the older sources on this technique in digital computing
of "evil floating point bit level hacking" to get an approximate logarithm
from floating-point/integer
relationships: https://doi.org/10.1109/TEC.1962.5219391 */
    xINT = *( uint64_t *) &x;
/* Extract only the first half of the bits of the integer. In the original
version, the lower 32 bits are retained to check that the result
is accurate to the last place. Here we discard them */
    xUPPER = (xINT & 0xffffffff00000000) >> 32;
/* From Stackoverflow: https://stackoverflow.com/a/70735267/1188479
    "Recall the exponent encoding needs to have a bias of 1023 from the
     exponent, but we cut it in half and negated it, so - (x0 >> 1) has
     a bias of −511½ that needs to be adjusted to +1023, so we need to
     add 1534½. That needs to go into the exponent field, which is at
     bits 62 to 52 in the 64-bit double encoding, so at 30 to 20 in this
     32-bit portion from the high half. So we need to add 1534½ starting
     at bit 20, or 1534½•220 = 1,609,039,872 = 5FE8000016." */
    xUPPER = 0x5fe80000 - (xUPPER >> 1);
/* lookup is indexed by mostly by exponent bits. Only 5 highest bits from
mantissa are used to index it. See the above stack overflow answer for a
detailed discussion of the indexing strategy */
    yUPPER = xUPPER - lookup[63 & (xUPPER >> 14)];
/* Pad with zeros for the LS 32 bits, convert back to uint64_t in preparation
for casting back to double */
    yINT = ((uint64_t) yUPPER << 32);
/* Get double from bits making up uint64_t */
    y = *( double* ) &yINT;

/* One step of Netwon-Raphson iteration. See section 2 of the Kahan Ng 1986
paper for specific constant choice. The original function continues through
more iterations of newton's method and operations using the least significant
half of the input bits, which I did not port.  */
    return y * (1.5f - powf(2, -30) - (0.5f * x * y * y));
}
	/* Kahan and Ng's SoftSqrt
	See http://www.netlib.org/fdlibm/e_sqrt.c
	or https://adampunk.com/documents/softsqrt.pdf (for the original)
	specifically section B. 1 of the comment block labeled "other methods"

	Method for operating in a logarithmic domain described in
	section 2.3 "A Poor Man's Logarithm" of
	J. T. COONEN, "CONTRIBUTIONS TO A PROPOSED STANDARD FOR BINARY
	FLOATING-POINT ARITHMETIC (COMPUTER ARITHMETIC). PhD Thesis,
	University of California Berkeley, 1984.

	Noteworthy: behavior of the errors depends deeply upon
	the log-uniform distribution of floating point fraction digits.
	Coonen 1984 (p 117) cites Knuth TAOCP Vol 2 (2nd edition) pp. 238-247

	Imagine an IEEE-754 double precision floating point number x.
	For our purposes x is a positive normal number.

	x is made up of 64 bits. The first is a sign bit, the next
	11 bits make up the exponent, then the last 52 the fraction:

	1 11 52 ...widths
	------------------------------------------------------
	x: \|s\| e \| f \|
	------------------------------------------------------
	msb lsb msb lsb ...order

	If we split x into 32 bit halves the more significant half
	will contain the sign bit, the exponent, and 20 bits of the
	fraction. Call this portion x0. We won't use x1 any more
	in the code below, though Kahan and Ng use it for accuracy
	checking of roots.

	------------------------
	x0: \|s\| e \| f1 \|
	------------------------
	We have constrained x to be a positive normal
	number so the sign bit is always 1 and the
	exponent bits are at least 00000000001.

	The fragment x0 is shorter than a double but is still
	"lexicographically ordered up and down the number
	system as sign magnitude fixed point numbers" (Jerome Coonen,
	personal correspondence) a characteristic preserved if
	we cut off the bottom 32 bits of the fraction.

	See also a version by Linus which is a complete port.
	Alpha linux's glibc had a tuned assembly version for nearly
	two decades (link is too long to put here)
	http://lkml.iu.edu/hypermail/linux/alpha/9512.2/0027.html
	*/
	/* Use standard integer types. The original version used
	unsigned long longs and so forth. This change does not
	impact functionality or internal working of the code. */
	#include <stdint.h>
	/* Lookup-table is shaped like this over 64 values:
	▄▓▀▓▄
	╔█` ╙█m
	╫▌ "█▒
	╓▄▄æ╗▄╖ ╫▌ └█µ
	▄▓▀` ╙▓▄ ╟█ ║█
	,▓▀└ ╙▓▄ ▐█ █▒
	▄▓` ╚█ .█╙ ▐█
	╓█╙ └█ε ╫▌ ╫▌
	▄▀ █▄ ▐█ ╚█
	.▓╩ `█▄ █╨ █▒
	╓█` └█ε╫█ ╚█
	╔█` ╙██Γ ⌠█ε
	╔█ ╙▀ ╟▌
	▄█ ║█
	║█ █⌐
	▄█ ╟▌
	▐█ ║█
	╔█ ]█ε
	]█⌐ ╫▌
	,█╙ ╟▌
	.▓╩ ╚█
	*/
	static uint32_t lookup[64]= {
	0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
	0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
	0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd
	};

	double KahanNgRefactored(double x) {
	double y;
	uint32_t xUPPER, yUPPER;
	uint64_t xINT, yINT;

	/* Convert double which is 64 bits wide to a 64 bit unsigned integer
	Below is one of the older sources on this technique in digital computing
	of "evil floating point bit level hacking" to get an approximate logarithm
	from floating-point/integer
	relationships: https://doi.org/10.1109/TEC.1962.5219391 */
	xINT = ( uint64_t ) &x;
	/* Extract only the first half of the bits of the integer. In the original
	version, the lower 32 bits are retained to check that the result
	is accurate to the last place. Here we discard them */
	xUPPER = (xINT & 0xffffffff00000000) >> 32;
	/* From Stackoverflow: https://stackoverflow.com/a/70735267/1188479
	"Recall the exponent encoding needs to have a bias of 1023 from the
	exponent, but we cut it in half and negated it, so - (x0 >> 1) has
	a bias of −511½ that needs to be adjusted to +1023, so we need to
	add 1534½. That needs to go into the exponent field, which is at
	bits 62 to 52 in the 64-bit double encoding, so at 30 to 20 in this
	32-bit portion from the high half. So we need to add 1534½ starting
	at bit 20, or 1534½•220 = 1,609,039,872 = 5FE8000016." */
	xUPPER = 0x5fe80000 - (xUPPER >> 1);
	/* lookup is indexed by mostly by exponent bits. Only 5 highest bits from
	mantissa are used to index it. See the above stack overflow answer for a
	detailed discussion of the indexing strategy */
	yUPPER = xUPPER - lookup[63 & (xUPPER >> 14)];
	/* Pad with zeros for the LS 32 bits, convert back to uint64_t in preparation
	for casting back to double */
	yINT = ((uint64_t) yUPPER << 32);
	/* Get double from bits making up uint64_t */
	y = ( double ) &yINT;

	/* One step of Netwon-Raphson iteration. See section 2 of the Kahan Ng 1986
	paper for specific constant choice. The original function continues through
	more iterations of newton's method and operations using the least significant
	half of the input bits, which I did not port. */
	return y * (1.5f - powf(2, -30) - (0.5f * x * y * y));
	}