TekuConcept/fp_math.cpp

## fp_math.cpp
/**
 * Created by TekuConcept on September 23, 2020
 */

#include <iostream>
#include <iomanip>
#include <cmath>
#include <cstdint>

// Q10(256) = 262144 := BPI (proportional representation of 2pi as a power of 2)
// Q10(M_PI) = 3217 (Actual Q10 value of pi to compare; 3.14159... * (1 << 10))
#define Q10_BPI_4  0x08000 // pi/4
#define Q10_BPI_2  0x10000 // pi/2
#define Q10_3BPI_4 0x18000 // 3pi/4
#define Q10_BPI    0x20000 // pi
#define Q10_2BPI   0x40000 // 2pi


/*********************************************************************************\
 * Fixed-Point Sqrt                                                              *
 * Input:  x in Q10                                                              *
 * Output: y in Q10                                                              *
 * Note:   precision represents actual precision left-shift one                  *
 *                                                                               *
 * std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005               *
 *                                                                               *
 * Based on: https://github.com/chmike/fpsqrt                                    *
 *      and: "Fixed Point Square Root" from http://citeseerx.ist.psu.edu/        *
\*********************************************************************************/

int32_t fp_sqrt(int32_t x, int8_t s precision = 20)
{
    uint32_t testDiv, root, count, rem;
    rem = x;
    root = 0;
    count = 0x10 << precision;

    do {
        testDiv = root + count;
        if (rem >= testDiv) {
            rem -= testDiv;
            root = testDiv + count;
        }
        rem <<= 1;
        count >>= 1;
    } while (count > 0x40);

    return root >> 8;
}
inline int32_t fp_invsqrt(int32_t x) { return ((0x400 << 10) / fp_sqrt(x)); } // not tested


/*********************************************************************************\
 * Fixed-Point Log2                                                              *
 * Input:  x in Q10                                                              *
 * Output: y in Q10                                                              *
 *                                                                               *
 * std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005               *
 *                                                                               *
 * Based on: https://github.com/dmoulding/log2fix                                *
 *      and: https://stackoverflow.com/questions/4657468                         *
\*********************************************************************************/

int32_t fp_log2(uint32_t x, int8_t precision = 10)
{
    int32_t b = 1 << (precision - 1);
    int32_t y = 0, i;
    uint64_t z = x;

    if (x == 0) return 0xFFFFFFFF;

    while (x < (1 << precision)) {
        x <<= 1;
        y -= (1 << precision);
    }

    while (x >= (2 << precision)) {
        x >>= 1;
        y += (1 << precision);
    }

    for (i = 0; i < precision; i++) {
        z = z * z >> precision;
        if (z >= 2 << (uint64_t)precision) {
            z >>= 1;
            y += b;
        }
        b >>= 1;
    }

    return y;
}
inline int32_t fp_log(int32_t x) { return (fp_log2(x) * (uint64_t)0x268826a1) >> 31; };
inline int32_t fp_ln(int32_t x) { return (fp_log2(x) * (uint64_t)0x58b90bfc) >> 31; };


/*********************************************************************************\
 * Fixed-Point Sine                                                              *
 * Input:  x in Q10 fp-radians [rad * (256 << 10) / (2pi)]                       *
 * Output: y in Q10                                                              *
 *                                                                               *
 * std::sin(theta) = fp_sin(theta * (256<<10) / (2*M_PI)) +/-0.012831            *
 *                                                                               *
 * fp_cos, fp_tan, fp_sec, fp_csc, fp_cot can all be derived from fp_sin         *
 * Based on: https://www.coranac.com/2009/07/sines/                              *
 * Fifth-order variant can be found here: https://www.nullhardware.com/          *
\*********************************************************************************/

int32_t
fp_sin(int32_t x)
{
    // S(x) = x * ( (3 << p) - (x * x >> r) ) >> s
    // n : Q-pos for quarter circle             13
    // A : Q-pos for output                     12
    // p : Q-pos for parentheses intermediate   15
    // r = 2n-p                                 11
    // s = A-1-p-n                              17

    static const int
        qN = 16,
        qA = 10,
        qP = 15,
        qR = 2 * qN - qP,
        qS = qN + qP + 1 - qA;

    x = x << (30 - qN); // shift to full s32 range (Q13->Q30)
    if((x ^ (x << 1)) < 0) // test for quadrant 1 or 2
        x = (1 << 31) - x;
    x = x >> (30 - qN);

    // 0.5z(3-z^2); z = 2x/pi
    return x * ((3 << qP) - ((int64_t)x * x >> qR)) >> qS;
}
inline int32_t fp_cos(int32_t x) { return fp_sin(x + Q10_BPI_2); }
inline int32_t fp_tan(int32_t x) { return (((int64_t)fp_sin(x) << 10) / fp_cos(x)); }
inline int32_t fp_sec(int32_t x) { return ((0x400 << 10) / fp_cos(x)); }
inline int32_t fp_csc(int32_t x) { return ((0x400 << 10) / fp_sin(x)); }
inline int32_t fp_cot(int32_t x) { return (((int64_t)fp_cos(x) << 10) / fp_sin(x)); }


/*********************************************************************************\
 * Fixed-Point ArcCos                                                            *
 * Input:  x in Q10                                                              *
 * Output: y in Q10 fp-radians [rad * (256 << 10) / (2pi)]                       *
 *                                                                               *
 * std::acos(x) = fp_acos(x) / (float)(1 << 10)                                  *
 *                                                                               *
 * fp_asin can be derived from fp_acos                                           *
 * Based on: https://stackoverflow.com/questions/7378187/                        *
 * Originally from https://developer.nvidia.com/                                 *
\*********************************************************************************/

int32_t
fp_acos(int32_t x) {
    static const int8_t precision = 10;
    int8_t sign = (x < 0) ? -1 : 1;
    int64_t abs_x, y;

    abs_x = abs(x);
    abs_x %= (1 << precision) + 1;

    // right-shift to preserve
    // precision after multiply
    y = ((-19 * abs_x) >> precision) + 76;
    y = ((y * abs_x) >> precision) - 217;
    y = ((y * abs_x) >> precision) + 1608;
    y = (y * fp_sqrt(1024 - abs_x)) >> 10;
    y = y + (2 * sign * y);
    y = y + sign * 3217;

    return y;
}
inline int32_t fp_asin(int32_t x) { return (fp_acos(-x) - Q10_BPI_2); }


/*********************************************************************************\
 * Fixed-Point ArcTan2                                                           *
 * Input:  x in Q10 fp-radians [rad * (256 << 10) / (2pi)]                       *
 * Output: y in Q10                                                              *
 *                                                                               *
 * std::atan2(y, x) = fp_atan2(fp_y, fp_x) / (float)(1 << 10) +/-0.00885         *
 *                                                                               *
 * fp_asin, fp_acos, fp_asec, fp_acsc, fp_acot can all be derived from fp_atan2  *
 * Based on: https://developer.download.nvidia.com/cg/atan2.html                 *
\*********************************************************************************/

int32_t
fp_atan2(int32_t y, int32_t x)
{
    static const int8_t precision = 10;
    int32_t t0, t1, t2, t3, t4;

    t3 = abs(x);
    t1 = abs(y);
    t0 = (t3 > t1) ? t3 : t1;
    t1 = (t3 < t1) ? t3 : t1;
    t3 = (0x400 << precision) / t0;
    t3 = (t3 * t1) >> precision;
    t4 = (t3 * t3) >> precision;
    t0 =                        - 14;   // Q10(0.013480470) = 14
    t0 = ((t0 * t4)>>precision) + 59;   // Q10(0.057477314) = 59
    t0 = ((t0 * t4)>>precision) - 124;  // Q10(0.121239071) = 124
    t0 = ((t0 * t4)>>precision) + 200;  // Q10(0.195635925) = 200
    t0 = ((t0 * t4)>>precision) - 341;  // Q10(0.332994597) = 341
    t0 = ((t0 * t4)>>precision) + 1024; // Q10(0.999995630) = 1024
    t3 = ((t0 * t3)>>precision);
    t3 = (abs(y) > abs(x)) ? 1608 - t3 : t3; // Q10(1.570796327) = 1608
    t3 = (x < 0) ? 3217 - t3 : t3;           // Q10(3.141592654) = 3217
    t3 = (y < 0) ? -t3 : t3;

    return t3;
}
// fp_asin(x) := fp_atan2(x, fp_sqrt(1 - x * x))
// fp_acos(x) := fp_atan2(fp_sqrt(1 - x * x), x)
inline int32_t fp_atan(int32_t x) { return fp_atan2(x, 0x400); } // not tested - theoretical
inline int32_t fp_asec(int32_t x) { return fp_acos((0x400 << 10) / x); }
inline int32_t fp_acsc(int32_t x) { return fp_asin((0x400 << 10) / x); }
inline int32_t fp_acot(x) { return (fp_atan(x) - (x > 0 ? 0 : Q10_BPI)); }


#include <iostream>
#include <iomanip>
#define DCELL(c) std::dec << std::left << std::setw(c)
int main() {
    std::cout << "-- BEGIN DEMO -- \n";

    int32_t bigree = 1 << 10; // binary-degree
    int32_t full_revolution = bigree * 256;

    for (int32_t a = 0; a < (full_revolution + bigree * 10); a += (2 * bigree)) {
        float fx = std::cos(i * 2 * M_PI / (float)full_revolution);
        float fy = std::sin(i * 2 * M_PI / (float)full_revolution);
        int32_t ix = fp_cos(i);
        int32_t iy = fp_sin(i);

        // -- fp_sqrt example --
        std::cout << DCELL(12) << std::sqrt(a / (float)bigree);
        std::cout << DCELL(12) << (fp_sqrt(a) / (float)bigree);

        // -- fp_log2 example --
        std::cout << DCELL(12) << std::log2(i / (float)bigree);
        std::cout << DCELL(12) << (fp_log2(i) / (float)bigree);

        // -- fp_sin example --
        std::cout << DCELL(12) << std::sin(i * 2 * M_PI / (float)full_revolution);
        std::cout << DCELL(12) << (fp_sin(i) / (float)bigree);

        // -- fp_acos example --
        std::cout << DCELL(12) << std::acos(fx);
        std::cout << DCELL(12) << (fp_acos(ix) / (float)(1 << 10));

        // -- fp_atan2 example --
        std::cout << DCELL(12) << std::atan2(fy, fx);
        std::cout << DCELL(12) << (fp_atan2(iy, ix) / (float)bigree);
    }

    std::cout << "-- END OF DEMO --" << std::endl;
    return 0;
}
	/**
	* Created by TekuConcept on September 23, 2020
	*/

	#include <iostream>
	#include <iomanip>
	#include <cmath>
	#include <cstdint>

	// Q10(256) = 262144 := BPI (proportional representation of 2pi as a power of 2)
	// Q10(M_PI) = 3217 (Actual Q10 value of pi to compare; 3.14159... * (1 << 10))
	#define Q10_BPI_4 0x08000 // pi/4
	#define Q10_BPI_2 0x10000 // pi/2
	#define Q10_3BPI_4 0x18000 // 3pi/4
	#define Q10_BPI 0x20000 // pi
	#define Q10_2BPI 0x40000 // 2pi





	/*********************************************************************************\
	* Fixed-Point Sqrt *
	* Input: x in Q10 *
	* Output: y in Q10 *
	* Note: precision represents actual precision left-shift one *
	* *
	* std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005 *
	* *
	* Based on: https://github.com/chmike/fpsqrt *
	* and: "Fixed Point Square Root" from http://citeseerx.ist.psu.edu/ *
	\*********************************************************************************/

	int32_t fp_sqrt(int32_t x, int8_t s precision = 20)
	{
	uint32_t testDiv, root, count, rem;
	rem = x;
	root = 0;
	count = 0x10 << precision;

	do {
	testDiv = root + count;
	if (rem >= testDiv) {
	rem -= testDiv;
	root = testDiv + count;
	}
	rem <<= 1;
	count >>= 1;
	} while (count > 0x40);

	return root >> 8;
	}
	inline int32_t fp_invsqrt(int32_t x) { return ((0x400 << 10) / fp_sqrt(x)); } // not tested





	/*********************************************************************************\
	* Fixed-Point Log2 *
	* Input: x in Q10 *
	* Output: y in Q10 *
	* *
	* std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005 *
	* *
	* Based on: https://github.com/dmoulding/log2fix *
	* and: https://stackoverflow.com/questions/4657468 *
	\*********************************************************************************/

	int32_t fp_log2(uint32_t x, int8_t precision = 10)
	{
	int32_t b = 1 << (precision - 1);
	int32_t y = 0, i;
	uint64_t z = x;

	if (x == 0) return 0xFFFFFFFF;

	while (x < (1 << precision)) {
	x <<= 1;
	y -= (1 << precision);
	}

	while (x >= (2 << precision)) {
	x >>= 1;
	y += (1 << precision);
	}

	for (i = 0; i < precision; i++) {
	z = z * z >> precision;
	if (z >= 2 << (uint64_t)precision) {
	z >>= 1;
	y += b;
	}
	b >>= 1;
	}

	return y;
	}
	inline int32_t fp_log(int32_t x) { return (fp_log2(x) * (uint64_t)0x268826a1) >> 31; };
	inline int32_t fp_ln(int32_t x) { return (fp_log2(x) * (uint64_t)0x58b90bfc) >> 31; };





	/*********************************************************************************\
	* Fixed-Point Sine *
	* Input: x in Q10 fp-radians [rad * (256 << 10) / (2pi)] *
	* Output: y in Q10 *
	* *
	* std::sin(theta) = fp_sin(theta * (256<<10) / (2M_PI)) +/-0.012831
	* *
	* fp_cos, fp_tan, fp_sec, fp_csc, fp_cot can all be derived from fp_sin *
	* Based on: https://www.coranac.com/2009/07/sines/ *
	* Fifth-order variant can be found here: https://www.nullhardware.com/ *
	\*********************************************************************************/

	int32_t
	fp_sin(int32_t x)
	{
	// S(x) = x * ( (3 << p) - (x * x >> r) ) >> s
	// n : Q-pos for quarter circle 13
	// A : Q-pos for output 12
	// p : Q-pos for parentheses intermediate 15
	// r = 2n-p 11
	// s = A-1-p-n 17

	static const int
	qN = 16,
	qA = 10,
	qP = 15,
	qR = 2 * qN - qP,
	qS = qN + qP + 1 - qA;

	x = x << (30 - qN); // shift to full s32 range (Q13->Q30)
	if((x ^ (x << 1)) < 0) // test for quadrant 1 or 2
	x = (1 << 31) - x;
	x = x >> (30 - qN);

	// 0.5z(3-z^2); z = 2x/pi
	return x * ((3 << qP) - ((int64_t)x * x >> qR)) >> qS;
	}
	inline int32_t fp_cos(int32_t x) { return fp_sin(x + Q10_BPI_2); }
	inline int32_t fp_tan(int32_t x) { return (((int64_t)fp_sin(x) << 10) / fp_cos(x)); }
	inline int32_t fp_sec(int32_t x) { return ((0x400 << 10) / fp_cos(x)); }
	inline int32_t fp_csc(int32_t x) { return ((0x400 << 10) / fp_sin(x)); }
	inline int32_t fp_cot(int32_t x) { return (((int64_t)fp_cos(x) << 10) / fp_sin(x)); }





	/*********************************************************************************\
	* Fixed-Point ArcCos *
	* Input: x in Q10 *
	* Output: y in Q10 fp-radians [rad * (256 << 10) / (2pi)] *
	* *
	* std::acos(x) = fp_acos(x) / (float)(1 << 10) *
	* *
	* fp_asin can be derived from fp_acos *
	* Based on: https://stackoverflow.com/questions/7378187/ *
	* Originally from https://developer.nvidia.com/ *
	\*********************************************************************************/

	int32_t
	fp_acos(int32_t x) {
	static const int8_t precision = 10;
	int8_t sign = (x < 0) ? -1 : 1;
	int64_t abs_x, y;

	abs_x = abs(x);
	abs_x %= (1 << precision) + 1;

	// right-shift to preserve
	// precision after multiply
	y = ((-19 * abs_x) >> precision) + 76;
	y = ((y * abs_x) >> precision) - 217;
	y = ((y * abs_x) >> precision) + 1608;
	y = (y * fp_sqrt(1024 - abs_x)) >> 10;
	y = y + (2 * sign * y);
	y = y + sign * 3217;

	return y;
	}
	inline int32_t fp_asin(int32_t x) { return (fp_acos(-x) - Q10_BPI_2); }





	/*********************************************************************************\
	* Fixed-Point ArcTan2 *
	* Input: x in Q10 fp-radians [rad * (256 << 10) / (2pi)] *
	* Output: y in Q10 *
	* *
	* std::atan2(y, x) = fp_atan2(fp_y, fp_x) / (float)(1 << 10) +/-0.00885 *
	* *
	* fp_asin, fp_acos, fp_asec, fp_acsc, fp_acot can all be derived from fp_atan2 *
	* Based on: https://developer.download.nvidia.com/cg/atan2.html *
	\*********************************************************************************/

	int32_t
	fp_atan2(int32_t y, int32_t x)
	{
	static const int8_t precision = 10;
	int32_t t0, t1, t2, t3, t4;

	t3 = abs(x);
	t1 = abs(y);
	t0 = (t3 > t1) ? t3 : t1;
	t1 = (t3 < t1) ? t3 : t1;
	t3 = (0x400 << precision) / t0;
	t3 = (t3 * t1) >> precision;
	t4 = (t3 * t3) >> precision;
	t0 = - 14; // Q10(0.013480470) = 14
	t0 = ((t0 * t4)>>precision) + 59; // Q10(0.057477314) = 59
	t0 = ((t0 * t4)>>precision) - 124; // Q10(0.121239071) = 124
	t0 = ((t0 * t4)>>precision) + 200; // Q10(0.195635925) = 200
	t0 = ((t0 * t4)>>precision) - 341; // Q10(0.332994597) = 341
	t0 = ((t0 * t4)>>precision) + 1024; // Q10(0.999995630) = 1024
	t3 = ((t0 * t3)>>precision);
	t3 = (abs(y) > abs(x)) ? 1608 - t3 : t3; // Q10(1.570796327) = 1608
	t3 = (x < 0) ? 3217 - t3 : t3; // Q10(3.141592654) = 3217
	t3 = (y < 0) ? -t3 : t3;

	return t3;
	}
	// fp_asin(x) := fp_atan2(x, fp_sqrt(1 - x * x))
	// fp_acos(x) := fp_atan2(fp_sqrt(1 - x * x), x)
	inline int32_t fp_atan(int32_t x) { return fp_atan2(x, 0x400); } // not tested - theoretical
	inline int32_t fp_asec(int32_t x) { return fp_acos((0x400 << 10) / x); }
	inline int32_t fp_acsc(int32_t x) { return fp_asin((0x400 << 10) / x); }
	inline int32_t fp_acot(x) { return (fp_atan(x) - (x > 0 ? 0 : Q10_BPI)); }





	#include <iostream>
	#include <iomanip>
	#define DCELL(c) std::dec << std::left << std::setw(c)
	int main() {
	std::cout << "-- BEGIN DEMO -- \n";

	int32_t bigree = 1 << 10; // binary-degree
	int32_t full_revolution = bigree * 256;

	for (int32_t a = 0; a < (full_revolution + bigree * 10); a += (2 * bigree)) {
	float fx = std::cos(i * 2 * M_PI / (float)full_revolution);
	float fy = std::sin(i * 2 * M_PI / (float)full_revolution);
	int32_t ix = fp_cos(i);
	int32_t iy = fp_sin(i);

	// -- fp_sqrt example --
	std::cout << DCELL(12) << std::sqrt(a / (float)bigree);
	std::cout << DCELL(12) << (fp_sqrt(a) / (float)bigree);

	// -- fp_log2 example --
	std::cout << DCELL(12) << std::log2(i / (float)bigree);
	std::cout << DCELL(12) << (fp_log2(i) / (float)bigree);

	// -- fp_sin example --
	std::cout << DCELL(12) << std::sin(i * 2 * M_PI / (float)full_revolution);
	std::cout << DCELL(12) << (fp_sin(i) / (float)bigree);

	// -- fp_acos example --
	std::cout << DCELL(12) << std::acos(fx);
	std::cout << DCELL(12) << (fp_acos(ix) / (float)(1 << 10));

	// -- fp_atan2 example --
	std::cout << DCELL(12) << std::atan2(fy, fx);
	std::cout << DCELL(12) << (fp_atan2(iy, ix) / (float)bigree);
	}

	std::cout << "-- END OF DEMO --" << std::endl;
	return 0;
	}