davmac314/ffdiv.cc

## ffdiv.cc
// Fast fixed-dividend divider generator in C++11
// by Davin McCall <davmac@davmac.org>
//
// Division is a relatively very slow operation even on modern processors. This
// template (ffdiv) produces a divider that will divide any number up to a fixed
// limit by a fixed dividend, using a multiplication and shift operation. I
// measured this as more than twice as fast as using ordinary division.
//
// Compile with (for eg) "gcc -std=c++11".
//
// I hereby grant license to use this and distribute this code for any purpose,
// without restriction.

#include <iostream>


// bits_in: calculate the number of significant bits in an operand
template <unsigned long long n, unsigned bits = 0> class bits_in;

template <unsigned bits> class bits_in<0, bits>
{
    public:
    const static unsigned val = bits;
};

template <unsigned long long n, unsigned bits> class bits_in
{
    const static unsigned long long shiftone = n >> 1u;

    public:
    const static unsigned val = bits_in<shiftone, bits + 1>::val;
};


// ffdiv: fast fixed-dividend divider for the given maximum value and dividend
//
//   T  - type in which to perform math (use a larger type if assertions fail)
//
template <unsigned long long maxval, unsigned long long divisor, typename T = unsigned> class ffdiv
{
    const static T maxmult = ((T) -1) / maxval;

    const static unsigned shift = bits_in<maxmult * divisor>::val - 1;
    const static T n = (T)1 << shift;
    const static T mult = n / divisor + 1;
    const static T actual_N9_offs = ((divisor - 1) * mult) % n;
    const static T per10_diff = (divisor * mult) % n;
    const static T errpoint = ((n - actual_N9_offs) / per10_diff + 2) * divisor - 1;

    // The 'errpoint' tells us where our approximation will fail.
    //   (n - actual_N9_offs) is the remainder of ((divisor -1 ) * mult) divided by n (the shift divider),
    //      subtracted from n. Look at it as the amount of leeway before error causes overflow.
    //   per10_diff represents the error per (divisor) in the dividend. Once this error reaches the
    //      leeway, we'll get overflow. That will always happen on a divisor boundary, -1.

    static_assert(errpoint > maxval, "cannot create a fast divider for given parameters (A)");
    static_assert(maxval * mult / mult == maxval, "cannot create a fast divider for given parameters (B)");

    public:

    static T divide(T dividend)
    {
        return dividend * mult >> shift;
    }
};


int main()
{
    using ff = ffdiv<100000,27,unsigned long>;

    // Example with output:
    std::cout << "9990 / 27 = " << ff::divide(9990) << std::endl;

    // Test:
    for (unsigned i = 0; i < 100000; i++) {
        if (ff::divide(i) != i / 27) {
            std::cout << "Test failed for dividend = " << i << std::endl;
            std::cout << "   result = " << ff::divide(i) << std::endl;
            std::cout << "   i/27 = " << (i / 27) << std::endl;
            return 1;
        }
    }

    std::cout << "Test passed." << std::endl;
    return 0;
}
	// Fast fixed-dividend divider generator in C++11
	// by Davin McCall <davmac@davmac.org>
	//
	// Division is a relatively very slow operation even on modern processors. This
	// template (ffdiv) produces a divider that will divide any number up to a fixed
	// limit by a fixed dividend, using a multiplication and shift operation. I
	// measured this as more than twice as fast as using ordinary division.
	//
	// Compile with (for eg) "gcc -std=c++11".
	//
	// I hereby grant license to use this and distribute this code for any purpose,
	// without restriction.

	#include <iostream>


	// bits_in: calculate the number of significant bits in an operand
	template <unsigned long long n, unsigned bits = 0> class bits_in;

	template <unsigned bits> class bits_in<0, bits>
	{
	public:
	const static unsigned val = bits;
	};

	template <unsigned long long n, unsigned bits> class bits_in
	{
	const static unsigned long long shiftone = n >> 1u;

	public:
	const static unsigned val = bits_in<shiftone, bits + 1>::val;
	};


	// ffdiv: fast fixed-dividend divider for the given maximum value and dividend
	//
	// T - type in which to perform math (use a larger type if assertions fail)
	//
	template <unsigned long long maxval, unsigned long long divisor, typename T = unsigned> class ffdiv
	{
	const static T maxmult = ((T) -1) / maxval;

	const static unsigned shift = bits_in<maxmult * divisor>::val - 1;
	const static T n = (T)1 << shift;
	const static T mult = n / divisor + 1;
	const static T actual_N9_offs = ((divisor - 1) * mult) % n;
	const static T per10_diff = (divisor * mult) % n;
	const static T errpoint = ((n - actual_N9_offs) / per10_diff + 2) * divisor - 1;

	// The 'errpoint' tells us where our approximation will fail.
	// (n - actual_N9_offs) is the remainder of ((divisor -1 ) * mult) divided by n (the shift divider),
	// subtracted from n. Look at it as the amount of leeway before error causes overflow.
	// per10_diff represents the error per (divisor) in the dividend. Once this error reaches the
	// leeway, we'll get overflow. That will always happen on a divisor boundary, -1.

	static_assert(errpoint > maxval, "cannot create a fast divider for given parameters (A)");
	static_assert(maxval * mult / mult == maxval, "cannot create a fast divider for given parameters (B)");

	public:

	static T divide(T dividend)
	{
	return dividend * mult >> shift;
	}
	};


	int main()
	{
	using ff = ffdiv<100000,27,unsigned long>;

	// Example with output:
	std::cout << "9990 / 27 = " << ff::divide(9990) << std::endl;

	// Test:
	for (unsigned i = 0; i < 100000; i++) {
	if (ff::divide(i) != i / 27) {
	std::cout << "Test failed for dividend = " << i << std::endl;
	std::cout << " result = " << ff::divide(i) << std::endl;
	std::cout << " i/27 = " << (i / 27) << std::endl;
	return 1;
	}
	}

	std::cout << "Test passed." << std::endl;
	return 0;
	}