nestordemeure/cpp

## cpp
/*
Most people use Kahan summation[0] when they want a higher precision sum.
Mostly because it is the only thing they know.

Here is an alternative that is:
- almost as fast (while having no unpredictable branch),
- significantly more precise (about as precise as doubling the floating point precision with added resilience to cancelations)
- very easy to implement in your favorite programming language.

For more information, I highly recommend reading the Handbook of floating-point arithmetic.

[0]: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
*/

#include <vector>

// computes the error introduced during a floating point sum
// the main difference with the error transformation used in Kahan's summation is that this one does not expect one of the terms to be larger than the other
// WARNINGS:
// - requires rounding to nearest (the default on modern processors)
//   see Priest's version in the Handbook of floating point arithmetic if you need a version valid for all rounding modes
// - the volatile keyword is there to avoid the operation being optimized away by a compiler using associativity rules
//   see: https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Possible_invalidation_by_compiler_optimization
//   (on most compilers, it is only useful with the stronger optimisation flags)
template <typename T>
inline const T TwoSum(const T n1, const T n2, const volatile T result)
{
    const T n22 = result - n1;
    const T n11 = result - n22;
    const volatile T epsilon2 = n2 - n22;
    const volatile T epsilon1 = n1 - n11;
    const T error = epsilon1 + epsilon2;
    return error;
}

// a compensated, numerically stable, summation algorithm
double compensatedSummation(std::vector<double>& numbers)
{
    double result = 0.0;
    double error = 0.0;

    // computes the sum AND the error introduced by each addition
    for(double x: numbers)
    {
        const double previousResult = result;
        result += x;
        error += TwoSum(x, previousResult, result);
    }

    // corrects the floating-point result by correcting it with its total error
    return result + error;
}
	/*
	Most people use Kahan summation[0] when they want a higher precision sum.
	Mostly because it is the only thing they know.

	Here is an alternative that is:
	- almost as fast (while having no unpredictable branch),
	- significantly more precise (about as precise as doubling the floating point precision with added resilience to cancelations)
	- very easy to implement in your favorite programming language.

	For more information, I highly recommend reading the Handbook of floating-point arithmetic.

	[0]: https://en.wikipedia.org/wiki/Kahan_summation_algorithm
	*/

	#include <vector>

	// computes the error introduced during a floating point sum
	// the main difference with the error transformation used in Kahan's summation is that this one does not expect one of the terms to be larger than the other
	// WARNINGS:
	// - requires rounding to nearest (the default on modern processors)
	// see Priest's version in the Handbook of floating point arithmetic if you need a version valid for all rounding modes
	// - the volatile keyword is there to avoid the operation being optimized away by a compiler using associativity rules
	// see: https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Possible_invalidation_by_compiler_optimization
	// (on most compilers, it is only useful with the stronger optimisation flags)
	template <typename T>
	inline const T TwoSum(const T n1, const T n2, const volatile T result)
	{
	const T n22 = result - n1;
	const T n11 = result - n22;
	const volatile T epsilon2 = n2 - n22;
	const volatile T epsilon1 = n1 - n11;
	const T error = epsilon1 + epsilon2;
	return error;
	}

	// a compensated, numerically stable, summation algorithm
	double compensatedSummation(std::vector<double>& numbers)
	{
	double result = 0.0;
	double error = 0.0;

	// computes the sum AND the error introduced by each addition
	for(double x: numbers)
	{
	const double previousResult = result;
	result += x;
	error += TwoSum(x, previousResult, result);
	}

	// corrects the floating-point result by correcting it with its total error
	return result + error;
	}