Eleobert/ranking.hpp

## ranking.hpp
#include <iostream>
#include <algorithm>
#include <cassert>
#include <vector>
#include <numeric>
#include <cmath>
#include <limits>


template<typename T>
auto print(const T& c)
{
    for(auto x: c)
    {
        std::cout << x << ' ';
    }
    std::cout << std::endl;
}

namespace rank
{

enum class nan_policy
{
    none, first, last
};


namespace details
{

template<typename RandomAcess>
auto index_sort(const RandomAcess& rng)
{
    auto indices = std::vector<size_t>(rng.size());

    std::iota(indices.begin(), indices.end(), 0ul);

    std::sort(indices.begin(), indices.end(), [&rng](auto i, auto j)
    {
        return rng[i] < rng[j];
    });

    return indices;
}

template<typename RandomAcess>
auto sanitize_nans(RandomAcess& rng, nan_policy policy)
{

    using value_type = typename RandomAcess::value_type;

    if (std::is_integral_v<value_type> || policy == nan_policy::none)
    {
        return;
    }

    // If you want nans with the same rank just replace them with
    // numeric limits max or min.

    constexpr auto max = std::numeric_limits<value_type>::max();
    constexpr auto epsilon = std::numeric_limits<value_type>::epsilon();

    auto nan = (policy == nan_policy::last)? max: -max;

    // This assigns a different rank for each nan (R like)
    for(int i = rng.size() - 1; i >= 0; i--)
    {
        if(std::isnan(rng[i]))
        {
            rng[i] = nan;
            nan = std::nextafter(nan, -nan);
        }
    }
}

}

template<typename OutRandomAcess = std::vector<int>, typename RandomAcess>
auto dense(RandomAcess vec, nan_policy policy = nan_policy::none) -> OutRandomAcess
{
    assert(vec.empty() == false);

    details::sanitize_nans(vec, policy);

    const auto n = vec.size();
    const auto indices = details::index_sort(vec);
    auto res = OutRandomAcess(n);

    auto rank = 1;
    res[indices[0]] = rank;

    for(auto i = 1ul; i < n; i++)
    {
        rank += vec[indices[i]] != vec[indices[i - 1]];
        res[indices[i]] = rank;
    }
    return res;
}


template<typename OutRandomAcess = std::vector<float>, typename RandomAcess>
auto average(RandomAcess vec, nan_policy policy = nan_policy::none
            ) -> OutRandomAcess
{
    static_assert(std::is_integral_v<typename OutRandomAcess::value_type> == false);
    details::sanitize_nans(vec, policy);

    assert(vec.empty() == false);

    const auto n = vec.size();
    const auto indices = details::index_sort(vec);
    auto res = OutRandomAcess(n);

    auto start = 0;
    auto end   = 0;

    do
    {
        end = start + 1;
        // find the end of the current tie
        while(end < vec.size() && vec[indices[start]] == vec[indices[end]])
        {
            end++;
        }

        // fill the ties with rank
        for(auto i = start; i < end; i++)
        {
            res[indices[i]] = start + (end - start + 1) / 2.0;
        }

        start = end;

    }while(end < vec.size());

    return res;
}

} // namespace rank


int main()
{
    auto nan = std::numeric_limits<float>::quiet_NaN();
    auto vec = std::vector<float>{nan, 5, nan, 5, nan, 3, 5};
    print(rank::dense(vec, rank::nan_policy::first));
    print(rank::average(vec, rank::nan_policy::first));
}
	#include <iostream>
	#include <algorithm>
	#include <cassert>
	#include <vector>
	#include <numeric>
	#include <cmath>
	#include <limits>


	template<typename T>
	auto print(const T& c)
	{
	for(auto x: c)
	{
	std::cout << x << ' ';
	}
	std::cout << std::endl;
	}

	namespace rank
	{

	enum class nan_policy
	{
	none, first, last
	};


	namespace details
	{

	template<typename RandomAcess>
	auto index_sort(const RandomAcess& rng)
	{
	auto indices = std::vector<size_t>(rng.size());

	std::iota(indices.begin(), indices.end(), 0ul);

	std::sort(indices.begin(), indices.end(), [&rng](auto i, auto j)
	{
	return rng[i] < rng[j];
	});

	return indices;
	}

	template<typename RandomAcess>
	auto sanitize_nans(RandomAcess& rng, nan_policy policy)
	{

	using value_type = typename RandomAcess::value_type;

	if (std::is_integral_v<value_type> \|\| policy == nan_policy::none)
	{
	return;
	}

	// If you want nans with the same rank just replace them with
	// numeric limits max or min.

	constexpr auto max = std::numeric_limits<value_type>::max();
	constexpr auto epsilon = std::numeric_limits<value_type>::epsilon();

	auto nan = (policy == nan_policy::last)? max: -max;

	// This assigns a different rank for each nan (R like)
	for(int i = rng.size() - 1; i >= 0; i--)
	{
	if(std::isnan(rng[i]))
	{
	rng[i] = nan;
	nan = std::nextafter(nan, -nan);
	}
	}
	}

	}

	template<typename OutRandomAcess = std::vector<int>, typename RandomAcess>
	auto dense(RandomAcess vec, nan_policy policy = nan_policy::none) -> OutRandomAcess
	{
	assert(vec.empty() == false);

	details::sanitize_nans(vec, policy);

	const auto n = vec.size();
	const auto indices = details::index_sort(vec);
	auto res = OutRandomAcess(n);

	auto rank = 1;
	res[indices[0]] = rank;

	for(auto i = 1ul; i < n; i++)
	{
	rank += vec[indices[i]] != vec[indices[i - 1]];
	res[indices[i]] = rank;
	}
	return res;
	}


	template<typename OutRandomAcess = std::vector<float>, typename RandomAcess>
	auto average(RandomAcess vec, nan_policy policy = nan_policy::none
	) -> OutRandomAcess
	{
	static_assert(std::is_integral_v<typename OutRandomAcess::value_type> == false);
	details::sanitize_nans(vec, policy);

	assert(vec.empty() == false);

	const auto n = vec.size();
	const auto indices = details::index_sort(vec);
	auto res = OutRandomAcess(n);

	auto start = 0;
	auto end = 0;

	do
	{
	end = start + 1;
	// find the end of the current tie
	while(end < vec.size() && vec[indices[start]] == vec[indices[end]])
	{
	end++;
	}

	// fill the ties with rank
	for(auto i = start; i < end; i++)
	{
	res[indices[i]] = start + (end - start + 1) / 2.0;
	}

	start = end;

	}while(end < vec.size());

	return res;
	}

	} // namespace rank


	int main()
	{
	auto nan = std::numeric_limits<float>::quiet_NaN();
	auto vec = std::vector<float>{nan, 5, nan, 5, nan, 3, 5};
	print(rank::dense(vec, rank::nan_policy::first));
	print(rank::average(vec, rank::nan_policy::first));
	}