Skip to content

Instantly share code, notes, and snippets.

@Eleobert
Created August 28, 2021 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Eleobert/a80de036f87d853e84c59bc977213c14 to your computer and use it in GitHub Desktop.
Save Eleobert/a80de036f87d853e84c59bc977213c14 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <algorithm>
#include <cassert>
#include <vector>
#include <numeric>
#include <cmath>
#include <limits>
template<typename T>
auto print(const T& c)
{
for(auto x: c)
{
std::cout << x << ' ';
}
std::cout << std::endl;
}
namespace rank
{
enum class nan_policy
{
none, first, last
};
namespace details
{
template<typename RandomAcess>
auto index_sort(const RandomAcess& rng)
{
auto indices = std::vector<size_t>(rng.size());
std::iota(indices.begin(), indices.end(), 0ul);
std::sort(indices.begin(), indices.end(), [&rng](auto i, auto j)
{
return rng[i] < rng[j];
});
return indices;
}
template<typename RandomAcess>
auto sanitize_nans(RandomAcess& rng, nan_policy policy)
{
using value_type = typename RandomAcess::value_type;
if (std::is_integral_v<value_type> || policy == nan_policy::none)
{
return;
}
// If you want nans with the same rank just replace them with
// numeric limits max or min.
constexpr auto max = std::numeric_limits<value_type>::max();
constexpr auto epsilon = std::numeric_limits<value_type>::epsilon();
auto nan = (policy == nan_policy::last)? max: -max;
// This assigns a different rank for each nan (R like)
for(int i = rng.size() - 1; i >= 0; i--)
{
if(std::isnan(rng[i]))
{
rng[i] = nan;
nan = std::nextafter(nan, -nan);
}
}
}
}
template<typename OutRandomAcess = std::vector<int>, typename RandomAcess>
auto dense(RandomAcess vec, nan_policy policy = nan_policy::none) -> OutRandomAcess
{
assert(vec.empty() == false);
details::sanitize_nans(vec, policy);
const auto n = vec.size();
const auto indices = details::index_sort(vec);
auto res = OutRandomAcess(n);
auto rank = 1;
res[indices[0]] = rank;
for(auto i = 1ul; i < n; i++)
{
rank += vec[indices[i]] != vec[indices[i - 1]];
res[indices[i]] = rank;
}
return res;
}
template<typename OutRandomAcess = std::vector<float>, typename RandomAcess>
auto average(RandomAcess vec, nan_policy policy = nan_policy::none
) -> OutRandomAcess
{
static_assert(std::is_integral_v<typename OutRandomAcess::value_type> == false);
details::sanitize_nans(vec, policy);
assert(vec.empty() == false);
const auto n = vec.size();
const auto indices = details::index_sort(vec);
auto res = OutRandomAcess(n);
auto start = 0;
auto end = 0;
do
{
end = start + 1;
// find the end of the current tie
while(end < vec.size() && vec[indices[start]] == vec[indices[end]])
{
end++;
}
// fill the ties with rank
for(auto i = start; i < end; i++)
{
res[indices[i]] = start + (end - start + 1) / 2.0;
}
start = end;
}while(end < vec.size());
return res;
}
} // namespace rank
int main()
{
auto nan = std::numeric_limits<float>::quiet_NaN();
auto vec = std::vector<float>{nan, 5, nan, 5, nan, 3, 5};
print(rank::dense(vec, rank::nan_policy::first));
print(rank::average(vec, rank::nan_policy::first));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment