Skip to content

Instantly share code, notes, and snippets.

@JosepER
Created December 5, 2022 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JosepER/4d49958a6e015cd1419be72979f71166 to your computer and use it in GitHub Desktop.
Save JosepER/4d49958a6e015cd1419be72979f71166 to your computer and use it in GitHub Desktop.
import cppyy
import numpy as np
# C++ 'translation' (done by ChatGPT) of the Python function gini_py defined further below
# Compile the C++ implementation of the weighted Gini coefficient with cppyy.
# Each weight must stay paired with its own x value, so the data is ordered
# through an index permutation (argsort of x) rather than by sorting x and w
# independently.
cppyy.cppdef("""
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>

// Weighted Gini coefficient of the values x with weights w.
// Throws std::invalid_argument when the inputs are empty or differ in length.
double gini_cpp(const std::vector<double>& x, const std::vector<double>& w) {
    if (x.size() != w.size()) {
        throw std::invalid_argument("gini_cpp: x and w must have the same length");
    }
    if (x.empty()) {
        throw std::invalid_argument("gini_cpp: x must not be empty");
    }
    const std::size_t n = x.size();

    // Indices that put x in ascending order; the same permutation is applied
    // to w so that every weight follows its value.
    std::vector<std::size_t> order(n);
    std::iota(order.begin(), order.end(), std::size_t{0});
    std::sort(order.begin(), order.end(),
              [&x](std::size_t a, std::size_t b) { return x[a] < x[b]; });

    // Running totals of the weights and of the weighted values, in x order.
    std::vector<double> cumw(n);
    std::vector<double> cumxw(n);
    double running_w = 0.0;
    double running_xw = 0.0;
    for (std::size_t i = 0; i < n; ++i) {
        const std::size_t k = order[i];
        running_w += w[k];
        running_xw += x[k] * w[k];
        cumw[i] = running_w;
        cumxw[i] = running_xw;
    }

    // sum(cumxw[1:] * cumw[:-1]) - sum(cumxw[:-1] * cumw[1:])
    const double upper =
        std::inner_product(cumxw.begin() + 1, cumxw.end(), cumw.begin(), 0.0);
    const double lower =
        std::inner_product(cumxw.begin(), cumxw.end() - 1, cumw.begin() + 1, 0.0);
    return (upper - lower) / (cumxw.back() * cumw.back());
}
""")
from cppyy.gbl import gini_cpp
# found the function here: https://stackoverflow.com/questions/48999542/more-efficient-weighted-gini-coefficient-in-python
def gini_py(x, w=None):
    """Return the Gini coefficient of ``x``, optionally weighted by ``w``.

    Args:
        x: One-dimensional, non-empty array-like of values.
        w: Optional array-like of weights with the same shape as ``x``.
            When omitted, every observation has weight 1.

    Returns:
        The Gini coefficient as a NumPy float.

    Raises:
        ValueError: If ``x`` is empty or not one-dimensional, or if ``w`` does
            not have the same shape as ``x``.
    """
    values = np.asarray(x)
    if values.ndim != 1:
        raise ValueError("x must be one-dimensional")
    if values.size == 0:
        raise ValueError("x must not be empty")

    if w is None:
        n = values.size
        # Force float dtype to avoid integer overflow in the running sums.
        cumx = np.cumsum(np.sort(values), dtype=float)
        # The weighted formula below, with all weights equal to 1,
        # simplifies to:
        return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n

    weights = np.asarray(w)
    if weights.shape != values.shape:
        # Without this check a longer ``w`` would be silently truncated by the
        # fancy indexing below, producing a wrong coefficient.
        raise ValueError("w must have the same shape as x")

    # Reorder the weights with the permutation that sorts the values so each
    # weight stays attached to its observation.
    order = np.argsort(values)
    sorted_x = values[order]
    sorted_w = weights[order]
    # Force float dtype to avoid integer overflow in the running sums.
    cumw = np.cumsum(sorted_w, dtype=float)
    cumxw = np.cumsum(sorted_x * sorted_w, dtype=float)
    return (np.sum(cumxw[1:] * cumw[:-1] - cumxw[:-1] * cumw[1:]) /
            (cumxw[-1] * cumw[-1]))
# Demo: evaluate both implementations on the same sample and print each result
# (C++ first, then Python).
for gini_impl in (gini_cpp, gini_py):
    print(gini_impl([6, 5, 8, 7], [0.1, 0.1, 0.2, 0.2]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment