Skip to content

Instantly share code, notes, and snippets.

@odinokov
Last active December 5, 2022 16:59
Show Gist options
  • Save odinokov/16243213f588d5db64445bd75838c49c to your computer and use it in GitHub Desktop.
Save odinokov/16243213f588d5db64445bd75838c49c to your computer and use it in GitHub Desktop.
An efficient way to calculate the weighted Gini coefficient in Python
import numba
import numpy as np
@numba.jit
def gini(x, w=None):
    """Return the Gini coefficient of ``x``, optionally weighted by ``w``.

    Adapted from
    https://stackoverflow.com/questions/48999542/more-efficient-weighted-gini-coefficient-in-python

    Args:
        x: One-dimensional NumPy array of values. Lists are not converted
            here; call ``np.asarray`` on them before passing them in.
        w: Optional NumPy array of weights with the same length as ``x``.
            When omitted, every value is weighted equally.

    Returns:
        The Gini coefficient as a float. The result is not defined when the
        (weighted) total of ``x`` is zero, because the formula divides by it.

    Raises:
        ValueError: If ``x`` is empty, or if ``w`` is given and its length
            differs from that of ``x``.
    """
    # The formulas below read the last cumulative sum, so there must be
    # at least one element.
    if x.size == 0:
        raise ValueError("gini() requires at least one value")
    if w is not None:
        if w.size != x.size:
            raise ValueError("x and w must have the same length")
        sorted_indices = np.argsort(x)
        # Force float dtype to avoid overflows: with integer inputs the
        # cumulative sums and their products would otherwise be computed in
        # integer arithmetic and could wrap around silently.
        sorted_x = x[sorted_indices].astype(np.float64)
        sorted_w = w[sorted_indices].astype(np.float64)
        cumw = np.cumsum(sorted_w)
        cumxw = np.cumsum(sorted_x * sorted_w)
        return (np.sum(cumxw[1:] * cumw[:-1] - cumxw[:-1] * cumw[1:]) /
                (cumxw[-1] * cumw[-1]))
    else:
        # Same float conversion as in the weighted branch, for the same reason.
        sorted_x = np.sort(x).astype(np.float64)
        n = x.size
        cumx = np.cumsum(sorted_x)
        # The weighted formula, with all weights equal to 1, simplifies to:
        return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n
# Example: unweighted Gini coefficient of five evenly spaced values.
gini(np.asarray([1, 2, 3, 4, 5]))
# Expected result: 0.2666666666666666
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment