Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Visualizing the relationship between distributions and the Gini coefficient with Lorenz curves
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def gini(x):
    """Return the Gini coefficient of the values in ``x``.

    The coefficient is computed as half of the relative mean absolute
    difference: the mean of ``|x_i - x_j|`` over all ordered pairs
    (pairs with ``i == j`` included), divided by the mean of ``x``,
    multiplied by 0.5.

    Parameters
    ----------
    x : array-like
        Numeric values (list, ndarray, pandas Series, ...). Inputs with
        more than one dimension are flattened.

    Returns
    -------
    numpy.float64
        The Gini coefficient. The result is ``nan`` when ``x`` is empty
        or its mean is zero, because the ratio is undefined there.

    Notes
    -----
    An n-by-n matrix of pairwise differences is materialised, so memory
    use grows quadratically with the number of values.
    """
    # Convert to a plain float ndarray before calling the ufunc:
    #  * pandas objects do not implement ufunc ``outer`` (recent pandas
    #    raises NotImplementedError when given a Series);
    #  * unsigned integer inputs would otherwise wrap around in the
    #    subtraction instead of producing negative differences.
    values = np.asarray(x, dtype=float).ravel()

    # Mean absolute difference over every ordered pair of values.
    mad = np.abs(np.subtract.outer(values, values)).mean()
    # Relative mean absolute difference.
    rmad = mad / np.mean(values)
    # The Gini coefficient is half of the relative mean absolute difference.
    return 0.5 * rmad
def lorenz(size, dist="normal", **params):
    """Sample a distribution and add its Lorenz curve to the current plot.

    ``size`` values are drawn from ``np.random.<dist>``, their absolute
    values are sorted in ascending order, and the cumulative share of the
    total is plotted against the cumulative share of the population. The
    legend entry shows the distribution name, the parameters used and the
    Gini coefficient of the sample.

    Parameters
    ----------
    size : int
        Number of values to draw.
    dist : str, optional
        Name of a sampling function available on ``np.random`` (for
        example ``"normal"``, ``"uniform"``, ``"poisson"``).
    **params
        Keyword arguments forwarded to the sampling function. When
        ``dist`` is ``"normal"`` and none are given, ``loc=0`` and
        ``scale=1`` are used so that they appear in the legend.

    Returns
    -------
    None
        The curve and the legend are drawn on the current matplotlib axes.
    """
    if dist == "normal" and not params:
        params = {"loc": 0, "scale": 1}
    sampler = getattr(np.random, dist)

    # Absolute values, smallest first, so the cumulative sum below traces
    # the Lorenz curve.
    x = pd.Series(sampler(size=size, **params)).abs().sort_values()

    # Hand gini() a plain ndarray: ufunc ``outer`` is not implemented for
    # pandas Series.
    g = gini(x.to_numpy())
    s = "; ".join([f"{k}={v}" for k, v in params.items()])

    # Cumulative population share 1/n, 2/n, ..., 1 by position in the
    # sorted sample. Rank-based positions would average the ranks of tied
    # values (common for discrete distributions), stacking several points
    # on the same x and putting vertical jumps in the curve.
    count = x.shape[0]
    population_share = np.arange(1, count + 1) / count
    value_share = (x.cumsum() / x.sum()).to_numpy()

    plt.plot(
        population_share,
        value_share,
        label=f"{sampler.__name__}\nParams: {s}\nGini: {g:.2f}",
    )
    plt.legend()
# Number of values drawn for every curve.
n = 1000

# One entry per Lorenz curve, in drawing order; each dict holds the keyword
# arguments passed to lorenz() after the sample size (an empty dict means
# "use lorenz()'s defaults").
_curve_specs = (
    {},
    {"loc": 1, "scale": 1},
    {"loc": 20, "scale": 1},
    {"loc": 1, "scale": 0.1},
    {"dist": "uniform"},
    {"dist": "poisson", "lam": 5},
    {"dist": "poisson", "lam": 10},
    {"dist": "negative_binomial", "n": 10, "p": 0.1},
    {"dist": "negative_binomial", "n": 10, "p": 0.5},
)
for _spec in _curve_specs:
    lorenz(n, **_spec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment