Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Created September 19, 2019 07:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/9302c02b427e000ee5f422a836ec48ef to your computer and use it in GitHub Desktop.
Save afrendeiro/9302c02b427e000ee5f422a836ec48ef to your computer and use it in GitHub Desktop.
Visualizing the relationship between distributions and the Gini coefficient with Lorenz curves
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def gini(x):
    """Return the Gini coefficient of the values in ``x``.

    The coefficient is half the relative mean absolute difference: the
    mean of ``|x_i - x_j|`` over all ordered pairs (self-pairs included),
    divided by the mean of ``x``, times 0.5.

    Parameters
    ----------
    x : array-like
        Values to summarise. Lists, NumPy arrays and pandas Series are
        all accepted; multi-dimensional input is flattened.

    Returns
    -------
    float
        The Gini coefficient. ``nan`` for empty input; ``nan``/``inf``
        when the mean of ``x`` is zero (the ratio is undefined there).
    """
    # Convert up front so every input type takes the same plain-ndarray
    # path; pandas objects do not support the ufunc ``outer`` trick that
    # a direct pairwise-difference matrix would need.
    values = np.sort(np.asarray(x, dtype=float).ravel())
    count = values.size
    if count == 0:
        return float("nan")

    # Sum of |x_i - x_j| over all ordered pairs, computed from the sorted
    # values: the k-th smallest value (1-indexed) appears with a plus sign
    # in (k - 1) differences and a minus sign in (count - k) of them.
    # This avoids materialising the count x count difference matrix.
    weights = 2 * np.arange(1, count + 1) - count - 1
    pair_sum = 2.0 * np.dot(weights, values)

    # Mean absolute difference (self-pairs included in the denominator)
    mad = pair_sum / count ** 2
    # Relative mean absolute difference; a zero mean yields nan/inf
    # without emitting a floating-point warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        rmad = mad / values.mean()
    # Gini coefficient
    return 0.5 * rmad
def lorenz(size, dist="normal", **params):
    """Plot the Lorenz curve of a random sample on the current axes.

    A sample is drawn from ``np.random.<dist>``, its absolute values are
    sorted, and the cumulative share of the total is plotted against the
    cumulative share of observations. The legend entry shows the
    distribution name, its parameters and the sample's Gini coefficient.

    Parameters
    ----------
    size : int
        Number of values to draw.
    dist : str
        Name of a sampling function in ``np.random`` that accepts a
        ``size`` keyword (e.g. ``"normal"``, ``"poisson"``).
    **params
        Keyword arguments forwarded to the sampling function. For
        ``"normal"`` with no parameters, ``loc=0, scale=1`` is used so
        that the legend shows the values explicitly.
    """
    if dist == "normal" and not params:
        params = {"loc": 0, "scale": 1}
    sampler = getattr(np.random, dist)

    # Absolute values, ascending, as a plain float ndarray. Passing an
    # ndarray (not a pandas Series) to gini() keeps it working with
    # pairwise-difference ufunc tricks that pandas objects do not support.
    values = np.sort(np.abs(np.asarray(sampler(size=size, **params), dtype=float)))
    g = gini(values)

    # Cumulative share of observations: k / n for the k-th smallest value.
    # Positional shares (rather than average ranks) keep tied values, which
    # are common for discrete distributions, at distinct x positions so the
    # curve has no vertical segments.
    population_share = np.arange(1, values.size + 1) / values.size
    value_share = np.cumsum(values) / values.sum()

    param_text = "; ".join([f"{k}={v}" for k, v in params.items()])
    plt.plot(
        population_share,
        value_share,
        label=f"{sampler.__name__}\nParams: {param_text}\nGini: {g:.2f}",
    )
    plt.legend()
# Number of values drawn for every curve.
n = 1000

# One entry per Lorenz curve, in plotting order; each dict holds the keyword
# arguments passed to lorenz() after the sample size. Note that the "n" key in
# the negative binomial entries is a distribution parameter, not the sample
# size.
curve_specs = [
    {},
    {"loc": 1, "scale": 1},
    {"loc": 20, "scale": 1},
    {"loc": 1, "scale": 0.1},
    {"dist": "uniform"},
    {"dist": "poisson", "lam": 5},
    {"dist": "poisson", "lam": 10},
    {"dist": "negative_binomial", "n": 10, "p": 0.1},
    {"dist": "negative_binomial", "n": 10, "p": 0.5},
]
for spec in curve_specs:
    lorenz(n, **spec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment