Skip to content

Instantly share code, notes, and snippets.

@fclesio
Created October 29, 2019 16:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fclesio/dfde0c512a0af55ef3c3dbdbec4bd445 to your computer and use it in GitHub Desktop.
Save fclesio/dfde0c512a0af55ef3c3dbdbec4bd445 to your computer and use it in GitHub Desktop.
# Gini function as PyGini package
def gini(arr, eps=1e-8):
    """Calculate the Gini coefficient of an array of values.

    Adapted from PyGini (I owe you a beer @o_guest):
    https://github.com/mckib2/pygini/blob/master/pygini/gini.py

    Parameters
    ----------
    arr : array_like
        Input values. Signs are discarded (absolute values are used) and
        the input is flattened, so every element is treated equally.
    eps : float, optional
        Small constant added to every magnitude so that the denominator
        is non-zero even when all inputs are zero.

    Returns
    -------
    float
        The Gini coefficient, or NaN when ``arr`` has no elements.

    Notes
    -----
    With the values sorted ascending as ``x_1 <= ... <= x_N``::

        G = sum_i((2*i - N - 1) * x_i) / (N * sum_i(x_i))

    Based on the bottom equation on [2]_.

    References
    ----------
    .. [2] http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    """
    # Magnitudes only, as a flat 1-d array shifted by eps, sorted ascending
    # because the formula weights each value by its rank.
    values = np.sort(np.abs(arr).flatten() + eps)
    n = values.shape[0]

    # No elements: the formula would be 0/0. Return NaN directly instead of
    # triggering a division warning to get the same result.
    if n == 0:
        return np.float64(np.nan)

    # 1-based rank of each sorted value.
    ranks = np.arange(1, n + 1)
    return np.sum((2 * ranks - n - 1) * values) / (n * np.sum(values))
def get_gini_df(df):
    """Generate a DataFrame with the Gini index of the points for each year.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with Brasileirão data. The columns ``year``, ``points``,
        ``position`` and ``team`` are read.

    Returns
    -------
    gini_df : Pandas Dataframe
        Indexed by ``year`` (in order of first appearance in ``df``), with
        the columns ``team`` (the team at ``position == 1`` for that year,
        or None when no such row exists) and ``gini`` (the Gini index of
        that year's points, rounded to 4 decimals). An empty ``df`` yields
        an empty frame with the same columns.
    """
    rows = []
    # One pass over the data; sort=False keeps the years in the order they
    # first appear instead of sorting them.
    for year, season in df.groupby('year', sort=False):
        championship_index = gini(season['points'].to_numpy())
        champions = season.loc[season['position'] == 1, 'team']
        # A season without a first-place row still gets a Gini value.
        champion = champions.iloc[0] if not champions.empty else None
        rows.append((year, champion, round(championship_index, 4)))

    # Naming the columns in the constructor keeps this valid when rows is
    # empty (assigning .columns afterwards fails on a 0-column frame).
    gini_df = pd.DataFrame(rows, columns=['year', 'team', 'gini'])

    # Index by year so the result plots directly against the year axis.
    gini_df.set_index('year', inplace=True)
    return gini_df
# Build the per-year Gini table from the Brasileirão data.
gini_df = get_gini_df(df_brasileirao)
# Show the years ordered from the lowest to the highest Gini index
# (sort_values returns a new frame; gini_df itself is left unsorted).
gini_df.sort_values(by='gini')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment