Created
October 29, 2019 16:17
-
-
Save fclesio/dfde0c512a0af55ef3c3dbdbec4bd445 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gini function, as implemented in the PyGini package
def gini(arr, eps=1e-8):
    """Calculate the Gini coefficient of a numpy array.

    Reference: PyGini (I owe you a beer @o_guest)
    https://github.com/mckib2/pygini/blob/master/pygini/gini.py

    Parameters
    ----------
    arr : array_like
        Input values. The data is flattened and absolute values are taken,
        so the shape and the signs of the input are ignored.
    eps : float, optional
        Offset added to every absolute value. With a positive ``eps`` the
        denominator cannot be zero for non-empty input (e.g. all zeros).

    Returns
    -------
    float
        The Gini coefficient, or NaN when the input has no elements.

    Notes
    -----
    Based on bottom eq on [2]_.

    References
    ----------
    .. [2] http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    """
    # All values are treated equally: flatten to 1d, drop the sign, shift by
    # eps, and sort ascending because the formula weights values by rank.
    values = np.sort(np.abs(arr).flatten() + eps)
    count = values.shape[0]
    if count == 0:
        # The coefficient is undefined without data; return NaN explicitly
        # instead of reaching a 0/0 division.
        return np.nan
    # 1-based rank of each sorted element.
    ranks = np.arange(1, count + 1)
    return np.sum((2 * ranks - count - 1) * values) / (count * np.sum(values))
def get_gini_df(df):
    """Generate DF with Gini Index

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with Brasileirão data. The columns 'year', 'points',
        'position' and 'team' are read.

    Returns
    -------
    gini_df : Pandas Dataframe
        Returns a Pandas Dataframe indexed by 'year' with the columns
        'team' (team of the first row whose position is 1 in that year) and
        'gini' (Gini index of that year's points, rounded to 4 decimals).
        An input without rows yields an empty frame with the same layout.

    Raises
    ------
    IndexError
        If a year has no row with position equal to 1.
    """
    gini_per_year = []
    for year in df['year'].unique():
        # Filter the season once and reuse it for both lookups.
        season = df[df['year'] == year]
        championship_index = gini(np.array(season['points']))
        champion = season[season['position'] == 1]['team']
        gini_per_year.append(
            (year, champion.values[0], round(championship_index, 4))
        )

    # Naming the columns in the constructor keeps this working when there are
    # no rows; assigning three names to a zero-column frame would raise.
    gini_df = pd.DataFrame(gini_per_year, columns=['year', 'team', 'gini'])

    # Indexing the date field for graph it smoothly
    gini_df.set_index('year', inplace=True)
    return gini_df
# Build the per-year Gini table from the Brasileirão data, then show it
# ordered from the lowest to the highest Gini index.
gini_df = get_gini_df(df=df_brasileirao)
gini_df.sort_values(by='gini')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment