Skip to content

Instantly share code, notes, and snippets.

@fclesio
Created October 29, 2019 16:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fclesio/7b8a086283570fd3b46fa169292542db to your computer and use it in GitHub Desktop.
Save fclesio/7b8a086283570fd3b46fa169292542db to your computer and use it in GitHub Desktop.
def get_brasileirao_no_outliers(df):
    """Return the championship data without each year's first and last place.

    For every distinct value in ``df['year']`` the rows whose ``position``
    equals that year's minimum or maximum position are dropped. The remaining
    rows are stacked year by year (in order of each year's first appearance
    in ``df``) and keep their original index labels.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with Brasileirão data; must contain the ``year`` and
        ``position`` columns.

    Returns
    -------
    df_concat : Pandas Dataframe
        Returns a Pandas Dataframe without the outliers. When ``df`` has no
        rows, an empty Dataframe with the same columns is returned.
    """
    kept_per_year = []
    for year in df['year'].unique():
        # Select the year once and reuse it for min, max and the filter.
        season = df[df['year'] == year]
        extremes = [season['position'].min(), season['position'].max()]
        kept_per_year.append(season[~season['position'].isin(extremes)])

    # pd.concat raises on an empty list, so handle "no years at all" here.
    if not kept_per_year:
        return df.iloc[0:0].copy()

    # A single concat replaces the per-iteration DataFrame.append, which was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    return pd.concat(kept_per_year)
def get_gini(df):
    """Build a Dataframe with the Gini index of the points for each year.

    For every distinct value in ``df['year']`` the ``points`` of that year
    are passed, as a NumPy array, to ``gini`` (a helper expected to be
    defined elsewhere in this file) and the result is rounded to 4 decimals.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with Brasileirão data; must contain the ``year`` and
        ``points`` columns.

    Returns
    -------
    gini_df : Pandas Dataframe
        Returns a Pandas Dataframe indexed by ``year`` with a single
        ``gini`` column. It is empty when ``df`` has no rows.
    """
    gini_per_year = []
    for year in df['year'].unique():
        points = np.array(df[df['year'] == year]['points'])
        gini_per_year.append((year, round(gini(points), 4)))

    # Naming the columns in the constructor (instead of assigning
    # ``.columns`` afterwards) also works when there are no years at all.
    gini_df = pd.DataFrame(gini_per_year, columns=['year', 'gini'])

    # Index by year so the result plots with the year on the x axis.
    return gini_df.set_index('year')
# Outlier removal: drop, for each year, the rows at the minimum and maximum
# position, then compute the per-year Gini index on the remaining rows.
df_brasileirao_no_outliers = get_brasileirao_no_outliers(df_brasileirao)
df_brasileirao_no_outliers_gini = get_gini(df_brasileirao_no_outliers)
# NOTE(review): sort_values returns a new Dataframe and the result is not
# assigned here; presumably this is the last expression of a notebook cell
# and is only meant to be displayed -- confirm.
df_brasileirao_no_outliers_gini.sort_values(by=['gini'], ascending=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment