Skip to content

Instantly share code, notes, and snippets.

@jonberliner
Created March 5, 2018 20:47
Show Gist options
  • Save jonberliner/8e0f5eeaf1754556ab70fdfa8ebb8151 to your computer and use it in GitHub Desktop.
Save jonberliner/8e0f5eeaf1754556ab70fdfa8ebb8151 to your computer and use it in GitHub Desktop.
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
# Download the CSV and save it under the file name used below.
# A '/' cell marks a city without a team in that league, so let the parser
# read it as missing data (NaN) right away.
df = pd.read_csv('sport_winning_by_city.csv', na_values='/')

# Keep only the city column and the winning-percentage ('PCT') columns, and
# rename each one to the upper-cased first word of its header.
keep_col_names = [cn for cn in df.columns if 'City' in cn or 'PCT' in cn]
# .copy() makes the subset an independent frame, so the column assignments
# below cannot trigger pandas' SettingWithCopyWarning.
df = df[keep_col_names].copy()
df.columns = [cn.split(' ')[0].upper() for cn in keep_col_names]

# Make sure every league column is numeric.
for league in ['NFL', 'NBA', 'NHL', 'MLB']:
    df[league] = df[league].astype(float)
# Rebuild the average from the individual league columns; the AVERAGE column
# that came with the data is discarded first.
df = df.drop('AVERAGE', axis=1)
# Row-wise mean. NaN entries (no team in that league) are skipped by default,
# so each city is averaged only over the leagues it actually has.
df['average'] = df[['NBA', 'NFL', 'MLB', 'NHL']].mean(axis=1)
# Best-performing cities first.
df = df.sort_values(by='average', ascending=False)
# Reorder columns for prettier plotting.
df = df[['CITY', 'average', 'NBA', 'NFL', 'MLB', 'NHL']]
# Reshape to long form (one row per city/series pair) so that every series
# can be drawn on the same chart.
mdf = pd.melt(df, id_vars='CITY', var_name='league', value_name='pct')
mdf = mdf.astype({'pct': float})
# plot!
# seaborn 0.9 renamed factorplot to catplot (and its `size` argument to
# `height`); use the new name when it exists and fall back to the old one
# so the script runs on both old and current seaborn releases.
plot_kwargs = dict(x="CITY", y="pct", hue="league", data=mdf,
                   kind="bar", palette="Set2", legend=False)
if hasattr(sns, "catplot"):
    g = sns.catplot(height=10, **plot_kwargs)
else:
    g = sns.factorplot(size=10, **plot_kwargs)
g.despine(left=True)
plt.xticks(rotation=45)
# Override the square default figure with a wide one that fits all cities.
g.fig.set_size_inches((16., 6.))
ax = g.axes[0][0]
ax.set_ylabel('winning percentage')
ax.set_xlabel('city')
plt.tight_layout()
# The grid's own legend is disabled above; draw one inside the axes instead.
plt.legend(loc='upper right')
plt.savefig('city_winners.png', dpi=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment