Skip to content

Instantly share code, notes, and snippets.

@walterreade
Last active August 29, 2015 14:21
Show Gist options
  • Save walterreade/65460f077c81b6095c1f to your computer and use it in GitHub Desktop.
Save walterreade/65460f077c81b6095c1f to your computer and use it in GitHub Desktop.
Show the progression of a Kaggle leaderboard score distribution over time.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Histogram window: only per-team scores strictly inside
# (min_score, max_score) are drawn, split into 100 equal-width bins.
min_score = 0.30
max_score = 0.40


def team_scores_in_range(scores, dates, low, high):
    """Return each team's minimum score over ``dates``, filtered to a window.

    Args:
        scores: DataFrame with 'TeamName', 'SubmissionDate' and 'Score'
            columns.
        dates: Collection of 'SubmissionDate' values whose rows take part.
        low: Exclusive lower bound on the returned scores.
        high: Exclusive upper bound on the returned scores.

    Returns:
        A NumPy array with one entry per team whose minimum score over the
        selected rows satisfies ``low < score < high``.
    """
    submitted = scores.loc[scores['SubmissionDate'].isin(dates)]
    # NOTE(review): the per-team aggregate is the *minimum* score. If the
    # competition metric is higher-is-better this should presumably be
    # max() -- confirm against the competition's metric before changing.
    per_team = submitted.groupby('TeamName')['Score'].min()
    return per_team[(per_team < high) & (per_team > low)].values


def main():
    """Render one cumulative leaderboard histogram per submission date.

    Reads the public leaderboard CSV, then for every distinct submission
    date (in order of first appearance in the file) plots the distribution
    of per-team scores using all submissions up to and including that date.
    Each frame is saved as 'figs/NNN.png' and shown.
    """
    sns.set()
    scores = pd.read_csv('liberty-mutual-group-property-inspection-prediction_public_leaderboard.csv')

    # Collapse timestamps to calendar-day strings so that all submissions
    # made on the same day share one frame and the title reads as a date.
    scores['SubmissionDate'] = (
        pd.to_datetime(scores['SubmissionDate']).dt.date.astype(str)
    )

    # savefig does not create missing directories.
    os.makedirs('figs', exist_ok=True)

    # Bin edges never change between frames, so build them once.
    bins = np.linspace(min_score, max_score, 101)

    seen_dates = []
    for frame, day in enumerate(scores['SubmissionDate'].unique()):
        seen_dates.append(day)
        values = team_scores_in_range(scores, seen_dates, min_score, max_score)
        plt.hist(values, bins, color='b')
        plt.xlim((min_score, max_score))
        # Fixed y-range keeps successive frames visually comparable.
        plt.ylim((0, 200))
        plt.title(day)
        plt.savefig('figs/{:03d}.png'.format(frame))
        plt.show()
        # Under non-interactive backends show() leaves the figure open;
        # close it so the next histogram is not drawn on top of this one.
        plt.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment