deargle/get-kaggle-scores.py

## get-kaggle-scores.py
'''
This script downloads the public leaderboard data for a competition,
filters down to just the highest-scoring submissions for each
team with "Eargle" in the name, and saves the top-three scores
to a .csv in the current directory called `top-three <date-timestamp>.csv`

If you want to see the highest submission for _each_ team, just comment out
the `.head(3)` line near the end of the script (put a `#` before it).

To run this, get python if you don't already have it, and also `pip` if you don't already have it,
and install the needed packages if the script complains when you try to run it. e.g., `pip install pandas`
'''

import datetime
import zipfile

import pandas as pd
import requests

# --- Configuration ---------------------------------------------------------
username = ''  # set your kaggle username here
password = ''  # set your kaggle password here
competition_id = '3136'  # the code for titanic
read_me = 'titanic-publicleaderboard.csv'  # CSV file read after the zip is extracted
is_higher_score_better = True  # For Titanic, higher scores are better

# --- Download the zip --------------------------------------------------------
# The login URL carries a ReturnUrl that points at the competition's
# publicleaderboarddata.zip; the body of the login POST is what gets saved
# as the zip below.
session = requests.Session()
get_me = 'https://www.kaggle.com/account/login?ReturnUrl=%2fc%2f' + competition_id + '%2fpublicleaderboarddata.zip'
response = session.get(get_me)  # call it once to get the request verification cookie
response.raise_for_status()
payload = {
    'username': username,
    'password': password,
    '__RequestVerificationToken': session.cookies.get('__RequestVerificationToken'),
}
r = session.post(get_me, data=payload)
# Fail here with a clear HTTP error instead of saving an error page as the zip.
r.raise_for_status()
with open('the-zip.zip', 'wb') as f:
    f.write(r.content)

# extract zip file (must be placed in current directory)
path_to_zip_file = "the-zip.zip"
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall('.')

# Filter the leaderboard data down to the class's teams.
df = pd.read_csv(read_me)
# na=False: rows with a missing TeamName count as non-matches instead of
# putting NaN into the boolean mask.
eargle_teams = df[df['TeamName'].str.contains('Eargle', case=False, na=False)]  # select only those from my class
eargle_teams = eargle_teams[~eargle_teams['TeamName'].str.contains('999')]  # remove my own submissions

# Keep each team's best submission(s). "Best" follows is_higher_score_better,
# so a lower-is-better competition selects the minimum rather than the maximum.
best_score_per_team = eargle_teams.groupby('TeamName')['Score'].transform(
    'max' if is_higher_score_better else 'min'
)
eargle_teams_highest_for_each_team = eargle_teams[
    eargle_teams['Score'] == best_score_per_team
].reset_index(drop=True)

# Best score first; equal scores are ordered by ascending SubmissionDate.
eargle_teams_highest_first = eargle_teams_highest_for_each_team.sort_values(
    by=['Score', 'SubmissionDate'],
    ascending=[not is_higher_score_better, True],
)
eargle_teams_highest_first = eargle_teams_highest_first.head(3)
for_csv = eargle_teams_highest_first[['TeamName', 'SubmissionDate', 'Score']]
for_csv.to_csv('top-three %s.csv' % datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S'), index=False)
	'''
	This script downloads the public leaderboard data for a competition,
	filters down to just the highest-scoring submissions for each
	team with "Eargle" in the name, and saves the top-three scores
	to a .csv in the current directory called `top-three <date-timestamp>.csv`

	If you want to see the highest submission for _each_ team, just comment out
	the `.head(3)` line near the end of the script (put a `#` before it).

	To run this, get python if you don't already have it, and also `pip` if you don't already have it,
	and install the needed packages if the script complains when you try to run it. e.g., `pip install pandas`
	'''

	import zipfile
	import pandas as pd
	import requests

	# NOTE(review): this tab-indented section repeats the script at the top of
	# the file. As module-level code the leading tab is an unexpected indent;
	# confirm whether it is a paste artefact or belongs inside an enclosing
	# definition.

	# download the zip
	username = ''  # set your kaggle username here
	password = ''  # set your kaggle password here
	competition_id = '3136'  # the code for titanic
	read_me = 'titanic-publicleaderboard.csv'  # CSV file read after the zip is extracted
	is_higher_score_better = True  # For Titanic, higher scores are better

	session = requests.Session()
	get_me = 'https://www.kaggle.com/account/login?ReturnUrl=%2fc%2f' + competition_id + '%2fpublicleaderboarddata.zip'
	response = session.get(get_me)  # call it once to get the request verification cookie
	response.raise_for_status()
	payload = {
		'username': username,
		'password': password,
		'__RequestVerificationToken': session.cookies.get('__RequestVerificationToken'),
	}
	r = session.post(get_me, data=payload)
	# Fail here with a clear HTTP error instead of saving an error page as the zip.
	r.raise_for_status()
	with open('the-zip.zip', 'wb') as f:
		f.write(r.content)

	# extract zip file (must be placed in current directory)
	path_to_zip_file = "the-zip.zip"
	# The context manager closes the archive even if extraction raises.
	with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
		zip_ref.extractall('.')

	# Filter the leaderboard data down to the class's teams.
	df = pd.read_csv(read_me)
	# na=False: rows with a missing TeamName count as non-matches instead of
	# putting NaN into the boolean mask.
	eargle_teams = df[df['TeamName'].str.contains('Eargle', case=False, na=False)]  # select only those from my class
	eargle_teams = eargle_teams[~eargle_teams['TeamName'].str.contains('999')]  # remove my own submissions

	# Keep each team's best submission(s). "Best" follows is_higher_score_better,
	# so a lower-is-better competition selects the minimum rather than the maximum.
	best_score_per_team = eargle_teams.groupby('TeamName')['Score'].transform(
		'max' if is_higher_score_better else 'min'
	)
	eargle_teams_highest_for_each_team = eargle_teams[
		eargle_teams['Score'] == best_score_per_team
	].reset_index(drop=True)

	# Best score first; equal scores are ordered by ascending SubmissionDate.
	eargle_teams_highest_first = eargle_teams_highest_for_each_team.sort_values(
		by=['Score', 'SubmissionDate'],
		ascending=[not is_higher_score_better, True],
	)
	eargle_teams_highest_first = eargle_teams_highest_first.head(3)
	for_csv = eargle_teams_highest_first[['TeamName', 'SubmissionDate', 'Score']]
	for_csv.to_csv('top-three %s.csv' % datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S'), index=False)