Skip to content

Instantly share code, notes, and snippets.

@jkukul
Last active March 16, 2021 03:01
Show Gist options
  • Save jkukul/f02d239936a74f3db273bd02c17c1de1 to your computer and use it in GitHub Desktop.
Save jkukul/f02d239936a74f3db273bd02c17c1de1 to your computer and use it in GitHub Desktop.
Script to calculate your current score in Kaggle's March madness competition.
# python3
#
# Example usage:
# python get_score.py --gender M --submission-file SampleSubmissionStage2.csv
#
# TeamSpellings.csv and/or WTeamSpellings.csv file must be present in the current directory
import argparse
from math import log
import pandas as pd
import requests
# Season whose bracket is downloaded; it is also the prefix of submission IDs.
SEASON = 2019
RESULTS_URL = (
    'https://data.ncaa.com/casablanca/carmen/brackets/championships/'
    'basketball-{gender}/d1/{season}/data.json'
)
# Probabilities are clamped to [LOG_LOSS_EPS, 1 - LOG_LOSS_EPS] before log().
LOG_LOSS_EPS = 1e-15
RESULT_COLUMNS = ['ID', 'isLive', 'team1_name', 'team2_name', 'Pred', 'True']


def parse_args(argv=None):
    """Parse the command line (``argv=None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser(description='''
This script calculates your current score in Kaggle's March madness competition.
''')
    # Required: without it pd.read_csv(None) fails with an obscure error.
    parser.add_argument('--submission-file', type=str, required=True)
    parser.add_argument('--gender', type=str, help='W or M')
    return parser.parse_args(argv)


def fetch_games(gender, season=SEASON):
    """Download the bracket JSON and return its list of ``game`` dicts.

    ``gender`` is the URL fragment, either ``'men'`` or ``'women'``.
    Raises ``requests.HTTPError`` on a non-2xx response instead of failing
    later while decoding an error page as JSON.
    """
    url = RESULTS_URL.format(gender=gender, season=season)
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return [entry['game'] for entry in response.json()['games']]


def extract_scores(games):
    """Reduce raw game dicts to one small record per started game.

    First Four games and games that have not started yet are skipped.
    Each record holds ``isLive``, ``homeTeamName``, ``awayTeamName`` and
    ``winnerTeamName`` (the away team is reported on a tie, which can only
    happen while a game is still in progress).
    """
    scores = []
    for game in games:
        if game['bracketRound'].startswith('First Four') or game['gameState'] == 'pre':
            continue
        home_name = game['home']['names']['seo']
        away_name = game['away']['names']['seo']
        home_points = int(game['home']['score'])
        away_points = int(game['away']['score'])
        scores.append({
            'isLive': game['gameState'] != 'final',
            'homeTeamName': home_name,
            'awayTeamName': away_name,
            'winnerTeamName': home_name if home_points > away_points else away_name,
        })
    return scores


def log_loss(pred, outcome, eps=LOG_LOSS_EPS):
    """Return the log loss of one prediction.

    ``pred`` is the predicted probability that team 1 wins and ``outcome``
    is 1 if it did, else 0.  Both the probability and its complement are
    clamped to ``[eps, 1 - eps]`` so ``log`` never receives zero or a
    negative number.  A missing prediction (NaN) yields NaN so that it is
    ignored by ``Series.mean``.
    """
    if pd.isna(pred):
        return float('nan')
    p_win = min(max(pred, eps), 1 - eps)
    p_lose = min(max(1 - pred, eps), 1 - eps)
    return -(outcome * log(p_win) + (1 - outcome) * log(p_lose))


def build_results(scores, df_spellings, df_submissions, season=SEASON):
    """Join game records with team IDs and predictions and score them.

    Returns ``(df_results, unknown_names)``.  ``df_results`` has the columns
    in ``RESULT_COLUMNS`` plus ``log_loss``; ``unknown_names`` is the sorted
    list of team names that have no row in ``df_spellings``.  Games involving
    such a team are left out: keeping them would put NaN into the ID columns,
    turn them into floats and produce IDs like ``2019_1181.0_1295.0`` that
    match nothing in the submission file.
    """
    df = pd.DataFrame(
        scores,
        columns=['isLive', 'homeTeamName', 'awayTeamName', 'winnerTeamName'])

    # Name -> TeamID lookup; duplicates are dropped so the lookup is unique.
    team_ids = (df_spellings.drop_duplicates('TeamNameSpelling')
                .set_index('TeamNameSpelling')['TeamID'])
    df['homeTeamID'] = df['homeTeamName'].map(team_ids)
    df['awayTeamID'] = df['awayTeamName'].map(team_ids)

    home_missing = df['homeTeamID'].isna()
    away_missing = df['awayTeamID'].isna()
    unknown_names = sorted(
        set(df.loc[home_missing, 'homeTeamName'])
        | set(df.loc[away_missing, 'awayTeamName']))

    df = df[~(home_missing | away_missing)].copy()
    df['homeTeamID'] = df['homeTeamID'].astype(int)
    df['awayTeamID'] = df['awayTeamID'].astype(int)

    # Submission IDs list the lower team ID first; that team is "team 1".
    home_is_team1 = df['homeTeamID'] < df['awayTeamID']
    df['team1_name'] = df['homeTeamName'].where(home_is_team1, df['awayTeamName'])
    df['team2_name'] = df['awayTeamName'].where(home_is_team1, df['homeTeamName'])
    df['ID'] = [
        f'{season}_{min(home_id, away_id)}_{max(home_id, away_id)}'
        for home_id, away_id in zip(df['homeTeamID'], df['awayTeamID'])
    ]
    df['True'] = (df['winnerTeamName'] == df['team1_name']).astype(int)

    df = pd.merge(left=df, right=df_submissions[['ID', 'Pred']], how='left', on='ID')
    df = df[RESULT_COLUMNS].copy()
    df['log_loss'] = [
        log_loss(pred, outcome) for pred, outcome in zip(df['Pred'], df['True'])
    ]
    return df, unknown_names


def main(argv=None):
    """Print the per-game table and the mean log loss of the submission."""
    import sys

    args = parse_args(argv)
    # Any value other than W/w (including an omitted flag) means men.
    is_women = (args.gender or '').upper() == 'W'

    df_submissions = pd.read_csv(args.submission_file)
    df_spellings = pd.read_csv(
        f'{"W" if is_women else ""}TeamSpellings.csv', encoding='latin1')

    # FETCH RESULTS FROM NCAA API:
    games = fetch_games('women' if is_women else 'men')
    df_results, unknown_names = build_results(
        extract_scores(games), df_spellings, df_submissions)

    if unknown_names:
        print(
            'Warning: skipped games with team names missing from the '
            f'spellings file: {", ".join(unknown_names)}',
            file=sys.stderr)
    print(df_results.to_string(index=False))
    print(f'Your score: {round(df_results["log_loss"].mean(), 5)}')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment