Skip to content

Instantly share code, notes, and snippets.

Avatar

Plegas Gerasimos makispl

View GitHub Profile
View predict.py
def make_predictions():
"""
Returns a dataframe with the predicted clusters.
Parameters
---------
-
Returns
-------
preds_df : a dataframe object
Contains the per game 'pred_cluster'
View dataset.py
# import the libraries
from nba_api.stats.endpoints import boxscoreadvancedv2
import sqlite3 as sql
import pandas as pd
import numpy as np
from multiprocessing import Pool
import requests
from functools import partial
from io import BytesIO
import os
View pred_clusters_net_score.tsv
pred_cluster OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS NET_SCORE
0 108.127076 0.123707 0.698970 9.473168 0.567697 0.602506 37.103216 87.760614
1 111.572510 0.103166 1.015115 9.961357 0.546970 0.578256 65.852969 103.457844
2 83.417965 0.092533 0.175866 12.973969 0.000000 0.000256 14.727773 81.352873
3 111.796596 0.263722 2.570398 9.926654 0.514248 0.557363 70.367210 109.353358
View xgboost_grid_search.py
if __name__ == "__main__":
# load the full training data with folds
df = pd.read_csv('../data/processed/train_proc_labeled_folds.csv',
converters={'GAME_ID': lambda x: str(x)})
features = [
'DEF_RATING',
'AST_PCT',
'AST_RATIO',
View START_POSITION_descriptive_stats.tsv
START_POSITION OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS
0 101.680642 0.120858 0.655623 10.245366 0.455788 0.486946 34.830412
1 108.583286 0.217609 2.013673 10.038103 0.504163 0.539964 64.867027
2 108.151839 0.119372 1.155329 9.782779 0.520350 0.552918 62.546659
3 108.147874 0.114246 0.995818 11.557729 0.562291 0.589391 56.833615
View permutation_feat_importance.py
def feat_permutation_importance(df, feats, model):
"""
Takes in a dataframe of 'plays', features list
and model, plots and returns the mean score across
all the folds
Parameters
---------
df : a dataframe object
Contains the plays
feats : a list object
View baseline_model.py
def run_model(df, folds, feats, model):
"""
Takes in a dataframe of 'plays', the # folds,
features list and model, prints and returns
the mean score across all the folds
Parameters
---------
df : a dataframe object
Contains the plays
folds : int
View net_scoring.py
# allocate weights
# Build one weight per feature, in the order the features are listed.
# The first group that contains the feature decides its weight:
#   group_1 -> 0.5, group_2 -> 0.3, group_3 -> 0.2
# NOTE: a feature found in none of the groups contributes no entry,
# so `wts` can end up shorter than `features`.
wts = []
for col in features:
    if col in group_1:
        weight = 0.5
    elif col in group_2:
        weight = 0.3
    elif col in group_3:
        weight = 0.2
    else:
        # no matching group: skip without appending
        continue
    wts.append(weight)
View gm_clusters_net_score.tsv
gm_cluster OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS NET_SCORE
0 105.717206 0.126598 0.758258 9.772913 0.550722 0.588245 39.160862 422.192820
1 108.197048 0.118307 1.101867 10.395477 0.533829 0.564773 60.543945 621.134763
2 82.131297 0.093151 0.165360 12.502479 0.000000 0.000000 14.171583 193.313073
3 108.582897 0.217407 2.018245 10.009031 0.504521 0.540345 64.923704 662.488581
View gmm.py
# Work on a separate copy for the Gaussian-mixture implementation, so the
# 'gm_cluster' column is added to the copy and not to plays_df
plays_gm_df = plays_df.copy()
# Build a 4-component GM model; note 'init_params' - the mixture
# is initialized from kmeans
gm = GaussianMixture(
    n_components=4,
    covariance_type='full',
    init_params='kmeans',
    tol=1e-4,
    n_init=10,
    random_state=1,
)
# Fit on the PCA scores and store the predicted cluster index of each row
plays_gm_df['gm_cluster'] = gm.fit_predict(pca_scores)