This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_predictions(): | |
""" | |
Returns a dataframe with the predicted clusters. | |
Parameters | |
--------- | |
- | |
Returns | |
------- | |
preds_df : a dataframe object | |
Contains the per game 'pred_cluster' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import the libraries | |
from nba_api.stats.endpoints import boxscoreadvancedv2 | |
import sqlite3 as sql | |
import pandas as pd | |
import numpy as np | |
from multiprocessing import Pool | |
import requests | |
from functools import partial | |
from io import BytesIO | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pred_cluster | OFF_RATING | AST_PCT | AST_TOV | TM_TOV_PCT | EFG_PCT | TS_PCT | POSS | NET_SCORE | |
---|---|---|---|---|---|---|---|---|---|
0 | 108.127076 | 0.123707 | 0.698970 | 9.473168 | 0.567697 | 0.602506 | 37.103216 | 87.760614 | |
1 | 111.572510 | 0.103166 | 1.015115 | 9.961357 | 0.546970 | 0.578256 | 65.852969 | 103.457844 | |
2 | 83.417965 | 0.092533 | 0.175866 | 12.973969 | 0.000000 | 0.000256 | 14.727773 | 81.352873 | |
3 | 111.796596 | 0.263722 | 2.570398 | 9.926654 | 0.514248 | 0.557363 | 70.367210 | 109.353358 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if __name__ == "__main__": | |
# load the full training data with folds | |
df = pd.read_csv('../data/processed/train_proc_labeled_folds.csv', | |
converters={'GAME_ID': lambda x: str(x)}) | |
features = [ | |
'DEF_RATING', | |
'AST_PCT', | |
'AST_RATIO', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
START_POSITION | OFF_RATING | AST_PCT | AST_TOV | TM_TOV_PCT | EFG_PCT | TS_PCT | POSS | |
---|---|---|---|---|---|---|---|---|
0 | 101.680642 | 0.120858 | 0.655623 | 10.245366 | 0.455788 | 0.486946 | 34.830412 | |
1 | 108.583286 | 0.217609 | 2.013673 | 10.038103 | 0.504163 | 0.539964 | 64.867027 | |
2 | 108.151839 | 0.119372 | 1.155329 | 9.782779 | 0.520350 | 0.552918 | 62.546659 | |
3 | 108.147874 | 0.114246 | 0.995818 | 11.557729 | 0.562291 | 0.589391 | 56.833615 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feat_permutation_importance(df, feats, model): | |
""" | |
Takes in a dataframe of 'plays', features list | |
and model, plots and returns the mean score across | |
all the folds | |
Parameters | |
--------- | |
df : a dataframe object | |
Contains the plays | |
feats : a list object |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def run_model(df, folds, feats, model): | |
""" | |
Takes in a dataframe of 'plays', the # folds, | |
features list and model, prints and returns | |
the mean score across all the folds | |
Parameters | |
--------- | |
df : a dataframe object | |
Contains the plays | |
folds : int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the training plays; every GAME_ID value is passed through str
# (presumably so the id is kept as text rather than parsed as a number).
plays_df = pd.read_csv(
    '../data/interim/plays_17_18_19_pre_proc_train.csv',
    converters={'GAME_ID': str},
)
# Scale a copy of the for-normalisation features so `data` itself is not
# handed to the scaler. `data` and MinMaxScaler come from outside this
# snippet (MinMaxScaler is presumably sklearn.preprocessing's - confirm).
scaler = MinMaxScaler()
data_stnd = scaler.fit_transform(data.copy())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Allocate one weight per feature, in the order of `features`.
# Groups are checked in priority order and the first one containing the
# feature decides its weight. As written here, a feature that belongs to
# none of the groups adds no entry to `wts`.
weight_by_group = (
    (group_1, 0.5),
    (group_2, 0.3),
    (group_3, 0.2),
)
wts = []
for col in features:
    for grp, grp_wt in weight_by_group:
        if col in grp:
            wts.append(grp_wt)
            break
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gm_cluster | OFF_RATING | AST_PCT | AST_TOV | TM_TOV_PCT | EFG_PCT | TS_PCT | POSS | NET_SCORE | |
---|---|---|---|---|---|---|---|---|---|
0 | 105.717206 | 0.126598 | 0.758258 | 9.772913 | 0.550722 | 0.588245 | 39.160862 | 422.192820 | |
1 | 108.197048 | 0.118307 | 1.101867 | 10.395477 | 0.533829 | 0.564773 | 60.543945 | 621.134763 | |
2 | 82.131297 | 0.093151 | 0.165360 | 12.502479 | 0.000000 | 0.000000 | 14.171583 | 193.313073 | |
3 | 108.582897 | 0.217407 | 2.018245 | 10.009031 | 0.504521 | 0.540345 | 64.923704 | 662.488581 |