Skip to content

Instantly share code, notes, and snippets.

View makispl's full-sized avatar

Plegas Gerasimos makispl

View GitHub Profile
@makispl
makispl / predict.py
Last active September 10, 2021 12:38
def make_predictions():
"""
Returns a dataframe with the predicted clusters.
Parameters
---------
-
Returns
-------
preds_df : a dataframe object
Contains the per game 'pred_cluster'
# import the libraries
from nba_api.stats.endpoints import boxscoreadvancedv2
import sqlite3 as sql
import pandas as pd
import numpy as np
from multiprocessing import Pool
import requests
from functools import partial
from io import BytesIO
import os
pred_cluster OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS NET_SCORE
0 108.127076 0.123707 0.698970 9.473168 0.567697 0.602506 37.103216 87.760614
1 111.572510 0.103166 1.015115 9.961357 0.546970 0.578256 65.852969 103.457844
2 83.417965 0.092533 0.175866 12.973969 0.000000 0.000256 14.727773 81.352873
3 111.796596 0.263722 2.570398 9.926654 0.514248 0.557363 70.367210 109.353358
if __name__ == "__main__":
# load the full training data with folds
df = pd.read_csv('../data/processed/train_proc_labeled_folds.csv',
converters={'GAME_ID': lambda x: str(x)})
features = [
'DEF_RATING',
'AST_PCT',
'AST_RATIO',
START_POSITION OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS
0 101.680642 0.120858 0.655623 10.245366 0.455788 0.486946 34.830412
1 108.583286 0.217609 2.013673 10.038103 0.504163 0.539964 64.867027
2 108.151839 0.119372 1.155329 9.782779 0.520350 0.552918 62.546659
3 108.147874 0.114246 0.995818 11.557729 0.562291 0.589391 56.833615
def feat_permutation_importance(df, feats, model):
"""
Takes in a dataframe of 'plays', features list
and model, plots and returns the mean score across
all the folds
Parameters
---------
df : a dataframe object
Contains the plays
feats : a list object
def run_model(df, folds, feats, model):
"""
Takes in a dataframe of 'plays', the # folds,
features list and model, prints and returns
the mean score across all the folds
Parameters
---------
df : a dataframe object
Contains the plays
folds : int
# Load the pre-processed training plays; GAME_ID is passed through `str`
# so it is kept as text instead of being parsed as a number.
plays_df = pd.read_csv(
    '../data/interim/plays_17_18_19_pre_proc_train.csv',
    converters={'GAME_ID': str},
)
# Build the normalised feature set: the scaler is fitted on, and applied to,
# a copy of `data`, so `data` itself is not the object handed to the scaler.
scaler = MinMaxScaler()
data_stnd = scaler.fit_transform(data.copy())
# Allocate one weight per feature according to the first group it appears in:
# group_1 -> 0.5, group_2 -> 0.3, group_3 -> 0.2.
# NOTE(review): a feature found in none of the groups contributes no entry,
# so `wts` can end up shorter than `features` — confirm this is intended.
wts = []
for col in features:
    if col in group_1:
        weight = 0.5
    elif col in group_2:
        weight = 0.3
    elif col in group_3:
        weight = 0.2
    else:
        continue
    wts.append(weight)
gm_cluster OFF_RATING AST_PCT AST_TOV TM_TOV_PCT EFG_PCT TS_PCT POSS NET_SCORE
0 105.717206 0.126598 0.758258 9.772913 0.550722 0.588245 39.160862 422.192820
1 108.197048 0.118307 1.101867 10.395477 0.533829 0.564773 60.543945 621.134763
2 82.131297 0.093151 0.165360 12.502479 0.000000 0.000000 14.171583 193.313073
3 108.582897 0.217407 2.018245 10.009031 0.504521 0.540345 64.923704 662.488581