Plegas Gerasimos makispl

## predict.py
def make_predictions():
    """
    Returns a dataframe with the predicted clusters.
    Parameters
    ---------
    -
    Returns
    -------
    preds_df : a dataframe object
             Contains the per game 'pred_cluster'

## dataset.py
# import the libraries
from nba_api.stats.endpoints import boxscoreadvancedv2
import sqlite3 as sql
import pandas as pd
import numpy as np
from multiprocessing import Pool
import requests
from functools import partial
from io import BytesIO
import os

## pred_clusters_net_score.tsv

          
            pred_cluster
            OFF_RATING
            AST_PCT
            AST_TOV
            TM_TOV_PCT
            EFG_PCT
            TS_PCT
            POSS
            NET_SCORE

            
              0
              108.127076
              0.123707
              0.698970
              9.473168
              0.567697
              0.602506
              37.103216
              87.760614

            
              1
              111.572510
              0.103166
              1.015115
              9.961357
              0.546970
              0.578256
              65.852969
              103.457844

            
              2
              83.417965
              0.092533
              0.175866
              12.973969
              0.000000
              0.000256
              14.727773
              81.352873

            
              3
              111.796596
              0.263722
              2.570398
              9.926654
              0.514248
              0.557363
              70.367210
              109.353358

## xgboost_grid_search.py
if __name__ == "__main__":

    # load the full training data with folds
    df = pd.read_csv('../data/processed/train_proc_labeled_folds.csv',
                     converters={'GAME_ID': lambda x: str(x)})

    features = [
      'DEF_RATING',
      'AST_PCT',
      'AST_RATIO',

## START_POSITION_descriptive_stats.tsv

          
            START_POSITION
            OFF_RATING
            AST_PCT
            AST_TOV
            TM_TOV_PCT
            EFG_PCT
            TS_PCT
            POSS

            
              0
              101.680642
              0.120858
              0.655623
              10.245366
              0.455788
              0.486946
              34.830412

            
              1
              108.583286
              0.217609
              2.013673
              10.038103
              0.504163
              0.539964
              64.867027

            
              2
              108.151839
              0.119372
              1.155329
              9.782779
              0.520350
              0.552918
              62.546659

            
              3
              108.147874
              0.114246
              0.995818
              11.557729
              0.562291
              0.589391
              56.833615

## permutation_feat_importance.py
def feat_permutation_importance(df, feats, model):
    """
    Takes in a dataframe of 'plays', features list
    and model, plots and returns the mean score across
    all the folds
    Parameters
    ---------
    df : a dataframe object
            Contains the plays
    feats : a list object

## baseline_model.py
def run_model(df, folds, feats, model):
    """
    Takes in a dataframe of 'plays', the # folds,
    features list and model, prints and returns
    the mean score across all the folds
    Parameters
    ---------
    df : a dataframe object
            Contains the plays
    folds : int

## pca4clustering.py
# read in the training data; GAME_ID is passed through str() so the column
# is kept as text instead of being parsed as a number
# NOTE(review): presumably this preserves leading zeros in the IDs - confirm
plays_df = pd.read_csv('../data/interim/plays_17_18_19_pre_proc_train.csv',
                       converters={'GAME_ID': lambda x: str(x)})

# work on a copy of the features selected for normalisation, so the scaled
# values below do not overwrite `data`
# NOTE(review): `data` is defined outside this excerpt - presumably a subset
# of plays_df columns; verify against the full script
data_stnd = data.copy()

# instantiate the scaler, fit it on the copy and replace the copy with the
# scaled result; `scaler` keeps the fitted parameters for later use
# NOTE(review): assuming this is scikit-learn's MinMaxScaler with default
# settings, each feature is rescaled to [0, 1] and the result is a NumPy
# array rather than a DataFrame - confirm
scaler = MinMaxScaler()
data_stnd = scaler.fit_transform(data_stnd)

## net_scoring.py
# allocate weights: one entry per feature that belongs to a group
wts = []

for col in features:
    # resolve the weight of the first group that contains the feature
    if col in group_1:
        weight = 0.5
    elif col in group_2:
        weight = 0.3
    elif col in group_3:
        weight = 0.2
    else:
        # features outside every group contribute no entry, so wts may be
        # shorter than features
        continue
    wts.append(weight)

## gm_clusters_net_score.tsv

          
            gm_cluster
            OFF_RATING
            AST_PCT
            AST_TOV
            TM_TOV_PCT
            EFG_PCT
            TS_PCT
            POSS
            NET_SCORE

            
              0
              105.717206
              0.126598
              0.758258
              9.772913
              0.550722
              0.588245
              39.160862
              422.192820

            
              1
              108.197048
              0.118307
              1.101867
              10.395477
              0.533829
              0.564773
              60.543945
              621.134763

            
              2
              82.131297
              0.093151
              0.165360
              12.502479
              0.000000
              0.000000
              14.171583
              193.313073

            
              3
              108.582897
              0.217407
              2.018245
              10.009031
              0.504521
              0.540345
              64.923704
              662.488581
	def make_predictions():
	"""
	Returns a dataframe with the predicted clusters.
	Parameters
	---------
	-
	Returns
	-------
	preds_df : a dataframe object
	Contains the per game 'pred_cluster'
	# import the libraries
	from nba_api.stats.endpoints import boxscoreadvancedv2
	import sqlite3 as sql
	import pandas as pd
	import numpy as np
	from multiprocessing import Pool
	import requests
	from functools import partial
	from io import BytesIO
	import os
pred_cluster	OFF_RATING	AST_PCT	AST_TOV	TM_TOV_PCT	EFG_PCT	TS_PCT	POSS	NET_SCORE
0	108.127076	0.123707	0.698970	9.473168	0.567697	0.602506	37.103216	87.760614
1	111.572510	0.103166	1.015115	9.961357	0.546970	0.578256	65.852969	103.457844
2	83.417965	0.092533	0.175866	12.973969	0.000000	0.000256	14.727773	81.352873
3	111.796596	0.263722	2.570398	9.926654	0.514248	0.557363	70.367210	109.353358
	if __name__ == "__main__":

	# load the full training data with folds
	df = pd.read_csv('../data/processed/train_proc_labeled_folds.csv',
	converters={'GAME_ID': lambda x: str(x)})

	features = [
	'DEF_RATING',
	'AST_PCT',
	'AST_RATIO',
START_POSITION	OFF_RATING	AST_PCT	AST_TOV	TM_TOV_PCT	EFG_PCT	TS_PCT	POSS
0	101.680642	0.120858	0.655623	10.245366	0.455788	0.486946	34.830412
1	108.583286	0.217609	2.013673	10.038103	0.504163	0.539964	64.867027
2	108.151839	0.119372	1.155329	9.782779	0.520350	0.552918	62.546659
3	108.147874	0.114246	0.995818	11.557729	0.562291	0.589391	56.833615
	def feat_permutation_importance(df, feats, model):
	"""
	Takes in a dataframe of 'plays', features list
	and model, plots and returns the mean score across
	all the folds
	Parameters
	---------
	df : a dataframe object
	Contains the plays
	feats : a list object
	def run_model(df, folds, feats, model):
	"""
	Takes in a dataframe of 'plays', the # folds,
	features list and model, prints and returns
	the mean score across all the folds
	Parameters
	---------
	df : a dataframe object
	Contains the plays
	folds : int
	# read in the training data; GAME_ID is passed through str() so the column
	# is kept as text instead of being parsed as a number
	# NOTE(review): presumably this preserves leading zeros in the IDs - confirm
	plays_df = pd.read_csv('../data/interim/plays_17_18_19_pre_proc_train.csv',
	converters={'GAME_ID': lambda x: str(x)})

	# work on a copy of the features selected for normalisation, so the scaled
	# values below do not overwrite `data`
	# NOTE(review): `data` is defined outside this excerpt - presumably a subset
	# of plays_df columns; verify against the full script
	data_stnd = data.copy()

	# instantiate the scaler, fit it on the copy and replace the copy with the
	# scaled result; `scaler` keeps the fitted parameters for later use
	# NOTE(review): assuming this is scikit-learn's MinMaxScaler with default
	# settings, each feature is rescaled to [0, 1] and the result is a NumPy
	# array rather than a DataFrame - confirm
	scaler = MinMaxScaler()
	data_stnd = scaler.fit_transform(data_stnd)
	# allocate weights: build one weight per feature, chosen by the first
	# group (group_1, then group_2, then group_3) that contains the feature
	wts = []

	for col in features:
	    if col in group_1:
	        wts.append(0.5)
	    elif col in group_2:
	        wts.append(0.3)
	    elif col in group_3:
	        wts.append(0.2)
	    # a feature that is in none of the groups gets no entry, so wts can
	    # end up shorter than features
gm_cluster	OFF_RATING	AST_PCT	AST_TOV	TM_TOV_PCT	EFG_PCT	TS_PCT	POSS	NET_SCORE
0	105.717206	0.126598	0.758258	9.772913	0.550722	0.588245	39.160862	422.192820
1	108.197048	0.118307	1.101867	10.395477	0.533829	0.564773	60.543945	621.134763
2	82.131297	0.093151	0.165360	12.502479	0.000000	0.000000	14.171583	193.313073
3	108.582897	0.217407	2.018245	10.009031	0.504521	0.540345	64.923704	662.488581