Skip to content

Instantly share code, notes, and snippets.

@makispl
Last active September 10, 2021 12:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save makispl/a1d769956720f9f4bee3c6aa4d3827e7 to your computer and use it in GitHub Desktop.
Save makispl/a1d769956720f9f4bee3c6aa4d3827e7 to your computer and use it in GitHub Desktop.
def make_predictions():
    """
    Returns a dataframe with the predicted clusters.

    Reads the testing dataset from ``config.TESTING_FILE``, keeps the rows
    whose 'GAME_DATE' is before '2020-11-01', predicts a cluster for each
    of them with the model stored at ``config.MODEL_IN_USE`` and writes
    the labeled rows to '../data/processed/test_proc_labeled.csv'.

    Parameters
    ---------
    -

    Returns
    -------
    preds_df : a dataframe object
        Contains the per game 'pred_cluster'
    """
    # read the testing dataset; GAME_ID is kept as text instead of being
    # parsed as a number (presumably to preserve leading zeros - confirm)
    df = pd.read_csv(config.TESTING_FILE, converters={'GAME_ID': str})

    # the model is fed the 9 pca features: 'pca_1' ... 'pca_9'
    features = [f"pca_{i}" for i in range(1, 10)]

    # restrict to the prediction data: games dated before 2020-11-01
    # NOTE(review): this is a plain string comparison, so it assumes
    # GAME_DATE holds ISO 'YYYY-MM-DD' text - confirm against the csv
    preds_df = df.loc[df.GAME_DATE < '2020-11-01', :].copy()
    X_test = preds_df.loc[:, features].values

    # load the model
    clf = joblib.load(config.MODEL_IN_USE)

    # predict one cluster label per row from the pca features
    y_pred = clf.predict(X_test)

    # complete the testing dataset with the predictions
    preds_df.loc[:, 'pred_cluster'] = y_pred

    # save the labeled testing dataset as a new csv
    preds_df.to_csv(
        '../data/processed/test_proc_labeled.csv', index=False)

    return preds_df
def select_player(preds_df, target_cluster=2):
    """
    Takes in the preds_df, prompts the user to enter
    the desired players' full names and ranks them by the
    share of their rows that were predicted as `target_cluster`.

    Parameters
    ---------
    preds_df : a dataframe object
        Contains the 'PLAYER_NAME' and the per game 'pred_cluster'
    target_cluster : the 'pred_cluster' label to rank by
        Defaults to 2.
        NOTE(review): earlier documentation calls this "cluster_3",
        presumably because the labels are zero-based - confirm.

    Returns
    -------
    ranking_sorted : a dictionary object
        Maps every registered candidate to the fraction (rounded to
        2 decimals) of their rows labeled `target_cluster`, ordered
        from the highest fraction to the lowest. It is also printed.
    """
    # set candidates; surrounding blanks are stripped so that input typed
    # as "Name One, Name Two" (as the prompt suggests) matches the dataset
    raw_names = input(
        "enter the candidate players' full names separated by comma, like:\nGerasimos Plegas, GitHub Reader : ")
    candidates = [name.strip() for name in raw_names.split(',')]

    # define the dataset's players (a set gives O(1) membership checks)
    names = set(preds_df.PLAYER_NAME.unique())

    # keep only the candidates that exist in the dataset; an explicit
    # check is used because `assert` is removed when running with -O
    final_candidates = []
    for candit in candidates:
        if not candit:
            # ignore empty entries, e.g. produced by a trailing comma
            continue
        if candit in names:
            final_candidates.append(candit)
        else:
            print(f"The name: {candit} is not registered.")

    ranking = {}
    # compute each candidate's ratio of rows in the target cluster
    for candit in final_candidates:
        candit_clusters = preds_df.loc[
            preds_df.PLAYER_NAME == candit, 'pred_cluster']
        shares = candit_clusters.value_counts(normalize=True)
        # a player with no row in the target cluster has a share of 0
        # (a direct .loc lookup would raise KeyError in that case)
        share = shares.get(target_cluster, 0.0)
        ranking[candit] = round(float(share), 2)

    # sort the players by their ranking, best first
    ranking_sorted = dict(sorted(
        ranking.items(), key=lambda item: item[1], reverse=True))
    print(ranking_sorted)
    return ranking_sorted
def _str_to_bool(value):
    """Convert a command line token such as 'True' or '0' to a bool."""
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value, got {value!r}")


def build_arg_parser():
    """
    Returns the command line parser of the script.

    The only argument is the optional `--rank`, which engages the
    players' ranking. `--rank` and `--rank True` switch it on, while
    `--rank False` (or omitting the flag) leaves it off.
    """
    # initialize ArgumentParser class of argparse
    parser = argparse.ArgumentParser()
    # `type=bool` is deliberately avoided: bool() of any non-empty string,
    # including "False", is True, so the flag could never be switched off
    parser.add_argument(
        "--rank",
        type=_str_to_bool,
        nargs="?",
        const=True,
        default=False,
        help="rank the selected players after making the predictions"
    )
    return parser


if __name__ == "__main__":
    # read the arguments from the command line
    args = build_arg_parser().parse_args()
    preds_df = make_predictions()
    if args.rank:
        # run player's ranking
        select_player(preds_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment