Last active
September 10, 2021 12:38
-
-
Save makispl/a1d769956720f9f4bee3c6aa4d3827e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_predictions():
    """
    Returns a dataframe with the predicted clusters.

    Reads the testing dataset from ``config.TESTING_FILE``, keeps the rows
    whose GAME_DATE is before '2020-11-01', predicts a cluster for each kept
    row with the model stored at ``config.MODEL_IN_USE`` and writes the
    labeled rows to '../data/processed/test_proc_labeled.csv'.

    Parameters
    ---------
    -

    Returns
    -------
    preds_df : a dataframe object
        Contains the per game 'pred_cluster'
    """
    # read the testing dataset; GAME_ID is forced to str so pandas does not
    # parse it as a number
    df = pd.read_csv(config.TESTING_FILE, converters={'GAME_ID': str})

    # the model is fed the 9 PCA components: pca_1 ... pca_9
    pca_feats = [f"pca_{i}" for i in range(1, 10)]

    # keep only the prediction rows dated before 2020-11-01
    # NOTE(review): this is a plain string comparison, so it presumably relies
    # on GAME_DATE being stored as ISO 'YYYY-MM-DD' text -- confirm.
    # NOTE(review): an earlier comment described the window as 2020-01-01 to
    # 2020-08-31, which does not match this cutoff -- confirm the intent.
    preds_df = df.loc[(df.GAME_DATE < '2020-11-01'), :].copy()
    X_test = preds_df.loc[:, pca_feats].values

    # load the persisted model
    clf = joblib.load(config.MODEL_IN_USE)

    # predict one cluster label per row from the PCA features
    y_pred = clf.predict(X_test)

    # attach the predictions to the filtered dataset
    preds_df.loc[:, 'pred_cluster'] = y_pred

    # persist the labeled prediction set
    preds_df.to_csv(
        '../data/processed/test_proc_labeled.csv', index=False)

    return preds_df
def select_player(preds_df):
    """
    Takes in the preds_df, prompts the user to enter
    the desired players full names,
    returns a dictionary with each players share of plays predicted
    as cluster label 2.

    Names are split on commas and stripped of surrounding whitespace, so
    "A, B" and "A,B" are equivalent. Names that do not appear in
    preds_df.PLAYER_NAME are reported on stdout and skipped. A registered
    player with no rows in cluster label 2 gets a share of 0.0.

    NOTE(review): the project refers to this group as "cluster_3";
    presumably the model labels are zero-based so that label 2 is the
    third cluster -- confirm.

    Parameters
    ---------
    preds_df : a dataframe object
        Contains the per game 'pred_cluster' and the 'PLAYER_NAME'

    Returns
    -------
    ranking_sorted : a dictionary object
        Maps each accepted player name to the fraction (rounded to 2
        decimals) of that player's rows whose 'pred_cluster' is 2,
        ordered from the highest fraction to the lowest.
        The dictionary is also printed.
    """
    # set candidates; strip each name so that "A, B" matches "B"
    raw_names = input(
        "enter the candidate players' full names separated by comma, like:\nGerasimos Plegas, GitHub Reader : ")
    candidates = [name.strip() for name in raw_names.split(',')]

    # define the dataset's players (a set gives O(1) membership tests)
    names = set(preds_df.PLAYER_NAME.unique())

    # keep only the candidates that exist in the dataset
    final_candidates = []
    for candit in candidates:
        if not candit:
            # empty entry, e.g. from a trailing comma
            continue
        if candit in names:
            final_candidates.append(candit)
        else:
            print(f"The name: {candit} is not registered.")

    ranking = {}
    # check for their membership in cluster label 2 and the ratio
    for candit in final_candidates:
        player_clusters = preds_df.loc[
            preds_df.PLAYER_NAME == candit, 'pred_cluster']
        vals = player_clusters.value_counts(normalize=True)
        # a player may never have been assigned to label 2
        share = vals.loc[2] if 2 in vals.index else 0.0
        ranking[candit] = round(float(share), 2)

    # sort the players by their ranking, best first
    ranking_sorted = dict(sorted(
        ranking.items(), key=lambda item: item[1], reverse=True))
    print(ranking_sorted)
    return ranking_sorted
def parse_rank_flag(value):
    """
    Convert the text given to ``--rank`` into a real boolean.

    ``argparse`` with ``type=bool`` treats every non-empty string as True,
    so ``--rank False`` would enable ranking. Here the usual "off" spellings
    (case-insensitive, surrounding whitespace ignored) map to False and
    everything else maps to True.
    """
    return str(value).strip().lower() not in {
        "", "0", "false", "f", "no", "n", "off"}


if __name__ == "__main__":
    # initialize ArgumentParser class of argparse
    parser = argparse.ArgumentParser()

    # add the different arguments you need and their type
    # currently only the rank is engaged
    parser.add_argument(
        "--rank",
        type=parse_rank_flag,
        nargs="?",      # allow a bare `--rank` as well as `--rank True`
        const=True,     # value used for a bare `--rank`
        default=False,  # ranking stays off when the flag is absent
        help="run the player's ranking after the predictions"
    )

    # read the arguments from the command line
    args = parser.parse_args()

    preds_df = make_predictions()

    if args.rank:
        # run player's ranking
        select_player(preds_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment