Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Predictor columns selected from the processed training CSV.
FEATURES = [
    'DEF_RATING',
    'AST_PCT',
    'AST_RATIO',
    'OREB_PCT',
    'DREB_PCT',
    'REB_PCT',
    'TM_TOV_PCT',
    'EFG_PCT',
    'TS_PCT',
    'USG_PCT',
    'POSS',
]

# Hyper-parameter grid: 3 * 2 * 2 * 3 = 36 candidate settings.
# The L2 penalty is spelled `reg_lambda`, the name the scikit-learn wrapper
# of XGBoost documents; the native booster alias `lambda` only reaches the
# model through kwargs passthrough and can coexist with the wrapper's own
# `reg_lambda` default on some versions.
PARAM_GRID = {
    "max_depth": [3, 5, 7],
    "colsample_bytree": [0.5, 1.0],
    "reg_lambda": [0.1, 1.0],
    "n_estimators": [50, 100, 200],
}


def format_best_params(best_params, param_names):
    """Return one tab-indented ``name: value`` line per tuned parameter.

    Args:
        best_params: Mapping from parameter name to its selected value.
        param_names: Names to report; they are emitted in sorted order.

    Returns:
        A list of strings, one per name in ``param_names``.

    Raises:
        KeyError: If a requested name is missing from ``best_params``.
    """
    return [f"\t{name}: {best_params[name]}" for name in sorted(param_names)]


def main():
    """Grid-search an XGBoost classifier on the labeled training data.

    Reads the processed CSV, fits a ``GridSearchCV`` over ``PARAM_GRID``
    scored with weighted F1, and prints the best score and parameters.

    Relies on ``pd``, ``xgb`` and ``model_selection`` being imported at
    module level (those imports are outside this section of the file).
    """
    # Load the full training data; GAME_ID is kept as a string so it is not
    # parsed as a number.
    df = pd.read_csv(
        '../data/processed/train_proc_labeled_folds.csv',
        converters={'GAME_ID': str},
    )

    # Feature matrix and target labels.
    X = df[FEATURES]
    y = df.gm_cluster.values

    # Initialize the XGBoost classifier (n_jobs=-1 lets the booster use all
    # available threads).
    classifier = xgb.XGBClassifier(
        n_jobs=-1
    )

    # Initialize the grid search; weighted F1 is the selection metric.
    # The search itself runs candidates serially (n_jobs=1) while each fit
    # is multi-threaded via the classifier.
    # NOTE(review): the file name suggests the CSV carries precomputed fold
    # assignments, but cv=5 makes the search build its own 5 folds and the
    # stored ones are not used here -- confirm this is intended.
    model = model_selection.GridSearchCV(
        estimator=classifier,
        param_grid=PARAM_GRID,
        scoring="f1_weighted",
        verbose=10,
        n_jobs=1,
        cv=5,
    )

    # Fit every candidate on the training data (the best one is refit on
    # the full data by GridSearchCV's default refit behaviour).
    model.fit(X, y)

    print(f"Best score: {model.best_score_}")
    print("Best parameters set:")
    # best_params_ holds exactly the grid entries chosen by the search, so
    # there is no need to dig them out of the fitted estimator's full
    # parameter dictionary.
    for line in format_best_params(model.best_params_, PARAM_GRID.keys()):
        print(line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment