Last active October 13, 2021 21:37
XGBoost hyperparameter search using scikit-learn RandomizedSearchCV
import time

import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

x_train, y_train, x_valid, y_valid, x_test, y_test = ...  # load datasets

clf = xgb.XGBClassifier()

param_grid = {
    'silent': [False],
    'max_depth': [6, 10, 15, 20],
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'colsample_bylevel': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'min_child_weight': [0.5, 1.0, 3.0, 5.0, 7.0, 10.0],
    'gamma': [0, 0.25, 0.5, 1.0],
    'reg_lambda': [0.1, 1.0, 5.0, 10.0, 50.0, 100.0],
    'n_estimators': [100]}

fit_params = {'eval_metric': 'mlogloss',
              'early_stopping_rounds': 10,
              'eval_set': [(x_valid, y_valid)]}

rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=20,
                            n_jobs=1, verbose=2, cv=2,
                            fit_params=fit_params,
                            scoring='neg_log_loss', refit=False,
                            random_state=42)

print("Randomized search..")
search_time_start = time.time()
rs_clf.fit(x_train, y_train)
print("Randomized search time:", time.time() - search_time_start)

best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Best score: {}".format(best_score))
print("Best params: ")
for param_name in sorted(best_params.keys()):
    print('%s: %r' % (param_name, best_params[param_name]))
According to the docs, the fit_params constructor argument has been deprecated (and later removed); fit-time parameters are now passed directly to fit().
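A minimal sketch of the newer calling convention, with LogisticRegression and sample_weight standing in for XGBClassifier and its eval_set parameters (in recent XGBoost versions, eval_metric and early_stopping_rounds have themselves moved into the XGBClassifier constructor):

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = make_classification(n_samples=200, random_state=42)

search = RandomizedSearchCV(LogisticRegression(max_iter=500),
                            {'C': [0.01, 0.1, 1.0, 10.0]},
                            n_iter=4, cv=2, scoring='neg_log_loss',
                            refit=False, random_state=42)

# Fit-time parameters go straight into fit() as keyword arguments,
# instead of a fit_params= argument on the constructor:
fit_params = {'sample_weight': np.ones(len(y))}
search.fit(X, y, **fit_params)
print(search.best_params_)
```

The same `search.fit(x_train, y_train, **fit_params)` shape applies to the gist's XGBoost fit params on scikit-learn versions where fit() still accepts them.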
- x_test and y_test are declared but never used. Where are we supposed to use them?
- RandomizedSearchCV sets cv=2. What does that mean? Are we doing k-fold cross-validation with 2 splits? Or does the XGBoost classifier ignore that and use (x_valid, y_valid) instead, regardless of the cv value?
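On the cv=2 question: RandomizedSearchCV always runs k-fold cross-validation on the data passed to its fit() (here, 2 folds of x_train), and scoring uses those folds; the eval_set forwarded through the fit params is used by XGBoost only for early stopping inside each fold's fit. A scikit-learn-only sketch showing that cv=2 really produces two scored splits:

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = make_classification(n_samples=100, random_state=0)
search = RandomizedSearchCV(LogisticRegression(max_iter=500),
                            {'C': [0.1, 1.0]}, n_iter=2, cv=2,
                            random_state=0)
search.fit(X, y)

# cv=2 -> two folds, reflected in per-split columns of cv_results_:
split_keys = [k for k in search.cv_results_ if k.startswith('split')]
print(split_keys)  # ['split0_test_score', 'split1_test_score']
```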
Thank you for sharing. I just want to highlight a statement from the scikit-learn docs on continuous parameters (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html): "It is highly recommended to use continuous distributions for continuous parameters."
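Following that recommendation, the list-valued entries above could be replaced with scipy.stats distributions. An illustrative sketch, with ranges chosen to mirror the lists in the gist:

```python
from scipy.stats import loguniform, uniform

# Continuous distributions instead of fixed candidate lists:
param_distributions = {
    'learning_rate': loguniform(1e-3, 0.3),   # log-uniform over [0.001, 0.3]
    'subsample': uniform(0.5, 0.5),           # uniform over [0.5, 1.0]
    'colsample_bytree': uniform(0.4, 0.6),    # uniform over [0.4, 1.0]
    'min_child_weight': uniform(0.5, 9.5),    # uniform over [0.5, 10.0]
    'reg_lambda': loguniform(0.1, 100.0),     # log-uniform over [0.1, 100.0]
}

# RandomizedSearchCV calls .rvs() on anything with that method;
# here we draw one sample by hand just to show the mechanism:
sample = {k: v.rvs(random_state=42) for k, v in param_distributions.items()}
print(sample)
```

This dict can be passed to RandomizedSearchCV in place of param_grid; lists and distributions can also be mixed in the same dict.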
I think that you have to set refit=True in order to be able to extract best_score = rs_clf.best_score_.