# WillKoehrsen/gradient_boosted_hyperparameter_tuning.py

## gradient_boosted_hyperparameter_tuning.py
# Randomized hyperparameter search for a gradient boosting regressor.
# Each list below holds the candidate values for one hyperparameter; the
# random search draws n_iter combinations from them.

# Loss function to be optimized.
# 'squared_error' and 'absolute_error' are the current scikit-learn names for
# the legacy aliases 'ls' and 'lad' (deprecated in 1.0, removed in 1.2).
loss = ['squared_error', 'absolute_error', 'huber']

# Number of trees used in the boosting process
n_estimators = [100, 500, 900, 1100, 1500]

# Maximum depth of each tree
max_depth = [2, 3, 5, 10, 15]

# Minimum number of samples per leaf
min_samples_leaf = [1, 2, 4, 6, 8]

# Minimum number of samples to split a node
min_samples_split = [2, 4, 6, 10]

# Maximum number of features to consider for making splits.
# The legacy 'auto' option is intentionally omitted: for this regressor it
# meant "use all features", exactly like None, so it only duplicated a
# candidate, and it was removed in scikit-learn 1.3.
max_features = ['sqrt', 'log2', None]

# Define the grid of hyperparameters to search
hyperparameter_grid = {
    'loss': loss,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'min_samples_leaf': min_samples_leaf,
    'min_samples_split': min_samples_split,
    'max_features': max_features,
}

# Create the model to use for hyperparameter tuning
# (fixed seed so the boosting runs are reproducible)
model = GradientBoostingRegressor(random_state=42)

# Set up the random search with 4-fold cross validation:
# 25 sampled combinations, scored by negative mean absolute error,
# using all available cores and also recording training-fold scores.
random_cv = RandomizedSearchCV(
    estimator=model,
    param_distributions=hyperparameter_grid,
    cv=4,
    n_iter=25,
    scoring='neg_mean_absolute_error',
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
    random_state=42,
)

# Fit on the training data (X and y must already be defined by the caller)
random_cv.fit(X, y)