Created
January 16, 2021 18:24
-
-
Save john-adeojo/97f8429e279a7cf8be47327aed43fa66 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model 1: Ranbdom Forest Rgressor | |
import sklearn | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.ensemble import RandomForestRegressor | |
# Split training data into features (x_train) and labels (Y_train) | |
x_train = Train_encoded.drop(columns=['Id','SalePrice','SalePrice_log']) | |
Y_train = Train_encoded.SalePrice_log | |
# Set paramters for Grid Search | |
param_grid = {'n_estimators':[200, 300, 400, 500, 600], | |
'max_features':[0.1, 0.3, 0.6] | |
} | |
# Initialise the random forest model | |
RandForest = RandomForestRegressor(n_jobs= -1, random_state = 0, bootstrap=True) | |
# Initialise Gridsearch CV with 5 fold corssvalidation and neggative root_mean_squared_error | |
Tuned_RandForest = GridSearchCV(estimator=RandForest, param_grid=param_grid, scoring='neg_root_mean_squared_error', cv=5) | |
# Fit model & Time the process for training the model | |
start_time = time.process_time() | |
Tuned_RandForest.fit(x_train, Y_train) | |
# End of fit time | |
print(time.process_time() - start_time, "Seconds") | |
# Record the results for all models in a pandas dataframe and keep only the best model | |
Results = pd.DataFrame(Tuned_RandForest.cv_results_) | |
Results_Best = Results.loc[Results.rank_test_score==1] | |
print('Random Forest Regressor') | |
#Results = Results.loc[Results.rank_test_score==1] | |
# Create a plot to show all models | |
col = ['param_max_features'] | |
for col in col: | |
grid = sns.FacetGrid(Results, col=col, hue='rank_test_score', palette="tab20c", legend_out=False, | |
col_wrap=5, height=5) | |
# Draw a horizontal line to show the starting point | |
grid.map(plt.axhline, y=0, ls=":", c=".5") | |
# Draw marker on plot and decide what parameters to plot | |
grid.map(plt.plot, "param_n_estimators", "mean_test_score", marker="o") | |
# Adjust the arrangement of the plots | |
grid.fig.tight_layout(w_pad=1) | |
# Add legend to gird | |
grid.add_legend() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment