Created
December 8, 2021 15:16
-
-
Save chrisdmell/589db45b928962bc30afeb3af9f4becd to your computer and use it in GitHub Desktop.
catboost Regression Model Class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class catboost_regressor():
    """Convenience wrapper around a ``CatBoostRegressor``.

    The wrapper builds the estimator from a parameter mapping and offers
    helpers that split a dataframe into a train / hold-out pair, fit (or
    grid-search) the estimator and return the usual regression metrics.

    The names ``CatBoostRegressor``, ``train_test_split``, ``GridSearchCV``,
    ``metrics`` and ``np`` are resolved from the enclosing module at call
    time, so the module must import them.
    """

    # Hold-out configuration shared by every ``model_run*`` method.
    _TEST_SIZE = 0.25
    _RANDOM_STATE = 42

    # Keys that the grid-search methods read from ``other_dict``.
    _CV_KEYS = ("parameters", "scoring", "cv")

    def __init__(self, param=None):
        """Create the underlying estimator.

        :param param: mapping of keyword arguments for ``CatBoostRegressor``
            (e.g. ``{"loss_function": "RMSE", "iterations": 100}``).
            ``None`` or any empty container means "library defaults".
        """
        # Copy so later mutation by the caller cannot change what we report
        # through ``params``; an empty/None value becomes an empty dict so it
        # can be unpacked with ``**``.
        self._param = dict(param) if param else {}
        self._rfr = CatBoostRegressor(**self._param)

    @classmethod
    def new_instance(cls, param=None):
        """Alternate constructor.

        Example::

            best = catboost_regressor.new_instance(model_cv.best_params_)

        :param param: optional estimator parameters, forwarded to ``__init__``.
        :return: a new wrapper instance.
        """
        return cls(param)

    @property
    def model(self):
        """Return the wrapped estimator instance.

        Being a property it is read as ``wrapper.model`` (no call).
        """
        return self._rfr

    @property
    def params(self):
        """Return the parameter mapping the estimator was created with."""
        return self._param

    # ------------------------------------------------------------------ #
    # Private helpers shared by the public ``model_run*`` entry points.
    # ------------------------------------------------------------------ #
    def _split(self, df, var_dict):
        """Select feature/label columns and return the train/test split.

        :return: ``(X_train, X_test, y_train, y_test)``.
        :raises KeyError: if ``var_dict`` lacks ``"independant"`` or
            ``"dependant"``.
        """
        feature = var_dict["independant"]
        label = var_dict["dependant"]
        return train_test_split(
            df[feature],
            df[label],
            test_size=self._TEST_SIZE,
            random_state=self._RANDOM_STATE,
        )

    @staticmethod
    def _error_metrics(y_test, y_pred):
        """Return a dict with ``mae``, ``mse``, ``rmse`` and ``r2``."""
        mse = metrics.mean_squared_error(y_test, y_pred)
        return {
            "mae": metrics.mean_absolute_error(y_test, y_pred),
            "mse": mse,
            "rmse": np.sqrt(mse),
            "r2": metrics.r2_score(y_test, y_pred),
        }

    def _fit_and_evaluate(self, df, var_dict, cat_features=None):
        """Fit the wrapped estimator and score it on the hold-out set."""
        X_train, X_test, y_train, y_test = self._split(df, var_dict)

        # Only forward ``cat_features`` when the caller supplied some, so the
        # default call is exactly ``fit(X_train, y_train)``.
        fit_kwargs = {}
        if cat_features is not None and len(cat_features) > 0:
            fit_kwargs["cat_features"] = list(cat_features)
        self._rfr.fit(X_train, y_train, **fit_kwargs)

        y_pred = self._rfr.predict(X_test)
        model_score = self._rfr.score(X_test, y_test)

        return_dict = self._error_metrics(y_test, y_pred)
        return_dict["model"] = self.model
        return_dict["y_test"] = y_test
        return_dict["y_pred"] = y_pred
        return_dict["model_score"] = model_score
        return return_dict

    def _grid_search_and_evaluate(self, df, var_dict, other_dict=None):
        """Grid-search the wrapped estimator and score the refit winner."""
        X_train, X_test, y_train, y_test = self._split(df, var_dict)

        options = other_dict if other_dict is not None else {}
        missing = [key for key in self._CV_KEYS if key not in options]
        if missing:
            raise KeyError(
                "other_dict is missing required key(s): " + ", ".join(missing)
            )

        grid_search_ad = GridSearchCV(
            estimator=self._rfr,
            param_grid=options["parameters"],
            scoring=options["scoring"],
            cv=options["cv"],
            n_jobs=-1,
            verbose=2,
        )
        grid_search_ad.fit(X_train, y_train)
        y_pred = grid_search_ad.predict(X_test)

        return_dict = self._error_metrics(y_test, y_pred)
        # ``GridSearchCV.score`` uses the metric given in ``scoring``, so this
        # value is only an R2 when that is what was requested.
        return_dict["model_score"] = grid_search_ad.score(X_test, y_test)
        return_dict["model"] = grid_search_ad
        return_dict["y_test"] = y_test
        return_dict["y_pred"] = y_pred
        return return_dict

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #
    def model_run(self, df, var_dict, cat_features=None, other_dict=None):
        """Fit on a 75/25 split of ``df`` and report hold-out metrics.

        :param df: dataframe holding both feature and label columns.
        :param var_dict: ``var_dict["independant"]`` selects the feature
            columns, ``var_dict["dependant"]`` the label column(s).
        :param cat_features: optional collection of categorical feature
            names or indices; forwarded to ``fit`` when non-empty.
        :param other_dict: unused, kept for signature compatibility.
        :return: dict with keys ``mae``, ``mse``, ``rmse``, ``r2``, ``model``
            (the fitted estimator), ``y_test``, ``y_pred`` and ``model_score``
            (the estimator's own ``score`` on the hold-out set).
        :raises KeyError: if ``var_dict`` lacks a required key.
        """
        return self._fit_and_evaluate(df, var_dict, cat_features)

    def model_run_cv(self, df, var_dict, other_dict=None):
        """Grid-search hyper-parameters and report hold-out metrics.

        :param df: dataframe holding both feature and label columns.
        :param var_dict: ``var_dict["independant"]`` selects the feature
            columns, ``var_dict["dependant"]`` the label column(s).
        :param other_dict: required in practice; must provide
            ``"parameters"`` (the parameter grid), ``"scoring"`` (e.g.
            ``"neg_mean_absolute_error"`` - negated because the search
            maximises its score) and ``"cv"``.
        :return: dict with keys ``mae``, ``mse``, ``rmse``, ``r2``,
            ``model_score`` (``GridSearchCV.score`` on the hold-out set),
            ``model`` (the fitted ``GridSearchCV``), ``y_test``, ``y_pred``.
        :raises KeyError: if ``var_dict`` or ``other_dict`` lacks a
            required key.
        """
        return self._grid_search_and_evaluate(df, var_dict, other_dict)

    #----------------------------------------- MLFLOW ----------------------------------------------------------#
    def model_run_mlfow(self, df, var_dict, other_dict=None):
        """MLflow-flavoured twin of :meth:`model_run`.

        No MLflow call is made here; the method trains and evaluates exactly
        like ``model_run`` without categorical features. The former read of
        ``oob_score_`` was removed: that attribute belongs to random-forest
        estimators and made this method fail on every call.

        :param df: dataframe holding both feature and label columns.
        :param var_dict: ``var_dict["independant"]`` / ``var_dict["dependant"]``.
        :param other_dict: unused, kept for signature compatibility.
        :return: same dict layout as ``model_run``.
        :raises KeyError: if ``var_dict`` lacks a required key.
        """
        return self._fit_and_evaluate(df, var_dict)

    def model_run_cv_mlfow(self, df, var_dict, other_dict=None):
        """MLflow-flavoured twin of :meth:`model_run_cv`.

        No MLflow call is made here; behaviour and return value are the same
        as ``model_run_cv``.

        :param df: dataframe holding both feature and label columns.
        :param var_dict: ``var_dict["independant"]`` / ``var_dict["dependant"]``.
        :param other_dict: must provide ``"parameters"``, ``"scoring"`` and
            ``"cv"``.
        :return: same dict layout as ``model_run_cv``.
        :raises KeyError: if ``var_dict`` or ``other_dict`` lacks a
            required key.
        """
        return self._grid_search_and_evaluate(df, var_dict, other_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment