Skip to content

Instantly share code, notes, and snippets.

@chrisdmell
Created December 8, 2021 15:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrisdmell/589db45b928962bc30afeb3af9f4becd to your computer and use it in GitHub Desktop.
Save chrisdmell/589db45b928962bc30afeb3af9f4becd to your computer and use it in GitHub Desktop.
CatBoost regression model class
class catboost_regressor():
    """Convenience wrapper that trains and evaluates a ``CatBoostRegressor``.

    Every ``model_run*`` method splits the incoming dataframe into a train
    and a hold-out part (25 % hold-out, fixed seed), fits on the train part
    and reports regression metrics on the hold-out part.

    The names ``CatBoostRegressor``, ``GridSearchCV``, ``train_test_split``,
    ``metrics`` and ``np`` are expected to be available at module level;
    their imports are not part of this snippet.
    """

    # Hold-out configuration shared by all ``model_run*`` methods so the
    # plain and the grid-search runs are evaluated on the same rows.
    _TEST_SIZE = 0.25
    _RANDOM_STATE = 42

    def __init__(self, param=None):
        """Create the underlying estimator.

        :param param: mapping of keyword arguments forwarded to
            ``CatBoostRegressor`` (e.g. ``{"loss_function": "RMSE",
            "iterations": 100}``). ``None`` or any empty container means
            "use the estimator's defaults".
        """
        kwargs = self._as_kwargs(param)
        self._rfr = CatBoostRegressor(**kwargs)
        self._param = kwargs

    @staticmethod
    def _as_kwargs(param):
        """Return ``param`` as a fresh ``dict``; ``None``/empty becomes ``{}``."""
        if not param:
            return {}
        # Copy so later mutation of the caller's dict cannot change what
        # ``params`` reports for this instance.
        return dict(param)

    @classmethod
    def new_instance(cls, param=None):
        """Alternate constructor, handy after a hyper-parameter search.

        Example::

            best = catboost_regressor.new_instance(model_cv.best_params_)

        :param param: optional mapping of estimator keyword arguments; it is
            handed to ``__init__`` unchanged.
        :return: a new instance of this class.
        """
        return cls(param)

    @property
    def model(self):
        """The wrapped estimator instance created in ``__init__``."""
        return self._rfr

    @property
    def params(self):
        """The keyword arguments the estimator was constructed with."""
        return self._param

    def _split(self, df, var_dict):
        """Return ``X_train, X_test, y_train, y_test`` for ``df``.

        ``var_dict["independant"]`` selects the feature columns and
        ``var_dict["dependant"]`` the target column(s).
        """
        features = df[var_dict["independant"]]
        target = df[var_dict["dependant"]]
        return train_test_split(
            features,
            target,
            test_size=self._TEST_SIZE,
            random_state=self._RANDOM_STATE,
        )

    @staticmethod
    def _regression_metrics(y_test, y_pred):
        """Return a dict with the ``mae``, ``mse``, ``rmse`` and ``r2`` scores."""
        mse = metrics.mean_squared_error(y_test, y_pred)
        return {
            "mae": metrics.mean_absolute_error(y_test, y_pred),
            "mse": mse,
            "rmse": np.sqrt(mse),
            "r2": metrics.r2_score(y_test, y_pred),
        }

    def model_run(self, df, var_dict, cat_features=None, other_dict=None):
        """Fit the wrapped estimator and evaluate it on the hold-out split.

        :param df: dataframe holding both feature and target columns.
        :param var_dict: ``{"independant": <feature columns>,
            "dependant": <target column(s)>}``.
        :param cat_features: optional collection of categorical feature
            names or indices. When non-empty it is forwarded to ``fit`` as
            ``cat_features``; when ``None`` or empty, ``fit`` is called
            without it.
        :param other_dict: currently unused; kept so existing callers that
            pass it keep working.
        :return: dict with keys ``mae``, ``mse``, ``rmse``, ``r2``,
            ``model`` (the fitted estimator), ``y_test``, ``y_pred`` and
            ``model_score`` (the estimator's own ``score`` on the hold-out
            data; the original author notes this is R2).
        """
        X_train, X_test, y_train, y_test = self._split(df, var_dict)

        fit_kwargs = {}
        # ``len`` rather than truthiness so array-like inputs are accepted.
        if cat_features is not None and len(cat_features) > 0:
            fit_kwargs["cat_features"] = list(cat_features)
        self._rfr.fit(X_train, y_train, **fit_kwargs)

        y_pred = self._rfr.predict(X_test)
        model_score = self._rfr.score(X_test, y_test)

        return_dict = self._regression_metrics(y_test, y_pred)
        return_dict["model"] = self.model
        return_dict["y_test"] = y_test
        return_dict["y_pred"] = y_pred
        return_dict["model_score"] = model_score
        return return_dict

    def model_run_cv(self, df, var_dict, other_dict=None):
        """Grid-search the estimator's hyper-parameters with cross-validation.

        :param df: dataframe holding both feature and target columns.
        :param var_dict: ``{"independant": <feature columns>,
            "dependant": <target column(s)>}``.
        :param other_dict: search configuration:

            * ``"parameters"`` (required) - the ``param_grid`` to explore;
            * ``"scoring"`` (optional) - scorer name such as
              ``"neg_mean_absolute_error"`` (errors are negated because the
              search maximises its score); ``None`` when omitted;
            * ``"cv"`` (optional) - cross-validation strategy; ``None``
              when omitted.
        :return: dict with keys ``mae``, ``mse``, ``rmse``, ``r2``,
            ``model_score``, ``model`` (the fitted ``GridSearchCV``),
            ``y_test`` and ``y_pred``. ``model_score`` is the search
            object's ``score`` on the hold-out data, which follows
            ``scoring`` when one is given and therefore need not equal
            ``r2``.
        :raises KeyError: if ``other_dict`` has no ``"parameters"`` entry.
        """
        options = other_dict or {}
        # Validate before doing any work so a mis-configured call fails
        # fast with an actionable message.
        if "parameters" not in options:
            raise KeyError(
                'other_dict["parameters"] (the grid-search param_grid) is required'
            )

        X_train, X_test, y_train, y_test = self._split(df, var_dict)

        grid_search_ad = GridSearchCV(
            estimator=self._rfr,
            param_grid=options["parameters"],
            scoring=options.get("scoring"),
            cv=options.get("cv"),
            n_jobs=-1,
            verbose=2,
        )
        grid_search_ad.fit(X_train, y_train)
        y_pred = grid_search_ad.predict(X_test)

        return_dict = self._regression_metrics(y_test, y_pred)
        return_dict["model_score"] = grid_search_ad.score(X_test, y_test)
        return_dict["model"] = grid_search_ad
        return_dict["y_test"] = y_test
        return_dict["y_pred"] = y_pred
        return return_dict

    # ------------------------------- MLFLOW ------------------------------- #

    def model_run_mlfow(self, df, var_dict, other_dict=None):
        """Same computation and return value as ``model_run``.

        No experiment tracking is performed here; the method exists as the
        entry point used by the tracking workflow. It delegates to
        ``model_run`` without categorical features. The earlier read of
        ``oob_score_`` was dropped: its value was never used and the
        attribute belongs to random-forest estimators.
        """
        return self.model_run(df, var_dict, other_dict=other_dict)

    def model_run_cv_mlfow(self, df, var_dict, other_dict=None):
        """Same computation and return value as ``model_run_cv``.

        No experiment tracking is performed here; see ``model_run_cv`` for
        the expected ``other_dict`` entries and the returned keys.
        """
        return self.model_run_cv(df, var_dict, other_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment