Skip to content

Instantly share code, notes, and snippets.

@okanyenigun
Created September 5, 2022 10:16
Show Gist options
  • Save okanyenigun/032abed85a5b74b500b7ebf7793211e1 to your computer and use it in GitHub Desktop.
Save okanyenigun/032abed85a5b74b500b7ebf7793211e1 to your computer and use it in GitHub Desktop.
catboost
import os
import pandas as pd
import numpy as np
# Limit printed NumPy floats to four decimal places.
np.set_printoptions(precision=4)
import catboost
print(catboost.__version__)

# Dataset: the `amazon` loader from catboost.datasets returns a train and a
# test frame; only the train frame is used for the label and feature matrix.
from catboost.datasets import amazon
(train_df, test_df) = amazon()
y = train_df.ACTION
X = train_df.drop('ACTION', axis=1)

# All the features are categorical, so every column index is listed.
cat_features = list(range(X.shape[1]))
print(cat_features)

# Unbalanced labels: show the distinct label values and the per-class counts.
print('Labels: {}'.format(set(y)))
# Sum the labels once with the vectorised Series.sum() instead of running the
# builtin sum() over the Series twice. The "zero count" arithmetic assumes the
# labels are 0/1 -- confirm against the set printed above.
one_count = int(y.sum())
print('Zero count = {}, One count = {}'.format(len(y) - one_count, one_count))
# Training: baseline classifier fitted on the complete feature matrix.
from catboost import CatBoostClassifier, Pool

model = CatBoostClassifier(iterations=100)
model.fit(
    X,
    y,
    cat_features=cat_features,
    verbose=10,
)
# The probabilities are computed but not stored or printed; this reads like a
# notebook cell whose value was meant to be displayed.
model.predict_proba(X)

# The same data bundled with its label and categorical column indices.
pool = Pool(data=X, label=y, cat_features=cat_features)
# Hold out 20% of the rows for validation; random_state pins the split.
from sklearn.model_selection import train_test_split

data = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_validation, y_train, y_validation = data

# One Pool per split, each carrying its labels and the categorical indices.
train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)
validation_pool = Pool(
    data=X_validation,
    label=y_validation,
    cat_features=cat_features,
)
# Very short run (5 iterations) evaluated against the validation pool, then
# inspect the fitted model.
# The author left an alternative objective disabled: loss_function='CrossEntropy'
model = CatBoostClassifier(iterations=5, learning_rate=0.1)
model.fit(train_pool, eval_set=validation_pool, verbose=False)

print('Model is fitted: {}'.format(model.is_fitted()))
print('Model params:\n{}'.format(model.get_params()))
# 15-iteration run with the library's default logging; the author left a
# `verbose=5` constructor option disabled here.
model = CatBoostClassifier(iterations=15)
model.fit(train_pool, eval_set=validation_pool)
# Track AUC and Accuracy as extra metrics during training.
model = CatBoostClassifier(
    iterations=50,
    learning_rate=0.5,
    custom_loss=['AUC', 'Accuracy'],
)
# NOTE(review): plot=True presumably targets a notebook front end -- confirm
# it is wanted when this file is run as a plain script.
model.fit(train_pool, eval_set=validation_pool, verbose=False, plot=True)
# Up to 200 iterations, with early_stopping_rounds=20 and the validation pool
# supplied as eval_set.
model_with_early_stop = CatBoostClassifier(
    iterations=200,
    learning_rate=0.5,
    early_stopping_rounds=20,
)
# NOTE(review): plot=True presumably targets a notebook front end -- confirm
# it is wanted when this file is run as a plain script.
model_with_early_stop.fit(
    train_pool,
    eval_set=validation_pool,
    verbose=False,
    plot=True,
)
from catboost import cv
# Training settings passed to the cross-validation call that follows.
params = {
    'loss_function': 'Logloss',
    'iterations': 80,
    'custom_loss': 'AUC',
    'learning_rate': 0.5,
}
# 5-fold cross-validation on the training pool; shuffling is seeded through
# partition_random_seed.
cv_data = cv(
    params=params,
    pool=train_pool,
    fold_count=5,
    shuffle=True,
    partition_random_seed=0,
    plot=True,
    verbose=False,
)
# First ten rows of the result; the value is not stored or printed
# (notebook-style display expression).
cv_data.head(10)
from sklearn.model_selection import GridSearchCV
# Hyper-parameter grid for the GridSearchCV run that follows.
# The original literal listed "depth" twice ([4, 7] and then [4, 8]); Python
# keeps only the last value for a repeated key, so [4, 7] was never searched.
# The key now appears once, with the value that was actually in effect.
param_grid = {
    "iterations": [10, 100],
    "learning_rate": [0.01, 0.1],
    "depth": [4, 8],
    # NOTE(review): the grid-search fit is called without an eval_set; confirm
    # early_stopping_rounds has any effect there, otherwise this axis only
    # doubles the number of fits.
    "early_stopping_rounds": [5, 10],
    "l2_leaf_reg": [2, 4],
}
# Grid search over param_grid with 3-fold cross-validation on the training
# split; the estimator carries the categorical column indices itself.
clf = CatBoostClassifier(cat_features=cat_features, verbose=20)
grid_search = GridSearchCV(clf, param_grid=param_grid, cv=3)
results = grid_search.fit(X_train, y_train)
# Parameters of the best estimator found; the value is not stored or printed
# (notebook-style display expression).
results.best_estimator_.get_params()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment