Last active
October 9, 2023 05:32
-
-
Save M0nteCarl0/2641f80c2285db585f51dfdbfee9a1a8 to your computer and use it in GitHub Desktop.
Yandex CatBoost summary cheat sheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import CatBoostClassifier, Pool

# NOTE: X_train, y_train and X_test are expected to be defined before this
# snippet runs; they are not created here.

# Create a CatBoostClassifier model
model = CatBoostClassifier()

# Load your data into a Pool object
train_data = Pool(X_train, y_train)

# Train the model
model.fit(train_data)

# Make predictions on the test set
preds = model.predict(X_test)

# Get all metrics.
# CatBoost exposes metric evaluation through eval_metrics(); it returns a
# dict mapping each metric name to a list of values, one per evaluated
# iteration. The metrics below are computed on the training pool -- build a
# Pool from held-out data and pass it instead to estimate generalization.
# NOTE(review): 'Logloss' is a binary-classification metric; for a
# multiclass target use 'MultiClass' instead -- confirm against your labels.
metrics = model.eval_metrics(
    train_data,
    metrics=['Accuracy', 'Precision', 'Recall', 'F1', 'Logloss'],
)

# Print the metrics (value reached after the final iteration)
for metric_name, metric_values in metrics.items():
    print(f"{metric_name}: {metric_values[-1]}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import CatBoostClassifier
from dask_ml.model_selection import train_test_split
from dask.distributed import Client, wait
import dask.dataframe as dd

# Number of Dask workers; also the number of models trained below.
N_WORKERS = 4

# Start a Dask cluster with multiple workers
client = Client(n_workers=N_WORKERS)

# Read the data into a Dask DataFrame
data = dd.read_csv('data.csv')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('target', axis=1),
    data['target'],
    test_size=0.2,
    random_state=42,
)

# CatBoost consumes in-memory data (pandas / NumPy), not lazy Dask
# collections, so materialize the training split before handing it over.
X_train = X_train.compute()
y_train = y_train.compute()

# Scatter the training data so every worker holds a copy
X_train_future = client.scatter(X_train, broadcast=True)
y_train_future = client.scatter(y_train, broadcast=True)


def train_model(X, y, random_seed=None):
    """Train a CatBoost classifier on a single worker.

    Args:
        X: In-memory feature matrix (e.g. a pandas DataFrame).
        y: In-memory target values aligned with ``X``.
        random_seed: Optional seed forwarded to CatBoost so that models
            trained on the same data can differ from one another.

    Returns:
        The fitted ``CatBoostClassifier``.
    """
    model = CatBoostClassifier(
        iterations=100,
        learning_rate=0.1,
        random_seed=random_seed,
    )
    model.fit(X, y)
    return model


# Train one model per worker.
# Each task gets its own seed: Dask deduplicates pure tasks whose inputs are
# identical, so submitting the same (X, y) N times would run only once.
futures = [
    client.submit(train_model, X_train_future, y_train_future, random_seed=seed)
    for seed in range(N_WORKERS)
]
wait(futures)

# Collect the trained models from the workers
models = client.gather(futures)

# Materialize the test split for local prediction
X_test = X_test.compute()
y_test = y_test.compute()

# Predict on the test data using each model
y_preds = [model.predict(X_test) for model in models]

# Compute the accuracy for each model.
# ravel() flattens the prediction array, since CatBoost may return a column
# vector for multiclass targets.
y_true = y_test.to_numpy()
accuracies = [(y_pred.ravel() == y_true).mean() for y_pred in y_preds]

# Print the accuracies
for i, accuracy in enumerate(accuracies):
    print(f"Accuracy of Model {i+1}: {accuracy}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import CatBoostClassifier

# Assuming you have a trained CatBoostClassifier model
model = CatBoostClassifier()
model.load_model('model.cbm')

# Splitting the model into multiple files.
# save_model() has no split_count option, so the split is done explicitly:
# copy the model, keep only one range of trees in each copy with shrink(),
# and save every copy to its own file.
tree_count = model.tree_count_
if tree_count < 2:
    raise ValueError("Need at least two trees to split the model into two parts")
half = tree_count // 2

# First part: trees [0, half)
first_part = model.copy()
first_part.shrink(ntree_end=half)
first_part.save_model('model_part1.cbm', format='cbm')

# Second part: trees [half, tree_count)
# NOTE(review): confirm how the model bias is carried by each part before
# summing the parts back together.
second_part = model.copy()
second_part.shrink(ntree_end=tree_count, ntree_start=half)
second_part.save_model('model_part2.cbm', format='cbm')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import CatBoostClassifier, sum_models

# Rest of the code...
# (``models`` is expected to be a non-empty list of trained CatBoost models.)

# Save the trained models into a single CBM file.
# CatBoost blends models through the module-level sum_models() function.
# Equal weights of 1/N average the models' raw outputs instead of adding
# them up.
# NOTE(review): the blended object may be a generic CatBoost model rather
# than a CatBoostClassifier -- confirm before relying on classifier-only
# methods.
weights = [1.0 / len(models)] * len(models)
combined_model = sum_models(models, weights=weights)
combined_model.save_model('combined_model.cbm', format='cbm')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment