Skip to content

Instantly share code, notes, and snippets.

@andrey-khropov
Created April 18, 2024 16:01
Show Gist options
  • Save andrey-khropov/2ea97d8d4fd025f2bb038e49dc8d1986 to your computer and use it in GitHub Desktop.
Save andrey-khropov/2ea97d8d4fd025f2bb038e49dc8d1986 to your computer and use it in GitHub Desktop.
#include <catboost/libs/model_interface/c_api.h>
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main(int argc, const char * argv[]) {
ModelCalcerHandle* modelHandle;
modelHandle = ModelCalcerCreate();
if (!LoadFullModelFromFile(modelHandle, "RMSE.cbm")) {
printf("LoadFullModelFromFile error message: %s\n", GetErrorString());
}
{
std::ifstream in("X_test_first5lines.dsv");
size_t lineidx = 0;
for (std::string line; std::getline(in, line); ++lineidx) {
std::istringstream linein(line);
std::vector<float> features;
for (std::string e; std::getline(linein, e, ' '); ) {
features.push_back(std::stof(e));
}
const float* featuresPtr = features.data();
/*
double result = 0.0;
if (!CalcModelPrediction(
modelHandle,
1,
&featuresPtr, features.size(),
nullptr, 0,
&result, 1)
) {
printf("CalcModelPrediction error message: %s\n", GetErrorString());
}
std::cout << '[' << lineidx << "] = CalcModelPrediction: " << result << std::endl;
*/
double resultSingle = 0.0;
if (!CalcModelPredictionSingle(
modelHandle,
featuresPtr, features.size(),
nullptr, 0,
&resultSingle, 1)
) {
printf("CalcModelPredictionSingle error message: %s\n", GetErrorString());
}
std::cout << '[' << lineidx << "] = CalcModelPredictionSingle: " << resultSingle << std::endl;
}
}
return 0;
}
from copy import deepcopy
import numpy as np
import catboost as cb
# Baseline CatBoost options; fit_model deep-copies this mapping before adding
# the per-model settings, so the defaults themselves are never modified there.
default_parameters = dict(
    iterations=2000,
    custom_metric=['NDCG', 'PFound', 'AverageGain:top=10'],
    verbose=False,
    random_seed=0,
)

# Module-level placeholder; fit_model binds its own local of the same name.
parameters = dict()
def fit_model(loss_function, additional_params, train_pool, test_pool):
    """Fit a CatBoostRanker with the given loss and save it to disk.

    The training options start as a deep copy of ``default_parameters``; the
    loss function name is used both as ``loss_function`` and as ``train_dir``.
    Entries of ``additional_params`` (when not ``None``) override the rest.

    Args:
        loss_function: Loss name; also the training directory and the stem of
            the saved ``<loss_function>.cbm`` file.
        additional_params: Optional mapping of extra/overriding options, or None.
        train_pool: Data passed to ``fit`` as the training set.
        test_pool: Data passed to ``fit`` as ``eval_set``.

    Returns:
        The fitted ``cb.CatBoostRanker`` instance.
    """
    options = deepcopy(default_parameters)
    options['loss_function'] = loss_function
    options['train_dir'] = loss_function
    if additional_params is not None:
        options.update(additional_params)

    ranker = cb.CatBoostRanker(**options)
    ranker.fit(train_pool, eval_set=test_pool, plot=True)

    model_path = loss_function + '.cbm'
    ranker.save_model(model_path)
    return ranker
def main():
    """Train an RMSE ranker on the MSRank-10k sample and export test rows.

    Column 0 of each frame is used as the label and column 1 as the group
    (query) id; the remaining columns are the features. Labels are divided by
    the largest training label before training. After fitting (fit_model also
    saves the model as ``RMSE.cbm``), the first five test predictions are
    printed next to their values scaled back by the same factor, and the
    first five test feature rows are written to ``X_test_first5lines.dsv``.
    """
    print(f'catboost version={cb.version.VERSION}')

    from catboost.datasets import msrank_10k
    train_df, test_df = msrank_10k()

    X_train = train_df.drop([0, 1], axis=1).values
    y_train = train_df[0].values
    queries_train = train_df[1].values

    X_test = test_df.drop([0, 1], axis=1).values
    y_test = test_df[0].values
    queries_test = test_df[1].values

    # For calculation such metrics as NDCG and PFound relevances should be in segment [0,1].
    max_relevance = np.max(y_train)
    # Out-of-place division: an in-place `/=` raises a casting error when the
    # label column has an integer dtype, and could write through to the
    # DataFrame's underlying buffer.
    y_train = y_train / max_relevance
    y_test = y_test / max_relevance

    train = cb.Pool(
        data=X_train,
        label=y_train,
        group_id=queries_train
    )
    test = cb.Pool(
        data=X_test,
        label=y_test,
        group_id=queries_test
    )

    model = fit_model(
        'RMSE',
        {'custom_metric': ['PrecisionAt:top=10', 'RecallAt:top=10', 'MAP:top=10']},
        train,
        test,
    )

    predict_test = model.predict(X_test)
    for i in range(0, 5):
        # Second column undoes the label normalisation applied above.
        print(predict_test[i], "\t", predict_test[i] * max_relevance)

    # Written with np.savetxt defaults (one row per line, space-separated).
    np.savetxt('X_test_first5lines.dsv', X_test[:5])
# Run the training/export pipeline only when this file is executed as a script.
if '__main__' == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment