Skip to content

Instantly share code, notes, and snippets.

@AlbertoEAF
Last active January 16, 2021 23:19
Show Gist options
  • Save AlbertoEAF/5972db15a27c294bab65b97e1bc4c315 to your computer and use it in GitHub Desktop.
LightGBM single row predict benchmark script
/**
*
* Quick & dirty Single Row Predict benchmark.
*
*
* Add this .cpp to a new "profiling/" folder and the following line to the end of CMakeLists.txt:
*
* OPTION(BUILD_PROFILING_TESTS "Set to ON to compile profiling executables for development and benchmarks." OFF)
* if(BUILD_PROFILING_TESTS)
* # For profiling builds with valgrind/callgrind use -DUSE_DEBUG=ON
* add_executable(lightgbm_profile_single_row_predict profiling/profile_single_row_predict.cpp ${SOURCES})
* endif(BUILD_PROFILING_TESTS)
*
*
* Requirements:
*
* - Add a "LightGBM_model.txt" file at the repo root.
* - Adapt ``values`` below to your model to have at least 2 different input rows.
*
* Compilation:
*
* cmake .. -DBUILD_PROFILING_TESTS=ON && make -j4
*
* Usage:
*
* time ./lightgbm_profile_single_row_predict <# threads> <# points> [f] # f uses the Fast single row prediction
*
*
* Alberto Ferreira, 2021
*/
#include <math.h>
#include <stdio.h>

#include <chrono>
#include <cstring>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <thread>
#include <vector>

#include "LightGBM/c_api.h"
using namespace std;
#define FMT_HEADER_ONLY
#include "LightGBM/../../external_libs/fmt/include/fmt/format.h"
/**
 * Runs one single-row prediction through the standard (non-fast) C API.
 *
 * @param handle         Booster to predict with.
 * @param data           Pointer to one row of `ncol` doubles (row-major).
 * @param ncol           Number of features in the row.
 * @param num_iterations Number of boosting iterations to use.
 * @param out_len        [out] Number of scores written (1 for normal predict).
 * @param out_result     [out] Destination for the predicted score.
 * @throws std::runtime_error with LightGBM's last error message on failure.
 */
inline void predict(BoosterHandle handle,
                    const void* data,
                    int32_t ncol,
                    int num_iterations,
                    int64_t* out_len,
                    double* out_result) {
  const int rc = LGBM_BoosterPredictForMatSingleRow(
      handle,
      data,
      C_API_DTYPE_FLOAT64,
      ncol,
      1,  // is_row_major
      C_API_PREDICT_NORMAL,
      0,  // start_iteration
      num_iterations,
      "",  // no extra prediction parameters
      out_len,
      out_result);
  if (rc != 0) {
    // Surface LightGBM's own error text instead of an opaque std::exception.
    throw std::runtime_error(LGBM_GetLastError());
  }
}
/**
 * Worker routine: runs predictions for indices [start, end), cycling through
 * the `nrows` available input rows so consecutive calls score different rows.
 *
 * Each prediction `i` writes its score to out_scores[i]; out_len is shared
 * scratch (every call overwrites it with the same value, 1).
 */
void predict_n(
    BoosterHandle boosterHandle,
    double *data,
    const size_t nrows,
    int ncol,
    int num_iterations,
    int64_t *out_len,
    double* out_scores,
    const size_t start,
    const size_t end) {
  for (size_t idx = start; idx != end; ++idx) {
    // Round-robin over the input rows.
    const size_t row_offset = (idx % nrows) * ncol;
    predict(boosterHandle, &data[row_offset], ncol, num_iterations,
            out_len, &out_scores[idx]);
  }
}
/**
 * Runs one single-row prediction through the Fast C API (pre-configured
 * via LGBM_BoosterPredictForMatSingleRowFastInit).
 *
 * @param handle     Pre-initialized fast-predict configuration.
 * @param data       Pointer to one row of feature values.
 * @param out_len    [out] Number of scores written.
 * @param out_result [out] Destination for the predicted score.
 * @throws std::runtime_error with LightGBM's last error message on failure.
 */
inline void predict_fast(FastConfigHandle handle,
                         const void* data,
                         int64_t* out_len,
                         double* out_result) {
  if (0 != LGBM_BoosterPredictForMatSingleRowFast(handle, data, out_len, out_result)) {
    // Surface LightGBM's own error text instead of an opaque std::exception.
    throw std::runtime_error(LGBM_GetLastError());
  }
}
/**
 * Worker routine (Fast variant): runs fast-path predictions for indices
 * [start, end), cycling through the `nrows` available input rows.
 *
 * Each prediction `i` writes its score to out_scores[i]; out_len is shared
 * scratch (every call overwrites it with the same value, 1).
 */
void predict_fast_n(
    FastConfigHandle handle,
    double *data,
    const size_t nrows,
    const size_t ncol,
    int64_t *out_len,
    double* out_scores,
    const size_t start,
    const size_t end) {
  for (size_t idx = start; idx != end; ++idx) {
    // Round-robin over the input rows.
    const size_t row_offset = (idx % nrows) * ncol;
    predict_fast(handle, &data[row_offset], out_len, &out_scores[idx]);
  }
}
int main(int argc, char **argv) {
// Input parsing & experiment setup:
if (argc < 2) {
// argv[1] = #threads
// argv[2] == "f" ? => Use Fast variant.
cout << "Please pass #threads!\n";
exit(1);
}
const int nthreads = std::atoi(argv[1]);
const size_t N_PREDICTIONS = size_t(std::atol(argv[2]));
bool fast_mode = strcmp(argv[3], "f") == 0;
cout << "fast_mode=" << fast_mode << "\n";
cout << "start\n";
BoosterHandle boosterHandle;
int num_iterations;
LGBM_BoosterCreateFromModelfile("./LightGBM_model.txt", &num_iterations, &boosterHandle);
cout << "Model iterations " << num_iterations<< "\n";
/*
Dataset:
feature_names=amount num1_float num2_double num3_int
fraud := 400<amount<700 & cat1_string="C"~=2 & num1_float < 70
Use input "rows" that provide different output scores to ensure thread-safety:
*/
double values[] = {
0.25, 1.4, 0.12, -0.5,
500, 2, 9999, 200,
};
const size_t NROWS=2;
const int NUM_FEATURES = 4;
double ref_scores[NUM_FEATURES * NROWS];
int64_t dummy_out_len;
std::vector<double> scores(N_PREDICTIONS);
FastConfigHandle fastConfigHandle;
LGBM_BoosterPredictForMatSingleRowFastInit(boosterHandle, C_API_PREDICT_NORMAL, 0, num_iterations, C_API_DTYPE_FLOAT64, NUM_FEATURES, "", &fastConfigHandle);
// Generate 2 distinct reference scores - 1 per input row:
predict(boosterHandle, values, NUM_FEATURES, num_iterations, &dummy_out_len, &ref_scores[0]);
predict(boosterHandle, values+NUM_FEATURES, NUM_FEATURES, num_iterations, &dummy_out_len, &ref_scores[1]);
fmt::print("Ref scores: {:.6g}, {:.6g}\n", ref_scores[0], ref_scores[1]);
// Schedule work ////////////////////////////////////////////////////////////////////////////////////////////
const size_t full_span = scores.size();
const size_t base_thread_span = full_span / nthreads;
fmt::print("Work span={}, {} threads, items/thread ~= {}\n", full_span, nthreads, base_thread_span);
auto t0 = std::clock();
std::vector<std::thread> threads;
for (int nthread = 0; nthread < nthreads; ++nthread) {
const size_t start = nthread * base_thread_span;
const size_t end = nthread < nthreads-1 ? start + base_thread_span : full_span;
fmt::print("Thread {} [{}:{}] ({} items)\n", nthread, start, end, end-start);
if (fast_mode) {
threads.push_back(std::thread(&predict_fast_n, fastConfigHandle, values, NROWS, NUM_FEATURES, &dummy_out_len, scores.data(), start, end));
} else {
threads.push_back(std::thread(&predict_n, boosterHandle, values, NROWS, NUM_FEATURES, num_iterations, &dummy_out_len, scores.data(), start, end));
}
}
for (auto &th: threads)
th.join();
// Check output scores against reference scores /////////////////////////////////////////////////////////////
for (size_t i = 0; i < N_PREDICTIONS; ++i) {
const size_t row = i%2;
const double error = scores[i]-ref_scores[row];
if (abs(error) > 1e-30) {
fmt::print("{} Score {} ref_score {}\n", i, scores[i], ref_scores[row]);
fmt::print("{} Score error: {}\n", i, error);
}
}
cout << "len=" << dummy_out_len << endl;
cout << "end\n";
auto t_exec = double(clock() - t0) / CLOCKS_PER_SEC;
cout << "Executed in " << t_exec << "s\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment