Skip to content

Instantly share code, notes, and snippets.

@Roffild
Created September 15, 2022 11:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Roffild/5043e14696e7ce198d8b8b0fafc5de14 to your computer and use it in GitHub Desktop.
Save Roffild/5043e14696e7ce198d8b8b0fafc5de14 to your computer and use it in GitHub Desktop.
XGBoost for MQL
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* https://github.com/Roffild/RoffildLibrary
*/
/**
 * Output transformation that predict() applies to the summed tree outputs.
 * The numeric values are stored in the binary model file (load() casts the
 * stored integer to this enum), so they must not be renumbered.
 */
enum ENUM_XGBOOST_ACTIVATIONS
{
   XGBOOST_MARGIN       = 0, // raw sums, no transformation
   XGBOOST_EXP          = 1, // MathExp(sum)
   XGBOOST_SIGMOID      = 2, // 1 / (1 + MathExp(-sum))
   XGBOOST_SOFTPROB     = 3, // softmax over the classes
   XGBOOST_SOFTMAX      = 4, // softmax over the classes; predict() also returns the winning index
   XGBOOST_BINARY_HINGE = 5  // 1.0 when sum > 0, otherwise 0.0
};
/**
 * Runs XGBoost (XGB) models in MQL5.
 * The XGB model has to be created and trained in another programming language.
 * A converter that prepares an XGB model for execution by this class is available for Python (LINK).
 * The storage layout is based on the simple binary format that Alglib uses to
 * save and load data for the class CDecisionForest (random forest).
 * Some precision is lost because the wrong branches can be taken: XGB stores everything as float (float32).
 * This class converts double to float before comparing, which reduces the error to a minimum.
 * ```
 * double(float64) = 0.366326530612243 (YES)
 * float(float32) = 0.366326541 (NO)
 * 338:[f171<0.366326541] yes=483,no=484,missing=483
 * ```
 */
class CXGBoost
{
public:
   // Layout of the serialized forest, as read by predict() and dump():
   //   m_trees[root]      length of the tree record in doubles (offset to the next tree)
   //   m_trees[root + 1]  first node of the tree
   // A split node is three doubles:
   //   [k]     feature index, or -1.0 to mark a leaf
   //   [k + 1] split threshold (for a leaf: the leaf value)
   //   [k + 2] offset, relative to `root`, of the "no" child; the "yes" child starts at k + 3
   // Integers are stored as doubles; the "+ 0.3" before each (int) cast presumably guards
   // against a value that is marginally below the integer being truncated down.
   ENUM_XGBOOST_ACTIVATIONS m_activation; // output transformation applied by predict()
   int m_nvars;         // header field; number of input values (not used by predict() itself)
   int m_nclasses;      // number of outputs; trees are assigned to outputs round-robin
   int m_ntrees;        // number of tree records in m_trees
   int m_bufsize;       // number of doubles in m_trees
   double m_base_score; // added to every leaf value when m_nclasses > 1
   double m_trees[];    // serialized forest, see the layout above

   CXGBoost()
   {
      reset();
   }

   /**
    * Evaluates the forest for one sample.
    * @param x      Input values, indexed by the feature indexes stored in the trees.
    * @param y      Receives the outputs; grown to m_nclasses elements when smaller
    *               (at least one element when the model has trees) and zeroed first.
    * @param margin When true, the activation is skipped and raw sums are returned.
    * @return XGBOOST_SOFTMAX ? index of the largest y : -1
    */
   int predict(const double &x[], double &y[], const bool margin = false)
   {
      const double bscore = m_nclasses > 1 ? m_base_score : 0.0;
      int root = 0;
      int k = 0;
      int idx = -1;
      // y[0] is written even when m_nclasses is 0, so make sure it exists.
      int needed = m_nclasses;
      if (needed < 1 && m_ntrees > 0) {
         needed = 1;
      }
      if (ArraySize(y) < needed) {
         ArrayResize(y, needed);
      }
      ZeroMemory(y);
      for (int t = 0; t < m_ntrees; t++) {
         k = root + 1;
         // Trees are dealt to the outputs in storage order: 0, 1, ..., m_nclasses-1, 0, ...
         idx++;
         if (idx == m_nclasses) {
            idx = 0;
         }
         while (true) {
            if (m_trees[k] == -1.0) {
               // Leaf: accumulate with float rounding to stay close to XGBoost's float32 math.
               y[idx] += (float)(m_trees[k+1] + bscore);
               break;
            }
            // Both sides are compared as float32 so that the same branch is taken as in XGBoost.
            // NOTE(review): a NaN input fails this comparison and follows the "no" branch,
            // while dump() prints missing=yes - confirm which one is intended.
            if ((float)(x[(int)(m_trees[k] + 0.3)]) < (float)(m_trees[k+1])) {
               k += 3;
            } else {
               k = root + (int)(m_trees[k+2] + 0.3);
            }
         }
         root += (int)(m_trees[root] + 0.3);
      }
      return applyActivation(y, margin);
   }

   /**
    * Load from the binary format (fast).
    * The handle must be opened with FILE_BIN|FILE_READ. When the data is
    * incomplete or the header is invalid the model is reset to the empty state.
    */
   void load(int hfile)
   {
      readModel(hfile);
   }

   /**
    * Load from the binary format (fast).
    * @param filename File to read.
    * @param common   Use FILE_COMMON flag when opening the file?
    * @return false when the file cannot be opened or does not contain a complete model.
    */
   bool load(const string filename, const bool common = true)
   {
      int hfile = FileOpen(filename, FILE_BIN|FILE_READ|FILE_SHARE_READ|(common ? FILE_COMMON : 0));
      if (hfile == INVALID_HANDLE) {
         return false;
      }
      const bool ok = readModel(hfile);
      FileClose(hfile);
      return ok;
   }

   /**
    * Dump model into a text or JSON file.
    * Simulating xgboost.Booster.dump_model() without statistics.
    * @param filename File name to save.
    * @param json Save as JSON?
    * @param common Use FILE_COMMON flag when saving file?
    * @param tree_index Use the whole forest (WHOLE_ARRAY) or just one tree.
    * @return false when the file cannot be opened for writing.
    */
   bool dump(const string filename, const bool json = false,
             const bool common = true, const int tree_index = WHOLE_ARRAY)
   {
      string fnames[];
      return dump(filename, fnames, json, common, tree_index);
   }

   /**
    * Dump model into a text or JSON file.
    * Simulating xgboost.Booster.dump_model() without statistics.
    * @param filename File name to save.
    * @param fnames An array of feature names. If the array is empty, then the index is used.
    * @param json Save as JSON?
    * @param common Use FILE_COMMON flag when saving file?
    * @param tree_index Use the whole forest (WHOLE_ARRAY) or just one tree.
    * @return false when the file cannot be opened for writing.
    */
   bool dump(const string filename, const string &fnames[], const bool json = false,
             const bool common = true, const int tree_index = WHOLE_ARRAY)
   {
      const int hFile = FileOpen(filename, FILE_WRITE|FILE_ANSI|(common ? FILE_COMMON : 0), "", CP_UTF8);
      if (hFile == INVALID_HANDLE) {
         return false;
      }
      int root = 0;
      int i = 0;
      int k = 0;
      int ndsize = 0;
      const int fnsize = ArraySize(fnames);
      ulong nodeid[];
      ArrayResize(nodeid, 100, 100);
      if (json) {
         FileWrite(hFile, "[");
      }
      for (i = 0; i < m_ntrees; i++) {
         if (tree_index == WHOLE_ARRAY || i == tree_index) {
            if (json == false) {
               FileWrite(hFile, "booster[", i, "]:");
            }
            // Pass 1: count the nodes on every depth level (stored at index depth + 1).
            ZeroMemory(nodeid);
            dumpNode(nodeid, root, root + 1, 1, ArraySize(nodeid));
            // Prefix sums turn the counts into the first node id of every depth level.
            for (k = 1, ndsize = ArraySize(nodeid); k < ndsize; k++) {
               nodeid[k] += nodeid[k-1];
            }
            // Pass 2: write the nodes; a separator follows every tree except the last one.
            dumpToFile(hFile, nodeid, fnames, fnsize, json, root, root + 1, 0,
                       tree_index == WHOLE_ARRAY && i < (m_ntrees - 1));
         }
         root += (int)(m_trees[root] + 0.3);
      }
      if (json) {
         FileWriteString(hFile, "]");
      }
      FileClose(hFile);
      return true;
   }

protected:
   /**
    * Puts the object into the empty (no model loaded) state.
    */
   void reset()
   {
      m_activation = XGBOOST_MARGIN;
      m_nvars = 0;
      m_nclasses = 0;
      m_ntrees = 0;
      m_bufsize = 0;
      m_base_score = 0.0;
      ArrayFree(m_trees);
   }

   /**
    * Reads the header and the tree buffer from an open binary file.
    * @return true when all m_bufsize doubles were read; otherwise the model is reset.
    */
   bool readModel(const int hfile)
   {
      m_activation = (ENUM_XGBOOST_ACTIVATIONS)FileReadInteger(hfile);
      m_nvars = FileReadInteger(hfile);
      m_nclasses = FileReadInteger(hfile);
      m_ntrees = FileReadInteger(hfile);
      m_bufsize = FileReadInteger(hfile);
      m_base_score = FileReadDouble(hfile);
      bool ok = m_ntrees >= 0 && m_bufsize >= 0 &&
                ArrayResize(m_trees, m_bufsize) == m_bufsize;
      if (ok && m_bufsize > 0) {
         // A short read means a truncated file; predict() would index past the data.
         ok = (int)FileReadArray(hfile, m_trees, 0, m_bufsize) == m_bufsize;
      }
      if (!ok) {
         reset();
      }
      return ok;
   }

   /**
    * Applies m_activation to the first m_nclasses elements of y in place.
    * @param margin When true nothing is applied.
    * @return XGBOOST_SOFTMAX ? index of the largest y : -1
    */
   int applyActivation(double &y[], const bool margin)
   {
      if (margin || m_activation == XGBOOST_MARGIN) {
         return -1;
      }
      int i = 0;
      switch (m_activation) {
         case XGBOOST_EXP:
            for (i = m_nclasses - 1; i > -1; i--) {
               y[i] = MathExp(y[i]);
            }
            return -1;
         case XGBOOST_SIGMOID:
            for (i = m_nclasses - 1; i > -1; i--) {
               y[i] = 1.0 / (1.0 + MathExp(-y[i]));
            }
            return -1;
         case XGBOOST_SOFTPROB:
         case XGBOOST_SOFTMAX:
         {
            // Seed the maximum with a real element. Seeding with 0.0 makes every
            // exponential underflow when all sums are strongly negative, and the
            // normalisation below then divides by zero.
            double peak = m_nclasses > 0 ? y[m_nclasses - 1] : 0.0;
            for (i = m_nclasses - 2; i > -1; i--) {
               if (y[i] > peak) {
                  peak = y[i];
               }
            }
            const float maxf = (float)(peak);
            double sum = 0.0;
            for (i = m_nclasses - 1; i > -1; i--) {
               y[i] = MathExp(y[i] - maxf);
               sum += y[i];
            }
            const float sumf = (float)(sum);
            for (i = m_nclasses - 1; i > -1; i--) {
               y[i] /= sumf;
            }
            if (m_activation != XGBOOST_SOFTMAX) {
               return -1;
            }
            int best = 0;
            double top = 0.0;
            for (i = m_nclasses - 1; i > -1; i--) {
               if (y[i] > top) {
                  top = y[i];
                  best = i;
               }
            }
            return best;
         }
         case XGBOOST_BINARY_HINGE:
            for (i = m_nclasses - 1; i > -1; i--) {
               y[i] = y[i] > 0.0 ? 1.0 : 0.0;
            }
            return -1;
      }
      return -1;
   }

   /**
    * Counts the nodes of one tree per depth level; used to generate node ids
    * that are easy to compare with the text dump of XGBoost.
    * @param nodeid Counters; the node visited at `depth` increments nodeid[depth].
    * @param root   Offset of the tree record in m_trees.
    * @param k      Offset of the node to visit.
    * @param depth  Counter index of this node (dump() starts with 1 for the tree root).
    * @param size   Kept for interface compatibility; the current array size is queried
    *               directly because a size passed down the recursion becomes stale as
    *               soon as a sibling subtree has grown the array.
    */
   void dumpNode(ulong &nodeid[], const int root, const int k, const int depth, const int size)
   {
      const int have = ArraySize(nodeid);
      if (have <= depth) {
         if (ArrayResize(nodeid, depth + 1, 100) <= depth) {
            return; // out of memory: the level cannot be counted
         }
         // ArrayResize does not initialise the new elements.
         for (int j = have; j <= depth; j++) {
            nodeid[j] = 0;
         }
      }
      nodeid[depth]++;
      if (m_trees[k] == -1.0) {
         return;
      }
      dumpNode(nodeid, root, k + 3, depth + 1, ArraySize(nodeid));
      dumpNode(nodeid, root, root + (int)(m_trees[k+2] + 0.3), depth + 1, ArraySize(nodeid));
   }

   /**
    * Writes one node and, recursively, its children to the file.
    * @param hFile  Handle of the output file.
    * @param nodeid Next free node id per depth level; advanced as ids are handed out.
    * @param fnames Feature names; used only when fnsize > 0.
    * @param fnsize Number of elements in fnames.
    * @param json   Write JSON instead of the text format?
    * @param root   Offset of the tree record in m_trees.
    * @param k      Offset of the node to write.
    * @param depth  Depth of the node (0 for the tree root).
    * @param first  JSON only: append a separator because another element follows.
    */
   void dumpToFile(const int hFile, ulong &nodeid[], const string &fnames[], const int fnsize,
                   const bool json, const int root, const int k, const int depth, const bool first)
   {
      int i = 0;
      const int depth1 = depth + 1;
      // The children get the next two ids of the following depth level.
      const ulong yes = nodeid[depth1];
      const ulong no = yes + 1;
      string tabs = "";
      if (json) {
         for (i = depth1; i > 0; i--) {
            tabs += " ";
         }
      } else {
         for (i = depth; i > 0; i--) {
            tabs += "\t";
         }
      }
      if (m_trees[k] == -1.0) {
         if (json) {
            FileWrite(hFile, tabs,
                      "{ \"nodeid\": ", nodeid[depth]++,
                      ", \"leaf\": ", m_trees[k+1],
                      first ? " }, " : " }");
         } else {
            FileWrite(hFile, tabs, nodeid[depth]++, ":leaf=", m_trees[k+1]);
         }
         return;
      }
      if (json) {
         FileWrite(hFile, tabs,
                   "{ \"nodeid\": ", nodeid[depth]++,
                   ", \"depth\": ", depth,
                   ", \"split\": ", fnsize > 0 ? "\"" + fnames[(int)(m_trees[k] + 0.3)] + "\""
                                               : (string)((int)(m_trees[k] + 0.3)),
                   ", \"split_condition\": ", m_trees[k+1],
                   ", \"yes\": ", yes, ", \"no\": ", no, ", \"missing\": ", yes, " , \"children\": [");
      } else {
         FileWrite(hFile, tabs, nodeid[depth]++,
                   ":[", fnsize > 0 ? fnames[(int)(m_trees[k] + 0.3)] : "f" + (string)((int)(m_trees[k] + 0.3)),
                   "<", m_trees[k+1],
                   "] yes=", yes, ",no=", no, ",missing=", yes);
      }
      // "yes" child first (it is followed by the "no" child), then the "no" child.
      dumpToFile(hFile, nodeid, fnames, fnsize, json, root, k + 3, depth1, true);
      dumpToFile(hFile, nodeid, fnames, fnsize, json, root, root + (int)(m_trees[k+2] + 0.3), depth1, false);
      if (json) {
         FileWrite(hFile, tabs, first ? "]}," + (depth > 0 ? " " : "") : "]}");
      }
   }
};
// Host-access flag bits that are OR-ed into the flags argument of CLBufferCreate()
// (see CXGBoostOpenCL::createOpenCL). Presumably they mirror the OpenCL cl_mem_flags
// constants of the same names - confirm against the OpenCL headers.
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
#define CL_MEM_HOST_READ_ONLY (1 << 8)
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
/**
 * CXGBoost variant that evaluates all trees in parallel with OpenCL
 * (one work item per tree) and sums the per-tree results on the host.
 * Call createOpenCL() after the model has been loaded; until then, and
 * whenever an OpenCL call fails, predict() uses CXGBoost::predict().
 */
class CXGBoostOpenCL : public CXGBoost
{
protected:
   double out[];  // per-tree results read back from h_by
   int h_context; // OpenCL context
   int h_program; // compiled program
   int h_kernel;  // "predict" kernel
   int h_bx;      // buffer: input values (m_nvars doubles)
   int h_by;      // buffer: per-tree outputs (m_ntrees doubles)
   int h_btrees;  // buffer: copy of m_trees
   int h_broots;  // buffer: offset of every tree record inside m_trees

public:
   CXGBoostOpenCL() : CXGBoost()
   {
      h_context = INVALID_HANDLE;
      h_program = INVALID_HANDLE;
      h_kernel = INVALID_HANDLE;
      h_bx = INVALID_HANDLE;
      h_by = INVALID_HANDLE;
      h_btrees = INVALID_HANDLE;
      h_broots = INVALID_HANDLE;
   }

   ~CXGBoostOpenCL()
   {
      freeOpenCL();
   }

   /**
    * Releases every OpenCL object owned by this instance and marks all handles
    * as invalid. Safe to call repeatedly.
    */
   void freeOpenCL()
   {
      if (h_bx != INVALID_HANDLE) {
         CLBufferFree(h_bx);
      }
      if (h_by != INVALID_HANDLE) {
         CLBufferFree(h_by);
      }
      if (h_btrees != INVALID_HANDLE) {
         CLBufferFree(h_btrees);
      }
      if (h_broots != INVALID_HANDLE) {
         CLBufferFree(h_broots);
      }
      if (h_kernel != INVALID_HANDLE) {
         CLKernelFree(h_kernel);
      }
      if (h_program != INVALID_HANDLE) {
         CLProgramFree(h_program);
      }
      if (h_context != INVALID_HANDLE) {
         CLContextFree(h_context);
      }
      h_context = INVALID_HANDLE;
      h_program = INVALID_HANDLE;
      h_kernel = INVALID_HANDLE;
      h_bx = INVALID_HANDLE;
      h_by = INVALID_HANDLE;
      h_btrees = INVALID_HANDLE;
      h_broots = INVALID_HANDLE;
   }

   /**
    * Builds the kernel and uploads the currently loaded model.
    * Objects from a previous call are released first. Must be called again
    * after the model has been reloaded.
    * @param device Device selector passed to CLContextCreate().
    * @return false when the model has no trees or any OpenCL object could not be
    *         created; in that case nothing stays allocated.
    */
   bool createOpenCL(const int device = CL_USE_CPU_ONLY)
   {
      freeOpenCL();
      if (m_ntrees < 1) {
         return false;
      }
      h_context = CLContextCreate(device);
      if (h_context == INVALID_HANDLE) {
         return false;
      }
      // Offsets of the tree records: every record starts with its own length.
      int root[];
      ArrayResize(root, m_ntrees);
      ArrayResize(out, m_ntrees);
      root[0] = 0;
      for (int r = 1; r < m_ntrees; r++) {
         const int prev = root[r-1];
         root[r] = prev + (int)(m_trees[prev] + 0.3);
      }
      // TODO (note kept from the original author, wording unclear): check the roots array
      // against CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, since it is passed as `constant`.
      h_program = CLProgramCreate(h_context,
         "#pragma OPENCL EXTENSION cl_khr_fp64 : enable \r\n"
         "kernel void predict(const global double *restrict x, global double *restrict y, \r\n"
         " const global double *restrict m_trees, constant int *restrict roots) \r\n"
         "{ \r\n"
         " const double bscore = " + (string)(m_nclasses > 1 ? m_base_score : 0.0) + "; \r\n"
         " const int idx = get_global_id(0); \r\n"
         " const int root = roots[idx]; \r\n"
         " int k = root + 1; \r\n"
         " while (true) { \r\n"
         " if (m_trees[k] == -1.0) { \r\n"
         " y[idx] = (float)(m_trees[k+1] + bscore); \r\n"
         " break; \r\n"
         " } \r\n"
         " if ((float)(x[(int)(m_trees[k] + 0.3)]) < (float)(m_trees[k+1])) { \r\n"
         " k += 3; \r\n"
         " } else { \r\n"
         " k = root + (int)(m_trees[k+2] + 0.3); \r\n"
         " } \r\n"
         " } \r\n"
         "} \r\n"
      );
      if (h_program == INVALID_HANDLE) {
         freeOpenCL();
         return false;
      }
      h_kernel = CLKernelCreate(h_program, "predict");
      if (h_kernel == INVALID_HANDLE) {
         freeOpenCL();
         return false;
      }
      // Kernel argument 0: input values, written by the host on every predict().
      h_bx = CLBufferCreate(h_context, m_nvars * sizeof(double),
                            CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
      if (h_bx == INVALID_HANDLE ||
          CLSetKernelArgMem(h_kernel, 0, h_bx) == false) {
         freeOpenCL();
         return false;
      }
      // Kernel argument 1: one result per tree, read back by the host.
      h_by = CLBufferCreate(h_context, m_ntrees * sizeof(double),
                            CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_ALLOC_HOST_PTR);
      if (h_by == INVALID_HANDLE ||
          CLSetKernelArgMem(h_kernel, 1, h_by) == false) {
         freeOpenCL();
         return false;
      }
      // Kernel argument 2: the serialized forest, uploaded once.
      h_btrees = CLBufferCreate(h_context, m_bufsize * sizeof(double),
                                CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
      if (h_btrees == INVALID_HANDLE ||
          CLBufferWrite(h_btrees, m_trees) == 0 ||
          CLSetKernelArgMem(h_kernel, 2, h_btrees) == false) {
         freeOpenCL();
         return false;
      }
      // Kernel argument 3: offsets of the tree records, uploaded once.
      h_broots = CLBufferCreate(h_context, m_ntrees * sizeof(int),
                                CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
      if (h_broots == INVALID_HANDLE ||
          CLBufferWrite(h_broots, root) == 0 ||
          CLSetKernelArgMem(h_kernel, 3, h_broots) == false) {
         freeOpenCL();
         return false;
      }
      return true;
   }

   /**
    * Evaluates the forest for one sample on the OpenCL device.
    * Falls back to CXGBoost::predict() when createOpenCL() has not succeeded for
    * the current model or when an OpenCL call fails.
    * @param x      Input values; the first m_nvars elements are uploaded.
    * @param y      Receives the outputs; grown when too small and zeroed first.
    * @param margin When true, the activation is skipped and raw sums are returned.
    * @return XGBOOST_SOFTMAX ? index of the largest y : -1
    */
   int predict(double &x[], double &y[], const bool margin = false)
   {
      if (h_kernel == INVALID_HANDLE || h_bx == INVALID_HANDLE || h_by == INVALID_HANDLE ||
          m_ntrees < 1 || ArraySize(out) != m_ntrees) {
         return CXGBoost::predict(x, y, margin);
      }
      int i = 0;
      int idx = -1;
      const uint global_work_offset[1] = {0};
      uint global_work_size[1];
      global_work_size[0] = m_ntrees;
      // Stale contents of `out` must never be summed, so every step is checked.
      if (CLBufferWrite(h_bx, x, 0, 0, m_nvars) == 0 ||
          CLExecute(h_kernel, 1, global_work_offset, global_work_size) == false ||
          CLBufferRead(h_by, out) == 0) {
         return CXGBoost::predict(x, y, margin);
      }
      // y[0] is written even when m_nclasses is 0, so make sure it exists.
      const int needed = m_nclasses > 1 ? m_nclasses : 1;
      if (ArraySize(y) < needed) {
         ArrayResize(y, needed);
      }
      ZeroMemory(y);
      // This piece of code can be transferred to OpenCL using a barrier.
      // out[t] belongs to the tree at position t, and the trees are dealt to the outputs
      // in storage order (0, 1, ..., m_nclasses-1, 0, ...) exactly as in CXGBoost::predict().
      if (m_nclasses < 2) {
         for (i = 0; i < m_ntrees; i++) {
            y[0] += out[i];
         }
      } else {
         for (i = 0; i < m_ntrees; i++) {
            y[i % m_nclasses] += out[i];
         }
      }
      // The activation below mirrors the one used by CXGBoost::predict().
      if (margin || m_activation == XGBOOST_MARGIN) {
         return -1;
      }
      if (m_activation == XGBOOST_EXP) {
         for (i = m_nclasses - 1; i > -1; i--) {
            y[i] = MathExp(y[i]);
         }
         return -1;
      }
      if (m_activation == XGBOOST_SIGMOID) {
         for (i = m_nclasses - 1; i > -1; i--) {
            y[i] = 1.0 / (1.0 + MathExp(-y[i]));
         }
         return -1;
      }
      if (m_activation == XGBOOST_SOFTPROB || m_activation == XGBOOST_SOFTMAX) {
         // Seed the maximum with a real element. Seeding with 0.0 makes every
         // exponential underflow when all sums are strongly negative, and the
         // normalisation below then divides by zero.
         double sum = m_nclasses > 0 ? y[m_nclasses - 1] : 0.0;
         for (i = m_nclasses - 2; i > -1; i--) {
            if (y[i] > sum) {
               sum = y[i];
            }
         }
         const float maxf = (float)(sum);
         sum = 0.0;
         for (i = m_nclasses - 1; i > -1; i--) {
            y[i] = MathExp(y[i] - maxf);
            sum += y[i];
         }
         const float sumf = (float)(sum);
         for (i = m_nclasses - 1; i > -1; i--) {
            y[i] /= sumf;
         }
         if (m_activation == XGBOOST_SOFTMAX) {
            idx = 0;
            sum = 0.0;
            for (i = m_nclasses - 1; i > -1; i--) {
               if (y[i] > sum) {
                  sum = y[i];
                  idx = i;
               }
            }
            return idx;
         }
         return -1;
      }
      if (m_activation == XGBOOST_BINARY_HINGE) {
         for (i = m_nclasses - 1; i > -1; i--) {
            y[i] = y[i] > 0.0 ? 1.0 : 0.0;
         }
         return -1;
      }
      return -1;
   }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment