Created
September 15, 2022 11:11
-
-
Save Roffild/5043e14696e7ce198d8b8b0fafc5de14 to your computer and use it in GitHub Desktop.
XGBoost for MQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* | |
* https://github.com/Roffild/RoffildLibrary | |
*/ | |
/**
 * Output transformation that predict() applies to the summed tree outputs.
 * The numeric values are read as integers from the binary model file and cast
 * to this enum, so they must stay stable.
 */
enum ENUM_XGBOOST_ACTIVATIONS
{
    XGBOOST_MARGIN = 0,       // no transformation, the raw sums are returned
    XGBOOST_EXP = 1,          // y = exp(y)
    XGBOOST_SIGMOID = 2,      // y = 1 / (1 + exp(-y))
    XGBOOST_SOFTPROB = 3,     // softmax over all classes, predict() returns -1
    XGBOOST_SOFTMAX = 4,      // softmax over all classes, predict() returns the index of the largest value
    XGBOOST_BINARY_HINGE = 5  // y = (y > 0) ? 1 : 0
};
/**
 * XGBoost model inference for MQL5.
 *
 * The XGBoost model has to be created and trained in another programming language.
 * A converter that prepares an XGBoost model for this class is available for Python (LINK).
 * The storage layout is based on the simple binary format that Alglib uses for its
 * random forest (class CDecisionForest).
 *
 * Comparing in double precision loses accuracy because the wrong branches get taken:
 * XGBoost stores everything as float (float32). This class therefore converts double
 * to float before comparing, which reduces the error to a minimum.
 * ```
 * double(float64) = 0.366326530612243 (YES)
 * float(float32) = 0.366326541 (NO)
 * 338:[f171<0.366326541] yes=483,no=484,missing=483
 * ```
 */
class CXGBoost
{
public:
    ENUM_XGBOOST_ACTIVATIONS m_activation; // output transformation applied by predict()
    int m_nvars;         // number of input variables as stored in the model header
    int m_nclasses;      // number of outputs; tree t contributes to output (t % m_nclasses)
    int m_ntrees;        // number of trees stored in m_trees
    int m_bufsize;       // number of elements of m_trees as stored in the model header
    double m_base_score; // added to every leaf value when m_nclasses > 1
    /**
     * Flat forest buffer. A tree that starts at offset `root` is laid out as:
     *   m_trees[root]  - length of the tree in elements (offset to the next tree);
     *   split node k   - [k] feature index, [k+1] threshold, [k+2] offset of the "no" child
     *                    relative to root; the "yes" child follows directly at k + 3;
     *   leaf node k    - [k] == -1.0, [k+1] leaf value.
     * The first node of a tree is at root + 1.
     */
    double m_trees[];

    CXGBoost()
    {
        reset();
    }

    /**
     * Evaluate the forest for one sample.
     * @param x       Input features, indexed by the feature indices stored in the trees.
     * @param y       Receives the outputs; resized to m_nclasses when smaller and zeroed first.
     * @param margin  true - skip the activation and return the raw sums.
     * @return XGBOOST_SOFTMAX ? index of the largest value in y : -1
     */
    int predict(const double &x[], double &y[], const bool margin = false)
    {
        const double bscore = m_nclasses > 1 ? m_base_score : 0.0;
        if (ArraySize(y) < m_nclasses) {
            ArrayResize(y, m_nclasses);
        }
        ZeroMemory(y);
        if (m_nclasses < 1) {
            // Nothing to accumulate into; avoids indexing an empty output array.
            return -1;
        }
        int root = 0;
        int cls = -1;
        for (int t = 0; t < m_ntrees; t++) {
            // Trees are assigned to the outputs round-robin.
            if (++cls == m_nclasses) {
                cls = 0;
            }
            int k = root + 1;
            while (m_trees[k] != -1.0) {
                // Both sides are narrowed to float so the comparison matches float32 thresholds.
                if ((float)(x[asInt(m_trees[k])]) < (float)(m_trees[k+1])) {
                    k += 3;
                } else {
                    k = root + asInt(m_trees[k+2]);
                }
            }
            y[cls] += (float)(m_trees[k+1] + bscore);
            root += asInt(m_trees[root]);
        }
        return applyActivation(y, margin);
    }

    /**
     * Load from the binary format (fast).
     * Reads five integers (activation, nvars, nclasses, ntrees, bufsize), the base score
     * as a double and then bufsize doubles of tree data.
     * @param hfile Handle of a file opened with FILE_BIN|FILE_READ.
     * @return true when the header is sane and the whole tree buffer was read;
     *         on failure the model is reset to the empty state.
     */
    bool load(int hfile)
    {
        m_activation = (ENUM_XGBOOST_ACTIVATIONS)FileReadInteger(hfile);
        m_nvars = FileReadInteger(hfile);
        m_nclasses = FileReadInteger(hfile);
        m_ntrees = FileReadInteger(hfile);
        m_bufsize = FileReadInteger(hfile);
        m_base_score = FileReadDouble(hfile);
        if (m_nvars < 0 || m_nclasses < 0 || m_ntrees < 0 || m_bufsize < 0 ||
            ArrayResize(m_trees, m_bufsize) != m_bufsize) {
            reset();
            return false;
        }
        if (m_bufsize > 0 && (int)FileReadArray(hfile, m_trees, 0, m_bufsize) != m_bufsize) {
            // Truncated or unreadable file: do not leave a half-filled forest behind.
            reset();
            return false;
        }
        return true;
    }

    /**
     * Load from the binary format (fast).
     * @param filename File name to read.
     * @param common   Use FILE_COMMON flag when opening the file?
     * @return false when the file cannot be opened or its content cannot be read completely.
     */
    bool load(const string filename, const bool common = true)
    {
        int hfile = FileOpen(filename, FILE_BIN|FILE_READ|FILE_SHARE_READ|(common ? FILE_COMMON : 0));
        if (hfile == INVALID_HANDLE) {
            return false;
        }
        const bool loaded = load(hfile);
        FileClose(hfile);
        return loaded;
    }

    /**
     * Dump model into a text or JSON file.
     * Simulating xgboost.Booster.dump_model() without statistics.
     * Features are written by index.
     * @param filename File name to save.
     * @param json Save as JSON?
     * @param common Use FILE_COMMON flag when saving file?
     * @param tree_index Use the whole forest (WHOLE_ARRAY) or just one tree.
     * @return false when the file cannot be opened for writing.
     */
    bool dump(const string filename, const bool json = false,
              const bool common = true, const int tree_index = WHOLE_ARRAY)
    {
        string fnames[];
        return dump(filename, fnames, json, common, tree_index);
    }

    /**
     * Dump model into a text or JSON file.
     * Simulating xgboost.Booster.dump_model() without statistics.
     * @param filename File name to save.
     * @param fnames An array of feature names. If the array is empty (or has no entry
     *               for a feature), then the index is used.
     * @param json Save as JSON?
     * @param common Use FILE_COMMON flag when saving file?
     * @param tree_index Use the whole forest (WHOLE_ARRAY) or just one tree.
     * @return false when the file cannot be opened for writing.
     */
    bool dump(const string filename, const string &fnames[], const bool json = false,
              const bool common = true, const int tree_index = WHOLE_ARRAY)
    {
        const int hFile = FileOpen(filename, FILE_WRITE|FILE_ANSI|(common ? FILE_COMMON : 0), "", CP_UTF8);
        if (hFile == INVALID_HANDLE) {
            return false;
        }
        const int fnsize = ArraySize(fnames);
        const bool whole = (tree_index == WHOLE_ARRAY);
        int root = 0;
        ulong nodeid[];
        ArrayResize(nodeid, 100, 100);
        if (json) {
            FileWrite(hFile, "[");
        }
        for (int i = 0; i < m_ntrees; i++) {
            if (whole || i == tree_index) {
                if (json == false) {
                    FileWrite(hFile, "booster[", i, "]:");
                }
                // Pass 1: count the nodes on every depth level (stored at index depth + 1).
                ZeroMemory(nodeid);
                dumpNode(nodeid, root, root + 1, 1, ArraySize(nodeid));
                // Prefix sum: nodeid[d] becomes the first node id of depth level d.
                const int ndsize = ArraySize(nodeid);
                for (int k = 1; k < ndsize; k++) {
                    nodeid[k] += nodeid[k-1];
                }
                // Pass 2: write the nodes, handing out ids level by level.
                dumpToFile(hFile, nodeid, fnames, fnsize, json, root, root + 1, 0,
                           whole && i < (m_ntrees - 1));
                if (whole == false) {
                    break; // the single requested tree has been written
                }
            }
            root += asInt(m_trees[root]);
        }
        if (json) {
            FileWriteString(hFile, "]");
        }
        FileClose(hFile);
        return true;
    }

protected:
    /**
     * Put the model into the empty state (no trees, margin output).
     */
    void reset()
    {
        m_activation = XGBOOST_MARGIN;
        m_nvars = 0;
        m_nclasses = 0;
        m_ntrees = 0;
        m_bufsize = 0;
        m_base_score = 0.0;
        ArrayFree(m_trees);
    }

    /**
     * Convert an integer that is stored as a double in m_trees.
     * The small offset is presumably a guard against a value ending up slightly
     * below the integer it represents.
     */
    int asInt(const double value) const
    {
        return (int)(value + 0.3);
    }

    /**
     * Apply m_activation to the first m_nclasses elements of y (m_nclasses >= 1 is expected).
     * @return XGBOOST_SOFTMAX ? index of the largest value in y : -1
     */
    int applyActivation(double &y[], const bool margin)
    {
        int i = 0;
        if (margin || m_activation == XGBOOST_MARGIN) {
            return -1;
        }
        if (m_activation == XGBOOST_EXP) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = MathExp(y[i]);
            }
            return -1;
        }
        if (m_activation == XGBOOST_SIGMOID) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = 1.0 / (1.0 + MathExp(-y[i]));
            }
            return -1;
        }
        if (m_activation == XGBOOST_SOFTPROB || m_activation == XGBOOST_SOFTMAX) {
            // The maximum search starts from the first element, not from 0.0:
            // otherwise strongly negative margins underflow every exp() to zero
            // and the normalisation below divides by zero.
            double top = y[0];
            for (i = m_nclasses - 1; i > 0; i--) {
                if (y[i] > top) {
                    top = y[i];
                }
            }
            const float topf = (float)(top);
            double sum = 0.0;
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = MathExp(y[i] - topf);
                sum += y[i];
            }
            const float sumf = (float)(sum);
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] /= sumf;
            }
            if (m_activation == XGBOOST_SOFTMAX) {
                int best = 0;
                double bestval = 0.0;
                for (i = m_nclasses - 1; i > -1; i--) {
                    if (y[i] > bestval) {
                        bestval = y[i];
                        best = i;
                    }
                }
                return best;
            }
            return -1;
        }
        if (m_activation == XGBOOST_BINARY_HINGE) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = y[i] > 0.0 ? 1.0 : 0.0;
            }
            return -1;
        }
        return -1;
    }

    /**
     * Generate the node id counters that make the dump comparable in the text format:
     * counts the node at offset k and all nodes below it, one counter per depth level.
     * @param nodeid Counters; the element at index `depth` is incremented for this node.
     * @param root   Offset of the tree in m_trees.
     * @param k      Offset of the node in m_trees.
     * @param depth  Counter index of this node (tree depth + 1).
     * @param size   Kept for signature compatibility; the current array size is queried
     *               directly because a value passed down by the caller can be stale
     *               after a sibling branch has grown the array.
     */
    void dumpNode(ulong &nodeid[], const int root, const int k, const int depth, const int size)
    {
        const int depth1 = depth + 1;
        int have = ArraySize(nodeid);
        if (have < depth1) {
            ArrayResize(nodeid, depth1, 100);
            // New elements are cleared explicitly before they are used as counters.
            for (; have < depth1; have++) {
                nodeid[have] = 0;
            }
        }
        nodeid[depth]++;
        if (m_trees[k] == -1.0) {
            return;
        }
        dumpNode(nodeid, root, k + 3, depth1, ArraySize(nodeid));
        dumpNode(nodeid, root, root + asInt(m_trees[k+2]), depth1, ArraySize(nodeid));
    }

    /**
     * Write the node at offset k and everything below it to the file.
     * @param hFile  Handle of the output file.
     * @param nodeid Next free node id per depth level; advanced while writing.
     * @param fnames Feature names; the feature index is written when a name is missing.
     * @param fnsize ArraySize(fnames).
     * @param json   Write JSON instead of the text format.
     * @param root   Offset of the tree in m_trees.
     * @param k      Offset of the node in m_trees.
     * @param depth  Depth of the node inside the tree (0 for the first node).
     * @param first  JSON only: another element follows, so a separating comma is written.
     */
    void dumpToFile(const int hFile, ulong &nodeid[], const string &fnames[], const int fnsize,
                    const bool json, const int root, const int k, const int depth, const bool first)
    {
        const int depth1 = depth + 1;
        // Children are numbered consecutively on the next level: "yes" first, then "no".
        const ulong yes = nodeid[depth1];
        const ulong no = yes + 1;
        const ulong id = nodeid[depth]++;
        string tabs = "";
        if (json) {
            for (int i = depth1; i > 0; i--) {
                tabs += " ";
            }
        } else {
            for (int i = depth; i > 0; i--) {
                tabs += "\t";
            }
        }
        if (m_trees[k] == -1.0) {
            if (json) {
                FileWrite(hFile, tabs,
                          "{ \"nodeid\": ", id,
                          ", \"leaf\": ", m_trees[k+1],
                          first ? " }, " : " }");
            } else {
                FileWrite(hFile, tabs, id, ":leaf=", m_trees[k+1]);
            }
            return;
        }
        const int feature = asInt(m_trees[k]);
        // Use the name only when fnames really has an entry for this feature.
        const bool named = (feature >= 0 && feature < fnsize);
        if (json) {
            FileWrite(hFile, tabs,
                      "{ \"nodeid\": ", id,
                      ", \"depth\": ", depth,
                      ", \"split\": ", named ? "\"" + fnames[feature] + "\""
                                             : (string)(feature),
                      ", \"split_condition\": ", m_trees[k+1],
                      ", \"yes\": ", yes, ", \"no\": ", no, ", \"missing\": ", yes, " , \"children\": [");
        } else {
            FileWrite(hFile, tabs, id,
                      ":[", named ? fnames[feature] : "f" + (string)(feature),
                      "<", m_trees[k+1],
                      "] yes=", yes, ",no=", no, ",missing=", yes);
        }
        dumpToFile(hFile, nodeid, fnames, fnsize, json, root, k + 3, depth1, true);
        dumpToFile(hFile, nodeid, fnames, fnsize, json, root, root + asInt(m_trees[k+2]), depth1, false);
        if (json) {
            FileWrite(hFile, tabs, first ? "]}," + (depth > 0 ? " " : "") : "]}");
        }
    }
};
// Host-access bits of the OpenCL cl_mem_flags bit field, used by
// CXGBoostOpenCL::createOpenCL() when creating its buffers.
// Guarded so that an earlier definition with the same name is not redefined.
#ifndef CL_MEM_HOST_WRITE_ONLY
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
#endif
#ifndef CL_MEM_HOST_READ_ONLY
#define CL_MEM_HOST_READ_ONLY (1 << 8)
#endif
#ifndef CL_MEM_HOST_NO_ACCESS
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
#endif
/**
 * CXGBoost variant that evaluates all trees in parallel with OpenCL:
 * one work item per tree writes the (float-rounded) leaf value of its tree,
 * the per-class sums and the activation are computed on the host.
 * Call createOpenCL() after the model has been loaded and before predict().
 */
class CXGBoostOpenCL : public CXGBoost
{
protected:
    double out[];   // host copy of the per-tree outputs, one element per tree
    int h_context;  // OpenCL context
    int h_program;  // compiled program that contains the "predict" kernel
    int h_kernel;   // "predict" kernel
    int h_bx;       // kernel argument 0: input features
    int h_by;       // kernel argument 1: per-tree outputs
    int h_btrees;   // kernel argument 2: copy of m_trees
    int h_broots;   // kernel argument 3: start offset of every tree in m_trees

    /**
     * Mark every OpenCL handle as not created.
     */
    void invalidateHandles()
    {
        h_context = INVALID_HANDLE;
        h_program = INVALID_HANDLE;
        h_kernel = INVALID_HANDLE;
        h_bx = INVALID_HANDLE;
        h_by = INVALID_HANDLE;
        h_btrees = INVALID_HANDLE;
        h_broots = INVALID_HANDLE;
    }

public:
    CXGBoostOpenCL() : CXGBoost()
    {
        invalidateHandles();
    }

    ~CXGBoostOpenCL()
    {
        freeOpenCL();
    }

    /**
     * Release every OpenCL object that was created by createOpenCL().
     * Safe to call repeatedly.
     */
    void freeOpenCL()
    {
        // Each handle is released exactly once (h_bx used to free h_btrees by mistake).
        if (h_bx != INVALID_HANDLE) {
            CLBufferFree(h_bx);
        }
        if (h_by != INVALID_HANDLE) {
            CLBufferFree(h_by);
        }
        if (h_btrees != INVALID_HANDLE) {
            CLBufferFree(h_btrees);
        }
        if (h_broots != INVALID_HANDLE) {
            CLBufferFree(h_broots);
        }
        if (h_kernel != INVALID_HANDLE) {
            CLKernelFree(h_kernel);
        }
        if (h_program != INVALID_HANDLE) {
            CLProgramFree(h_program);
        }
        if (h_context != INVALID_HANDLE) {
            CLContextFree(h_context);
        }
        invalidateHandles();
    }

    /**
     * Build the OpenCL context, program, kernel and buffers for the loaded model.
     * Objects of a previous call are released first.
     * @param device Device selector passed to CLContextCreate().
     * @return false when the model is empty or inconsistent, or when any OpenCL object
     *         cannot be created; nothing stays allocated in that case.
     */
    bool createOpenCL(const int device = CL_USE_CPU_ONLY)
    {
        freeOpenCL();
        const int bufsize = ArraySize(m_trees);
        if (m_ntrees < 1 || bufsize < 1) {
            return false;
        }
        // Start offset of every tree: each tree begins with its own length.
        int root[];
        if (ArrayResize(root, m_ntrees) != m_ntrees || ArrayResize(out, m_ntrees) != m_ntrees) {
            return false;
        }
        root[0] = 0;
        for (int t = 1; t < m_ntrees; t++) {
            const int prev = root[t-1];
            const int next = prev + (int)(m_trees[prev] + 0.3);
            if (next <= prev || next >= bufsize) {
                return false; // tree chain runs outside of the buffer
            }
            root[t] = next;
        }
        h_context = CLContextCreate(device);
        if (h_context == INVALID_HANDLE) {
            return false;
        }
        // TODO: the roots buffer lives in constant memory; check it against
        // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.
        h_program = CLProgramCreate(h_context,
            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable \r\n"
            "kernel void predict(const global double *restrict x, global double *restrict y, \r\n"
            " const global double *restrict m_trees, constant int *restrict roots) \r\n"
            "{ \r\n"
            " const double bscore = " + (string)(m_nclasses > 1 ? m_base_score : 0.0) + "; \r\n"
            " const int idx = get_global_id(0); \r\n"
            " const int root = roots[idx]; \r\n"
            " int k = root + 1; \r\n"
            " while (true) { \r\n"
            " if (m_trees[k] == -1.0) { \r\n"
            " y[idx] = (float)(m_trees[k+1] + bscore); \r\n"
            " break; \r\n"
            " } \r\n"
            " if ((float)(x[(int)(m_trees[k] + 0.3)]) < (float)(m_trees[k+1])) { \r\n"
            " k += 3; \r\n"
            " } else { \r\n"
            " k = root + (int)(m_trees[k+2] + 0.3); \r\n"
            " } \r\n"
            " } \r\n"
            "} \r\n"
        );
        if (h_program == INVALID_HANDLE) {
            freeOpenCL();
            return false;
        }
        h_kernel = CLKernelCreate(h_program, "predict");
        if (h_kernel == INVALID_HANDLE) {
            freeOpenCL();
            return false;
        }
        // Argument 0: input features, written by the host before every run.
        h_bx = CLBufferCreate(h_context, m_nvars * sizeof(double),
                              CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
        if (h_bx == INVALID_HANDLE ||
            CLSetKernelArgMem(h_kernel, 0, h_bx) == false) {
            freeOpenCL();
            return false;
        }
        // Argument 1: per-tree outputs, read back by the host after every run.
        h_by = CLBufferCreate(h_context, m_ntrees * sizeof(double),
                              CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_ALLOC_HOST_PTR);
        if (h_by == INVALID_HANDLE ||
            CLSetKernelArgMem(h_kernel, 1, h_by) == false) {
            freeOpenCL();
            return false;
        }
        // Argument 2: the forest itself, uploaded once.
        h_btrees = CLBufferCreate(h_context, m_bufsize * sizeof(double),
                                  CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
        if (h_btrees == INVALID_HANDLE ||
            CLBufferWrite(h_btrees, m_trees) == 0 ||
            CLSetKernelArgMem(h_kernel, 2, h_btrees) == false) {
            freeOpenCL();
            return false;
        }
        // Argument 3: tree start offsets, uploaded once.
        h_broots = CLBufferCreate(h_context, m_ntrees * sizeof(int),
                                  CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR);
        if (h_broots == INVALID_HANDLE ||
            CLBufferWrite(h_broots, root) == 0 ||
            CLSetKernelArgMem(h_kernel, 3, h_broots) == false) {
            freeOpenCL();
            return false;
        }
        return true;
    }

    /**
     * Evaluate the forest for one sample on the OpenCL device.
     * When the OpenCL objects are not available or a call fails, the result is
     * computed by CXGBoost::predict() instead.
     * @param x       Input features; the first m_nvars elements are uploaded.
     * @param y       Receives the outputs; resized when too small and zeroed first.
     * @param margin  true - skip the activation and return the raw sums.
     * @return XGBOOST_SOFTMAX ? index of the largest value in y : -1
     */
    int predict(double &x[], double &y[], const bool margin = false)
    {
        int i = 0;
        const uint global_work_offset[1] = {0};
        uint global_work_size[1];
        //uint local_work_size[1] = {5};
        global_work_size[0] = m_ntrees;
        if (h_kernel == INVALID_HANDLE || m_ntrees < 1 || ArraySize(out) != m_ntrees ||
            CLBufferWrite(h_bx, x, 0, 0, m_nvars) == 0 ||
            CLExecute(h_kernel, 1, global_work_offset, global_work_size) == false ||
            CLBufferRead(h_by, out) == 0) {
            return CXGBoost::predict(x, y, margin);
        }
        // A model with fewer than two classes has a single output.
        const int nout = m_nclasses > 1 ? m_nclasses : 1;
        if (ArraySize(y) < nout) {
            ArrayResize(y, nout);
        }
        ZeroMemory(y);
        // This piece of code can be transferred to OpenCL using a barrier.
        // Tree i belongs to output (i % nout), the same assignment and the same
        // summation order as in CXGBoost::predict().
        for (i = 0; i < m_ntrees; i++) {
            y[i % nout] += out[i];
        }
        // =========== Same activation as in CXGBoost::predict() ===========
        if (margin || m_activation == XGBOOST_MARGIN) {
            return -1;
        }
        if (m_activation == XGBOOST_EXP) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = MathExp(y[i]);
            }
            return -1;
        }
        if (m_activation == XGBOOST_SIGMOID) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = 1.0 / (1.0 + MathExp(-y[i]));
            }
            return -1;
        }
        if (m_activation == XGBOOST_SOFTPROB || m_activation == XGBOOST_SOFTMAX) {
            // The maximum search starts from the first element, not from 0.0:
            // otherwise strongly negative margins underflow every exp() to zero
            // and the normalisation below divides by zero.
            double top = y[0];
            for (i = m_nclasses - 1; i > 0; i--) {
                if (y[i] > top) {
                    top = y[i];
                }
            }
            const float topf = (float)(top);
            double sum = 0.0;
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = MathExp(y[i] - topf);
                sum += y[i];
            }
            const float sumf = (float)(sum);
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] /= sumf;
            }
            if (m_activation == XGBOOST_SOFTMAX) {
                int best = 0;
                double bestval = 0.0;
                for (i = m_nclasses - 1; i > -1; i--) {
                    if (y[i] > bestval) {
                        bestval = y[i];
                        best = i;
                    }
                }
                return best;
            }
            return -1;
        }
        if (m_activation == XGBOOST_BINARY_HINGE) {
            for (i = m_nclasses - 1; i > -1; i--) {
                y[i] = y[i] > 0.0 ? 1.0 : 0.0;
            }
            return -1;
        }
        return -1;
        // =========== END of the activation ===========
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment