import numpy as np
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb

def softmax(z):
    """Row-wise softmax of an (N, K) array of logits, shifted by the max for numerical stability."""
    z -= np.max(z)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=1)).T
    return sm
def softmaxobj(preds, dtrain):
    """Softmax objective.

    Args:
        preds: (N, K) array, N = #data, K = #classes.
        dtrain: DMatrix object with training data.

    Returns:
        grad: N*K array with gradient values.
        hess: N*K array with second-order gradient values.
    """
    # Label is a vector of class indices for each input example
    labels = dtrain.get_label()
    # When objective=softprob, preds has shape (N, K)
    labels = OneHotEncoder(sparse=False).fit_transform(labels.reshape(-1, 1))
    grad = preds - labels
    hess = 2.0 * preds * (1.0 - preds)
    # Return as 1-d vectors
    return grad.flatten(), hess.flatten()
# Iris dataset
iris = datasets.load_iris()
X, Ymc = iris.data, iris.target
Y = OneHotEncoder(sparse=False).fit_transform(Ymc.reshape(-1, 1))

"""xgboost softmax regression"""
dtrain = xgb.DMatrix(X, label=Ymc)
params = {'max_depth': 2, 'eta': 0.1, 'silent': 1,
          'objective': 'multi:softprob', 'num_class': len(np.unique(Ymc))}
# Fit
model = xgb.train(params, dtrain, 100)
# Evaluate
yhat = model.predict(dtrain)
yhat_labels = np.argmax(yhat, axis=1)
confusion_matrix(Ymc, yhat_labels)
"""xgboost softmax regression via custom loss""" | |
dtrain = xgb.DMatrix(X, label=Ymc) | |
params = {'max_depth': 2, 'eta': 0.1, 'silent': 1, | |
'objective': 'multi:softprob', 'num_class': len(np.unique(Ymc))} | |
# Fit | |
model = xgb.Booster(params, [dtrain]) | |
for _ in range(100): | |
pred = model.predict(dtrain) | |
g, h = softmaxobj(pred, dtrain) | |
model.boost(dtrain, g, h) | |
# Evalute | |
yhat1 = model.predict(dtrain) | |
yhat1_labels = np.argmax(yhat, axis=1) | |
print(confusion_matrix(Ymc, yhat1_labels)) | |
# Compare the two approaches | |
print(confusion_matrix(yhat_labels, yhat1_labels)) | |
np.sum((yhat-yhat1)**2) | |
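As a sanity check on the grad = preds - labels expression used in softmaxobj, here is a minimal standalone sketch (not part of the original gist) that compares it against a finite-difference derivative of the softmax cross-entropy loss for a single example:

import numpy as np

rng = np.random.default_rng(0)
z = rng.normal(size=3)                     # logits for one example, K = 3 classes
y = np.array([0.0, 1.0, 0.0])              # one-hot label; the true class is index 1

def xent(z):
    # Numerically stable softmax cross-entropy for the true class (index 1)
    p = np.exp(z - z.max())
    p = p / p.sum()
    return -np.log(p[1])

p = np.exp(z - z.max())
p = p / p.sum()
analytic = p - y                           # the gradient formula used in softmaxobj

eps = 1e-6
numeric = np.array([
    (xent(z + eps * np.eye(3)[k]) - xent(z - eps * np.eye(3)[k])) / (2 * eps)
    for k in range(3)
])
print(np.allclose(analytic, numeric, atol=1e-5))   # expected: True

Note that the exact per-class second derivative of this loss is p_k * (1 - p_k); the extra factor of 2 in the gist's hess appears to mirror the scaling used inside xgboost's built-in softmax objective rather than the exact second derivative.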
Also, setting the gradient and hessian to zero in softmaxobj,
grad = 0 * labels * (1.0 - preds)
hess = 0 * 2.0 * labels * preds * (1.0 - preds)
didn't change the overall result.
Here the Hessian happens to be diagonal, and therefore could be represented by a 1D array (per instance). What about the general case, where the Hessian is not diagonal?
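For reference, the per-example Hessian of the softmax cross-entropy loss with respect to the logits is the standard result

\[ \frac{\partial^2 L}{\partial z_j \partial z_k} = p_j(\delta_{jk} - p_k), \]

which is not diagonal in general; the code above keeps only the diagonal entries p_k(1 - p_k) (up to the factor of 2 noted earlier), since a custom xgboost objective returns a single gradient and hessian value per prediction and therefore works with a diagonal approximation.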
It seems that something is off in the custom softmax objective functions that I have seen online. I tried to replicate the result of multi:softprob, but it does not work. Here is a reproducible example, similar to the one above:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import log_loss
import xgboost as xgb
# Iris dataset
iris = datasets.load_iris()
X, Ymc = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, Ymc, test_size=0.30, random_state=2019)
def softmaxobj(preds, dtrain):
    labels = dtrain.get_label()
    labels = labels.reshape(-1, 1)
    labels = OneHotEncoder(sparse=False, categories='auto').fit_transform(labels)
    grad = preds - labels
    hess = 2.0 * preds * (1.0 - preds)
    return grad.flatten('F'), hess.flatten('F')
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)
dtest = xgb.DMatrix(X_test)
eval_set = [(dvalid, 'eval')]
param = {'max_depth': '6', 'objective': 'multi:softprob',
         'tree_method': 'hist', 'num_class': 3,
         'eval_metric': 'mlogloss'}
bst = xgb.train(param, dtrain, num_boost_round=100,
                verbose_eval=10, evals=eval_set)
preds = bst.predict(dtest)
print(log_loss(y_test, preds))
[0] eval-mlogloss:0.749971
[10] eval-mlogloss:0.111583
[20] eval-mlogloss:0.107522
[30] eval-mlogloss:0.116207
[40] eval-mlogloss:0.119493
[50] eval-mlogloss:0.125152
[60] eval-mlogloss:0.130457
[70] eval-mlogloss:0.134239
[80] eval-mlogloss:0.138156
[90] eval-mlogloss:0.141714
[99] eval-mlogloss:0.144838
0.14483839407071677
# Train xgboost with the same custom loss function
param = {'max_depth': '6', 'objective': 'multi:softprob',
         'tree_method': 'hist', 'num_class': 3,
         'eval_metric': 'mlogloss'}
bst_cust = xgb.train(param, dtrain, num_boost_round=100,
                     verbose_eval=10, evals=eval_set,
                     obj=softmaxobj)
preds_cust = bst_cust.predict(dtest)
print('custom objective {}'.format(log_loss(y_test, preds_cust)))
[0] eval-mlogloss:1.16279
[10] eval-mlogloss:2.63613
[20] eval-mlogloss:5.60595
[30] eval-mlogloss:10.83
[40] eval-mlogloss:15.2783
[50] eval-mlogloss:17.3254
[60] eval-mlogloss:18.8124
[70] eval-mlogloss:19.3257
[80] eval-mlogloss:18.956
[90] eval-mlogloss:17.9979
[99] eval-mlogloss:17.567
custom objective 16.74834724995825
Does anyone see anything wrong with my implementation? I was able to get the binary custom objective function to work with no issues.
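For reference, a minimal sketch of the standard binary logistic custom objective being referred to here (the formulation used in xgboost's custom-objective demo; logregobj is an illustrative name, not necessarily the poster's exact code):

import numpy as np

def logregobj(preds, dtrain):
    # With a custom objective, preds are raw margin scores, so apply the sigmoid first
    labels = dtrain.get_label()
    p = 1.0 / (1.0 + np.exp(-preds))
    grad = p - labels
    hess = p * (1.0 - p)
    return grad, hess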
I figured out the issue with my softmaxobj: it seems that converting grad.flatten() and hess.flatten() to grad.flatten('F') and hess.flatten('F') greatly impacts the results.
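To make the flattening difference concrete, here is a tiny numpy-only sketch (toy numbers, independent of any particular xgboost version) showing how flatten() and flatten('F') order an (N, K) gradient array:

import numpy as np

# Toy gradient array: N = 2 examples (rows) x K = 3 classes (columns)
grad = np.array([[10, 11, 12],
                 [20, 21, 22]])

print(grad.flatten())      # C (row-major) order:    [10 11 12 20 21 22] -> grouped by example
print(grad.flatten('F'))   # F (column-major) order: [10 20 11 21 12 22] -> grouped by class

The ordering a given xgboost version expects for multi-class custom objectives is easy to get wrong, so it is worth checking the custom run against the built-in multi:softprob run, as done above.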
There is an error in line 65: the correct line is yhat1_labels = np.argmax(yhat1, axis=1).