# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @Chandrak1907 Chandrak1907/xgboost.py
# Last active Oct 1, 2019
#
# Embed
# What would you like to do?
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
# import lightgbm as lgb
from sklearn.metrics import log_loss
import xgboost as xgb
# Load the Iris data set: feature matrix in X, class labels in y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out a fixed share of the rows; the seed keeps the split reproducible.
seed = 111
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=seed, test_size=test_size
)

# Wrap the splits for XGBoost. ``dvalid`` and ``dtest`` are built from the
# same held-out rows: ``dvalid`` carries the labels so it can be scored
# during training, while ``dtest`` is label-free for the predict call.
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)
dtest = xgb.DMatrix(X_test)
eval_set = [(dvalid, 'eval')]
# Misclassification cost table: row i is the per-class weight vector applied
# to the gradient/hessian of a sample whose true class is i. The diagonal is
# zero, so the true-class entry of every sample contributes nothing.
COST_MATRIX = np.array([[0, 2, 4],
                        [2, 0, 2],
                        [4, 2, 0]])


def softmaxobj(preds, dtrain):
    """Cost-weighted softmax objective for ``xgb.train(..., obj=softmaxobj)``.

    Args:
        preds: Per-class scores for every training row, either shaped
            ``(n_samples, n_classes)`` or flattened row-major. They are
            treated as raw margins and pushed through a softmax here.
        dtrain: Object exposing ``get_label()`` that returns the true class
            index of every row (an ``xgb.DMatrix`` in this script).

    Returns:
        ``(grad, hess)``: two flat arrays of length ``n_samples * n_classes``
        holding the softmax cross-entropy gradient and hessian, each scaled
        element-wise by the true class's row of ``COST_MATRIX``.

    NOTE(review): current XGBoost documents that custom objectives receive
    untransformed margins, which is why the softmax is applied here. Very old
    releases (around 0.90) appear to have passed already-transformed
    probabilities; if the project is pinned to such a version, drop the
    softmax step -- confirm against the installed version.
    """
    cost = np.asarray(COST_MATRIX, dtype=float)
    num_class = cost.shape[0]
    labels = np.asarray(dtrain.get_label()).astype(int)

    # Accept both the 2-D and the flattened layout.
    margins = np.asarray(preds, dtype=float).reshape(labels.shape[0], num_class)

    # Softmax, shifted by the row maximum so np.exp cannot overflow.
    shifted = margins - margins.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Fixed-width one-hot encoding: always num_class columns, even when a
    # class is missing from this batch of labels.
    one_hot = np.eye(num_class)[labels]
    # Row lookup is equivalent to one_hot @ cost.
    weights = cost[labels]

    grad = (probs - one_hot) * weights
    hess = 2.0 * probs * (1.0 - probs) * weights
    return grad.flatten(), hess.flatten()
# Train XGBoost with the custom cost-weighted objective, then report log
# loss, accuracy and the confusion matrix on the held-out rows.
from sklearn.metrics import accuracy_score

param = {
    'max_depth': 3,  # the maximum depth of each tree
    'eta': 0.3,  # the training step for each iteration
    'verbosity': 0,  # logging mode - quiet (replaces the removed 'silent' flag)
    'objective': 'multi:softprob',  # multiclass; predict() yields one score per class
    'num_class': 3,  # the number of classes that exist in this dataset
}
# The number of training iterations. The script previously hard-coded 10 in
# the train call while leaving an unused ``num_round = 5`` behind; 10 is the
# value that was actually in effect.
num_round = 10

bst_cust = xgb.train(param, dtrain, num_boost_round=num_round,
                     verbose_eval=1, evals=eval_set,
                     obj=softmaxobj)

preds_cust = bst_cust.predict(dtest)

# Pass the full label set explicitly so the metrics keep their shape and
# column alignment even if a class is missing from the held-out rows.
class_labels = list(range(param['num_class']))
print('custom objective {}'.format(
    log_loss(y_test, preds_cust, labels=class_labels)))

# Predicted class = column with the highest score in each row.
best_preds = np.argmax(preds_cust, axis=1)
accuracy = accuracy_score(y_test, best_preds)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print(confusion_matrix(y_test, best_preds, labels=class_labels))
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
# You can’t perform that action at this time.