Code_for_the_prediction_of_ischemic_outcome_after_treatment_against_ICAS_with_machine_learning
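# NOTE (added): the snippets below assume the following imports, inferred from the
# calls used in this gist. The xgboost fit arguments follow the legacy (pre-1.6)
# scikit-learn wrapper API, as in the original code.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from scipy import stats
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, log_loss, make_scorer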
# Use patient data: 17 clinical factors with the outcome in the last column.
data = pd.read_csv("my_file.csv")
# Preparation for missing-value imputation
use_df = data.iloc[:, :18]
# K-nearest neighbors imputation
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=4, weights="distance")
# KNNImputer returns a NumPy array, so rebuild the DataFrame to keep the
# column names and row index used by the .iloc/.loc calls below.
imputed_df = pd.DataFrame(imputer.fit_transform(use_df),
                          columns=use_df.columns, index=use_df.index)
# Standardization of the 17 feature columns (the outcome column is left as-is)
scaler = StandardScaler()
scaler.fit(imputed_df.iloc[:, :-1])
imputed_df.iloc[:, :-1] = scaler.transform(imputed_df.iloc[:, :-1])
imputed_df.iloc[:, 17] = imputed_df.iloc[:, 17].astype("uint8")
print(imputed_df)
# Split the dataset: this assumes the row index labels each patient
# as "train" or "test" (see the sketch after this block).
test_df = imputed_df.loc["test"]
train_df = imputed_df.loc["train"]
print('number of Train data:', len(train_df))
print('number of Test data:', len(test_df))
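# NOTE (added): a minimal, hypothetical sketch of how such a labelled index could
# be built, assuming the CSV has a column holding "train"/"test" for each row
# (the column name "dataset" is not part of the original code):
labelled = pd.read_csv("my_file.csv", index_col="dataset")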
# Create X and y
X_train = train_df.iloc[:, :-1]
y_train = train_df.iloc[:, -1:].values.ravel()
X_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1:].values.ravel()
# Evaluate scikit-learn models with 10-times repeated 5-fold stratified cross-validation.
# Redefine sensitivity and specificity as scorers for cross-validation.
def sensitivity1(y_test, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    recall = tp / (tp + fn)
    return recall

def specificity1(y_test, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    specificity = tn / (tn + fp)
    return specificity
from sklearn.model_selection import RepeatedStratifiedKFold

def score_calc(clf, cross_k_val=5, repeat=10, seed=77):
    sensitivity2 = make_scorer(sensitivity1, greater_is_better=True)
    specificity2 = make_scorer(specificity1, greater_is_better=True)
    # The six evaluation metrics
    scores = ["roc_auc", sensitivity2, specificity2, 'precision', "accuracy", "neg_log_loss"]
    # 10-times repeated stratified k-fold cross-validation
    stratifiedkfold = RepeatedStratifiedKFold(n_splits=cross_k_val,
                                              n_repeats=repeat,
                                              random_state=seed)
    print(clf)
    print("------------------------------------------------------------")
    # Calculate each score
    for score in scores:
        answer = cross_val_score(clf, X_train, y_train, cv=stratifiedkfold, scoring=score)
        print("eval metric:", score)
        mean_val = np.mean(answer)
        std = np.std(answer)
        print('Average score (SD):', mean_val, '±', std)
        # 95% CI from the t distribution over the fold scores
        alpha = 0.95
        sem_val = stats.sem(answer)
        ci = stats.t.interval(alpha, len(answer) - 1, loc=mean_val, scale=sem_val)
        print('95% CI: {}'.format(ci))
        print("---------------------------------------------------------------")
# For optimization of scikit-learn models
def param_tune(clf, X_train, y_train, score, param):
    cv5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=55)
    # Note: iid=False is omitted here; it was deprecated and then removed
    # from GridSearchCV in scikit-learn 0.24.
    gsearch = GridSearchCV(
        estimator=clf,
        param_grid=param,
        scoring=score,
        n_jobs=2, cv=cv5
    )
    gsearch.fit(X_train, y_train)
    print('Best parameters: {}'.format(gsearch.best_params_))
    print('Best cross-validation: {}'.format(gsearch.best_score_))
# Set the scoring metric for the grid search
score = "neg_log_loss"
# Example of hyperparameter tuning
LR = LogisticRegression(
    C=10, fit_intercept=True, penalty="l2", solver='lbfgs', random_state=27
)
param_test1 = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['newton-cg', 'sag', 'saga', 'lbfgs']
}
param_tune(LR, X_train, y_train, score, param_test1)
# Example of cross-validation on the tuned logistic regression model
LR3 = LogisticRegression(
    C=0.075, fit_intercept=True, penalty="l2", solver='saga', random_state=27
)
score_calc(LR3)
# Bootstrap evaluation of scikit-learn models on the test dataset
from scipy import stats

def boot_eval(clf, data, X_train, y_train, cnt=1100):
    scores_acc = []
    scores_recall = []
    scores_spec = []
    scores_ppv = []
    print("Model is ", clf)
    # Fit the model
    clf.fit(X_train, y_train)
    count = 0
    for i in range(cnt):
        # Resample the test data with replacement
        df = data.sample(n=len(data.values), replace=True, random_state=i)
        y = df.iloc[:, -1]
        X = df.iloc[:, 0:-1]
        Bootpredictions = clf.predict(X)
        cm = confusion_matrix(y, Bootpredictions).ravel()
        if not cm.shape[0] == 4:
            print("Excluded: the resample is non-uniform and the metrics cannot be calculated", cm.shape[0])
            count += 1
            continue
        tn, fp, fn, tp = cm
        # Skip resamples with no positive predictions (precision is undefined)
        if (tp + fp) == 0:
            count += 1
            continue
        # Calculate each metric
        acc = (tp + tn) / (tn + fp + fn + tp)
        specificity = tn / (tn + fp)
        recall = tp / (fn + tp)
        precision = tp / (tp + fp)
        scores_acc.append(acc)
        scores_recall.append(recall)
        scores_spec.append(specificity)
        scores_ppv.append(precision)
        if len(scores_acc) == 1000:
            break
    print("====================================================")
    print('Skipped because of an undefined metric', count, 'times...')
    print(len(scores_acc), 'bootstrap calculations were done.')
    mean_acc = np.nanmean(scores_acc)
    se_acc = np.nanstd(scores_acc, ddof=1) / np.sqrt(len(scores_acc))  # standard error
    ci_acc = [mean_acc - 1.96 * se_acc, mean_acc + 1.96 * se_acc]
    print('Accuracy mean:', mean_acc)
    print('95%CI: ', ci_acc)
    mean_recall = np.nanmean(scores_recall)
    se_recall = np.nanstd(scores_recall, ddof=1) / np.sqrt(len(scores_recall))  # standard error
    ci_recall = [mean_recall - 1.96 * se_recall, mean_recall + 1.96 * se_recall]
    print('Sensitivity mean:', mean_recall)
    print('95%CI: ', ci_recall)
    mean_spec = np.nanmean(scores_spec)
    se_spec = np.nanstd(scores_spec, ddof=1) / np.sqrt(len(scores_spec))  # standard error
    ci_spec = [mean_spec - 1.96 * se_spec, mean_spec + 1.96 * se_spec]
    print('Specificity mean:', mean_spec)
    print('95%CI: ', ci_spec)
    mean_ppv = np.nanmean(scores_ppv)
    se_ppv = np.nanstd(scores_ppv, ddof=1) / np.sqrt(len(scores_ppv))  # standard error
    ci_ppv = [mean_ppv - 1.96 * se_ppv, mean_ppv + 1.96 * se_ppv]
    print('PPV mean:', mean_ppv)
    print('95%CI: ', ci_ppv)
    print("=======================================================")
# Example of bootstrap evaluation on the regression model
boot_eval(LR3, test_df, X_train, y_train, cnt=1100)
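# NOTE (added): the confidence intervals above use a normal approximation
# (mean ± 1.96*SE over the bootstrap replicates). A common alternative is the
# percentile bootstrap CI; a minimal sketch (the helper name percentile_ci is
# not part of the original code):
def percentile_ci(scores, alpha=0.95):
    lo = (1 - alpha) / 2 * 100
    hi = (1 + alpha) / 2 * 100
    return [np.percentile(scores, lo), np.percentile(scores, hi)]
# e.g. percentile_ci(scores_acc) inside boot_eval, in place of mean ± 1.96*SE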
# Cross-validation of the XGBoost model
def xgb_modelfit_all(alg, X_train, y_train, X_test, y_test, useTrainCV=True, cv_folds=5, early_stopping_rounds=20):
    if useTrainCV:
        # Use xgb.cv to choose the number of boosting rounds by early stopping
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(X_train, label=y_train)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'],
                          nfold=cv_folds, metrics=['auc', 'logloss'], stratified=True,
                          early_stopping_rounds=early_stopping_rounds)
        alg.set_params(n_estimators=cvresult.shape[0])
    # Fit the algorithm on the data (legacy xgboost fit/callback API, as in the original)
    evals_result = {}
    alg.fit(X_train, y_train,
            # evaluation metrics: AUC and logloss
            eval_metric=['auc', 'logloss'],
            eval_set=[(X_train, y_train), (X_test, y_test)],
            early_stopping_rounds=early_stopping_rounds,
            callbacks=[xgb.callback.record_evaluation(evals_result)]
            )
    # Predict the validation set
    dval_predictions = alg.predict(X_test)
    dval_predprob = alg.predict_proba(X_test)[:, 1]
    acc = metrics.accuracy_score(y_test, dval_predictions)
    roc = metrics.roc_auc_score(y_test, dval_predprob)
    loglossval = log_loss(y_test, dval_predprob)
    tn, fp, fn, tp = confusion_matrix(y_test, dval_predictions).ravel()
    specificity = tn / (tn + fp)
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    # Print the model report
    print("\nModel Report")
    print("Accuracy:", acc)
    print("Sensitivity:", recall)
    print("Specificity:", specificity)
    print("Precision:", precision)
    print('ROC-AUC :', roc)
    print('log loss :', loglossval)
    return acc, recall, specificity, precision, roc, loglossval
def xgb_on_cross_val(alg, X_train, y_train, seed=77, stop_n=20):
    scores_acc = []
    scores_recall = []
    scores_specificity = []
    scores_precision = []
    scores_roc = []
    scores_logloss = []
    # 10-times repeated stratified 5-fold cross-validation
    stratifiedkfold = RepeatedStratifiedKFold(n_splits=5,
                                              n_repeats=10,
                                              random_state=seed)
    for tr_idx, va_idx in stratifiedkfold.split(X_train, y_train):
        tr_x, va_x = X_train.iloc[tr_idx], X_train.iloc[va_idx]
        tr_y, va_y = y_train[tr_idx], y_train[va_idx]
        print("Divide...")
        acc, recall, specificity, precision, roc, loglossval = xgb_modelfit_all(
            alg, tr_x, tr_y, va_x, va_y, early_stopping_rounds=stop_n)
        scores_acc.append(acc)
        scores_recall.append(recall)
        scores_specificity.append(specificity)
        scores_precision.append(precision)
        scores_roc.append(roc)
        scores_logloss.append(loglossval)
    # Calculate the means
    print('\n Mean results of 10-times repeated 5-fold cross-validation on XGBoost: \n')
    print(alg)
    acc_avg = np.nanmean(scores_acc)
    recall_avg = np.nanmean(scores_recall)
    specificity_avg = np.nanmean(scores_specificity)
    precision_avg = np.nanmean(scores_precision)
    roc_avg = np.nanmean(scores_roc)
    logloss_avg = np.nanmean(scores_logloss)
    se_acc = np.nanstd(scores_acc, ddof=1) / np.sqrt(len(scores_acc))  # standard error
    se_recall = np.nanstd(scores_recall, ddof=1) / np.sqrt(len(scores_recall))
    se_specificity = np.nanstd(scores_specificity, ddof=1) / np.sqrt(len(scores_specificity))
    se_precision = np.nanstd(scores_precision, ddof=1) / np.sqrt(len(scores_precision))
    se_roc = np.nanstd(scores_roc, ddof=1) / np.sqrt(len(scores_roc))
    # 95% CI by normal approximation over the 50 fold scores: mean ± 1.96*SE
    print('AUC-ROC mean (95%CI):', roc_avg, ",", roc_avg - 1.96 * se_roc, ",", roc_avg + 1.96 * se_roc)
    print('Accuracy mean (95%CI):', acc_avg, ",", acc_avg - 1.96 * se_acc, ",", acc_avg + 1.96 * se_acc)
    print('Sensitivity mean (95%CI):', recall_avg, ",", recall_avg - 1.96 * se_recall, ",", recall_avg + 1.96 * se_recall)
    print('Specificity mean (95%CI):', specificity_avg, ",", specificity_avg - 1.96 * se_specificity, ",", specificity_avg + 1.96 * se_specificity)
    print('PPV mean (95%CI):', precision_avg, ",", precision_avg - 1.96 * se_precision, ",", precision_avg + 1.96 * se_precision)
    print('logloss mean:', logloss_avg)
# Evaluation of the XGBoost model
xgb01 = xgb.XGBClassifier(
    learning_rate=0.3,
    n_estimators=360,
    max_depth=1,
    min_child_weight=6.5,
    gamma=0.95,
    subsample=0.65,
    colsample_bytree=0.65,
    objective='binary:logistic',
    reg_alpha=0.001,
    reg_lambda=0.1,
    max_delta_step=1,
    tree_method='hist',
    nthread=4,
    seed=27)
xgb_on_cross_val(xgb01, X_train, y_train, stop_n=20)
# Bootstrap evaluation of XGBoost on the test dataset
def boot_xgb_eval(alg, data, X_train, y_train, cnt=1100):
    scores_acc = []
    scores_recall = []
    scores_spec = []
    scores_ppv = []
    print("XGBoost model is", alg)
    # Fit the model (legacy xgboost fit/callback API, as in the original)
    evals_result = {}
    alg.fit(X_train, y_train,
            early_stopping_rounds=10,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            eval_metric=['auc', 'logloss'],
            callbacks=[xgb.callback.record_evaluation(evals_result)]
            )
    count = 0
    for i in range(cnt):
        # Resample the test data with replacement
        df = data.sample(n=len(data.values), replace=True, random_state=i)
        y_ = df.iloc[:, -1]
        X_ = df.iloc[:, 0:-1]
        y_pred = alg.predict(X_)
        cm = confusion_matrix(y_, y_pred).ravel()
        if not cm.shape[0] == 4:
            print("Excluded: the resample is non-uniform and the metrics cannot be calculated", cm.shape[0])
            count += 1
            continue
        tn, fp, fn, tp = cm
        # Skip resamples with no positive predictions (precision is undefined)
        if (tp + fp) == 0:
            count += 1
            continue
        acc = (tp + tn) / (tn + fp + fn + tp)
        specificity = tn / (tn + fp)
        recall = tp / (fn + tp)
        precision = tp / (tp + fp)
        # Append the results
        scores_acc.append(acc)
        scores_recall.append(recall)
        scores_spec.append(specificity)
        scores_ppv.append(precision)
        if len(scores_acc) == 1000:
            break
    print("====================================================")
    print('Skipped because of an undefined metric', count, 'times.')
    print(len(scores_acc), 'bootstrap calculations were done.')
    mean_acc = np.nanmean(scores_acc)
    se_acc = np.nanstd(scores_acc, ddof=1) / np.sqrt(len(scores_acc))  # standard error
    ci_acc = [mean_acc - 1.96 * se_acc, mean_acc + 1.96 * se_acc]
    print('Accuracy mean:', mean_acc)
    print('95%CI: ', ci_acc)
    mean_recall = np.nanmean(scores_recall)
    se_recall = np.nanstd(scores_recall, ddof=1) / np.sqrt(len(scores_recall))  # standard error
    ci_recall = [mean_recall - 1.96 * se_recall, mean_recall + 1.96 * se_recall]
    print('Sensitivity mean:', mean_recall)
    print('95%CI: ', ci_recall)
    mean_spec = np.nanmean(scores_spec)
    se_spec = np.nanstd(scores_spec, ddof=1) / np.sqrt(len(scores_spec))  # standard error
    ci_spec = [mean_spec - 1.96 * se_spec, mean_spec + 1.96 * se_spec]
    print('Specificity mean:', mean_spec)
    print('95%CI: ', ci_spec)
    mean_ppv = np.nanmean(scores_ppv)
    se_ppv = np.nanstd(scores_ppv, ddof=1) / np.sqrt(len(scores_ppv))  # standard error
    ci_ppv = [mean_ppv - 1.96 * se_ppv, mean_ppv + 1.96 * se_ppv]
    print('PPV mean:', mean_ppv)
    print('95%CI: ', ci_ppv)
    print("=======================================================")
# Example of bootstrap evaluation of XGBoost on the test dataset
boot_xgb_eval(
    xgb01, test_df, X_train, y_train, cnt=1100
)
# Feature importance measurement with XGBoost
def xgb_modelfit_feat2(alg, X_train, y_train, X_test, y_test, useTrainCV=True, cv_folds=5, early_stopping_rounds=20):
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(X_train, label=y_train)
        xgtest = xgb.DMatrix(X_test, label=y_test)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'],
                          nfold=cv_folds, metrics=['logloss', 'auc'],
                          early_stopping_rounds=early_stopping_rounds)
    # Fit the algorithm on the data
    evals_result = {}
    alg.fit(X_train, y_train,
            eval_metric=['auc', 'logloss'],
            eval_set=[(X_train, y_train), (X_test, y_test)],
            early_stopping_rounds=early_stopping_rounds,
            callbacks=[xgb.callback.record_evaluation(evals_result)]
            )
    # Plot the total-gain feature importances
    feat_imp = pd.Series(alg.get_booster().get_score(importance_type='total_gain')).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    return feat_imp

# Example
feat_imp = xgb_modelfit_feat2(xgb01, X_train, y_train, X_test, y_test)
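# NOTE (added): get_score also supports other importance definitions
# ('weight', 'gain', 'cover', 'total_cover') if a different ranking is preferred;
# a minimal sketch on the fitted model from above:
gain_imp = pd.Series(xgb01.get_booster().get_score(importance_type='gain')).sort_values(ascending=False)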
This code was used in my research, reported in "Potential of machine learning to predict early ischemic events after carotid endarterectomy or stenting: A comparison with surgeon predictions."