Skip to content

Instantly share code, notes, and snippets.

@snakers4
Last active May 27, 2022 07:12
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save snakers4/493eda44bf2869b71b2137224fc1b2f7 to your computer and use it in GitHub Desktop.
My XGB boilerplate
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
#Import libraries:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import cross_validation, metrics #Additional scklearn functions
from sklearn.grid_search import GridSearchCV #Perforing grid search
import matplotlib.pylab as plt
%matplotlib inline
from matplotlib.pylab import rcParams
def modelfit_w_test(alg,
                    dtrain,
                    dtest,
                    predictors,
                    useTrainCV=True,
                    cv_folds=5,
                    early_stopping_rounds=50,
                    target_col=None):
    """Fit an XGBoost classifier and report metrics on train and test sets.

    Optionally runs xgb.cv with early stopping first to pick the number of
    boosting rounds, then fits ``alg`` on ``dtrain`` and prints accuracy /
    precision / recall / AUC for both ``dtrain`` and ``dtest``, plus a
    feature-importance plot.

    Parameters
    ----------
    alg : XGBClassifier
        Unfitted sklearn-API XGBoost classifier.
    dtrain, dtest : pandas.DataFrame
        Training and test frames; must contain ``predictors`` and the label.
    predictors : list of str
        Feature column names.
    useTrainCV : bool
        If True, tune ``n_estimators`` via xgb.cv before fitting.
    cv_folds, early_stopping_rounds : int
        Cross-validation settings forwarded to xgb.cv.
    target_col : str or None
        Label column name. Defaults to the module-level ``target`` global
        for backward compatibility with the original call sites.

    Returns
    -------
    XGBClassifier
        The fitted estimator.
    """
    # Backward-compatible fallback to the module-level `target` global.
    label_col = target_col if target_col is not None else target
    if useTrainCV:
        # Let xgb.cv with early stopping decide the effective tree count,
        # then shrink n_estimators to the number of rounds it kept.
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values,
                              label=dtrain[label_col].values)
        cvresult = xgb.cv(xgb_param,
                          xgtrain,
                          num_boost_round=alg.get_params()['n_estimators'],
                          nfold=cv_folds,
                          metrics='auc',
                          early_stopping_rounds=early_stopping_rounds,
                          verbose_eval=False)
        alg.set_params(n_estimators=cvresult.shape[0])
    # Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain[label_col], eval_metric='auc')
    # Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:, 1]
    # Print model report:
    print ("\nModel Report")
    print ("Accuracy : %.4g" % metrics.accuracy_score(dtrain[label_col].values, dtrain_predictions))
    print ("Precision : %.4g" % metrics.precision_score(dtrain[label_col].values, dtrain_predictions))
    print ("Recall : %.4g" % metrics.recall_score(dtrain[label_col].values, dtrain_predictions))
    print ("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain[label_col], dtrain_predprob))
    print ("\n")
    # Predict on testing data:
    dtest_predictions = alg.predict(dtest[predictors])
    dtest_predprob = alg.predict_proba(dtest[predictors])[:, 1]
    print ("Accuracy : %.4g" % metrics.accuracy_score(dtest[label_col].values, dtest_predictions))
    print ("Precision : %.4g" % metrics.precision_score(dtest[label_col].values, dtest_predictions))
    print ("Recall : %.4g" % metrics.recall_score(dtest[label_col].values, dtest_predictions))
    print ('AUC Score (Test): %f' % metrics.roc_auc_score(dtest[label_col], dtest_predprob))
    # Plot gain-based importances for up to the top 100 features.
    xgb.plot_importance(alg, max_num_features=100, height=0.8)
    return alg
def modelfit(alg,
             dtrain,
             predictors,
             useTrainCV=True,
             cv_folds=5,
             early_stopping_rounds=50,
             target_col=None):
    """Fit an XGBoost classifier on ``dtrain`` and report training metrics.

    Optionally tunes ``n_estimators`` via xgb.cv with early stopping, fits the
    estimator, prints train accuracy and AUC, and plots feature importances
    (f-score) as a bar chart.

    Parameters
    ----------
    alg : XGBClassifier
        Unfitted sklearn-API XGBoost classifier.
    dtrain : pandas.DataFrame
        Training frame; must contain ``predictors`` and the label column.
    predictors : list of str
        Feature column names.
    useTrainCV : bool
        If True, tune ``n_estimators`` via xgb.cv before fitting.
    cv_folds, early_stopping_rounds : int
        Cross-validation settings forwarded to xgb.cv.
    target_col : str or None
        Label column name. Defaults to the module-level ``target`` global
        for backward compatibility with the original call sites.
    """
    # Backward-compatible fallback to the module-level `target` global.
    label_col = target_col if target_col is not None else target
    if useTrainCV:
        # Early-stopped CV picks the boosting-round count to use for the fit.
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values,
                              label=dtrain[label_col].values)
        cvresult = xgb.cv(
            xgb_param,
            xgtrain,
            num_boost_round=alg.get_params()['n_estimators'],
            nfold=cv_folds,
            metrics='auc',
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=False
        )
        alg.set_params(n_estimators=cvresult.shape[0])
    # Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain[label_col], eval_metric='auc')
    # Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:, 1]
    # Print model report:
    print ("\nModel Report")
    print ("Accuracy : %.4g" % metrics.accuracy_score(dtrain[label_col].values, dtrain_predictions))
    print ("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain[label_col], dtrain_predprob))
    # BUGFIX: alg.booster() was the (since-removed) deprecated spelling; the
    # sklearn wrapper's accessor for the underlying Booster is get_booster().
    feat_imp = pd.Series(alg.get_booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
# Tuning step 1: coarse grid over tree complexity (max_depth / min_child_weight).
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
# NOTE(review): sklearn.grid_search was removed in scikit-learn 0.20.
# GridSearchCV now lives in sklearn.model_selection, the `iid` argument is
# gone (the old iid=False behaviour is now the only one), and the
# `grid_scores_` attribute was replaced by `cv_results_`.
from sklearn.model_selection import GridSearchCV  # performing grid search

param_test1 = {
    'max_depth': range(3, 10, 2),
    'min_child_weight': range(1, 6, 2),
}
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1,
                            n_estimators=140,
                            max_depth=5,
                            min_child_weight=1,
                            gamma=0,
                            subsample=0.8,
                            colsample_bytree=0.8,
                            objective='binary:logistic',
                            nthread=4,
                            scale_pos_weight=1,
                            seed=27),
    param_grid=param_test1,
    scoring='roc_auc',
    n_jobs=4,
    cv=5)
# NOTE(review): `train`, `predictors` and `target` must already exist in the
# notebook namespace when this cell runs — confirm against the calling cells.
gsearch1.fit(train[predictors], train[target])
# Display CV results and the winning parameter combination (notebook cell output).
gsearch1.cv_results_, gsearch1.best_params_, gsearch1.best_score_
# Feature set: photo metadata columns plus auto-encoder score columns,
# deduplicated through a set (column order therefore not stable across runs).
predictors = list(set(photo_meta_data + ae_score) )
# NOTE(review): the disabled line below is missing a comma between
# 'surname_popularity' and 'name' — those adjacent string literals would be
# implicitly concatenated into 'surname_popularityname' if re-enabled.
# predictors = list(set(predictors) - set(['name_popularity', 'surname_popularity' 'name', 'surname']))
# Time-based split on view_date around 2017-12-15, restricted to users with
# more than one following. Both comparisons are strict, so rows dated exactly
# '2017-12-15' land in neither split — presumably intentional; verify.
train_ind = list(annotation_df[(annotation_df.view_date < '2017-12-15')&(annotation_df.followings_count_y>1)].index)
val_ind = list(annotation_df[(annotation_df.view_date > '2017-12-15')&(annotation_df.followings_count_y>1)].index)
# Label column read (as a global) by modelfit / modelfit_w_test.
target = 'has_interaction'
# Wider default figure size for the importance plots.
rcParams['figure.figsize'] = 12, 4
# Baseline classifier with the blog post's recommended starting parameters.
xgb1 = XGBClassifier(
learning_rate =0.1,
n_estimators=100,
max_depth=5,
min_child_weight=1,
gamma=0,
subsample=0.8,
colsample_bytree=0.8,
objective= 'binary:logistic',
nthread=7,
scale_pos_weight=1,
silent = False,
seed=27)
# Fit on the time-split train rows and evaluate on the validation rows;
# filter(items=..., axis=0) selects rows by index label.
alg = modelfit_w_test(xgb1,
annotation_df.filter(items=train_ind,axis=0),
annotation_df.filter(items=val_ind,axis=0),
predictors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment