Skip to content

Instantly share code, notes, and snippets.

@thuijskens
Created January 8, 2020 17:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thuijskens/ae0c608af41a833f6d110da1e24ed2f5 to your computer and use it in GitHub Desktop.
Save thuijskens/ae0c608af41a833f6d110da1e24ed2f5 to your computer and use it in GitHub Desktop.
XGBoost incremental training
import numpy as np
import pandas as pd
from memory_profiler import profile
from xgboost.core import XGBoostError
from xgboost import XGBClassifier
@profile
def iterative_train(n_iterations, X, y):
    """Train an XGBClassifier incrementally by repeatedly continuing boosting.

    Each iteration calls ``fit`` on the same data and passes the booster
    produced by the previous iteration as ``xgb_model``, so boosting resumes
    from the existing trees instead of starting over. The function is wrapped
    in ``memory_profiler.profile`` so memory use can be inspected per line.

    Args:
        n_iterations: Number of consecutive ``fit`` calls to perform. With 0
            the loop does not run and an unfitted classifier is returned.
        X: Feature matrix passed straight to ``XGBClassifier.fit``.
        y: Target values passed straight to ``XGBClassifier.fit``.

    Returns:
        The ``XGBClassifier`` after the final ``fit`` call.
    """
    model = XGBClassifier(n_estimators=5, nthread=1)

    # Track the booster explicitly instead of probing model.get_booster() and
    # catching the "not fitted" error: the exception type raised by an
    # unfitted estimator differs between XGBoost releases (XGBoostError in
    # older ones, scikit-learn's NotFittedError in newer ones), so relying on
    # it makes the very first iteration fragile.
    booster = None

    for _ in range(n_iterations):
        # booster is None on the first pass (train from scratch); afterwards
        # it is the previous result, so this fit adds another n_estimators
        # rounds of boosting on top of it.
        model.fit(X, y, xgb_model=booster)
        booster = model.get_booster()

        # Count distinct tree ids in the dump to show how the model grows.
        n_trees = booster.trees_to_dataframe()["Tree"].nunique()
        print(f"Number of trees: {n_trees}")

    return model
if __name__ == "__main__":
    # Script entry point: load the CSV, split it into features and target,
    # and run ten rounds of incremental training on it.
    data = pd.read_csv("~/data.csv")

    # The "target" column is the label; every other column is a feature.
    y = data["target"].values
    features = data.drop(columns=["target"])

    # Expand the features into dummy/indicator columns and pass a plain array.
    X = pd.get_dummies(features).values

    iterative_train(10, X, y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment