# thuijskens/example.py

## example.py
import numpy as np
import pandas as pd

from memory_profiler import profile
from xgboost.core import XGBoostError
from xgboost import XGBClassifier


@profile
def iterative_train(n_iterations, X, y):
    """Fit an XGBoost classifier in repeated rounds, each continuing the last.

    Every call to ``fit`` receives the booster produced by the previous round
    through ``xgb_model``; the first round receives ``None``. After each round
    the number of distinct trees in the booster is printed.

    Args:
        n_iterations: Number of times ``fit`` is called.
        X: Feature matrix handed to ``XGBClassifier.fit``.
        y: Target values handed to ``XGBClassifier.fit``.

    Returns:
        The ``XGBClassifier`` after the final round (never fitted when
        ``n_iterations`` is 0).
    """
    model = XGBClassifier(n_estimators=5, nthread=1)

    # Track the booster explicitly rather than calling get_booster() on an
    # unfitted model and catching the failure: that keeps exceptions out of the
    # loop's control flow and does not depend on which exception class the
    # installed xgboost raises for an unfitted estimator.
    # NOTE(review): newer xgboost releases appear to raise scikit-learn's
    # NotFittedError here instead of XGBoostError -- confirm against the
    # installed version.
    booster = None

    for _ in range(n_iterations):
        # xgb_model=None trains from scratch; a booster continues training.
        model.fit(X, y, xgb_model=booster)

        # Use the public accessor instead of the private ``_Booster`` attribute.
        booster = model.get_booster()
        n_trees = booster.trees_to_dataframe()["Tree"].nunique()
        print(f"Number of trees: {n_trees}")

    return model

if __name__ == "__main__":
    # Script entry point: read the CSV, build the model inputs, and run ten
    # training rounds.
    data = pd.read_csv("~/data.csv")

    # The label column is "target"; every other column is a feature.
    y = data["target"].values
    feature_frame = data.drop(columns=["target"])

    # One-hot encode the features and hand them over as a plain array.
    encoded_features = pd.get_dummies(feature_frame)
    X = encoded_features.values

    iterative_train(10, X, y)
	import numpy as np
	import pandas as pd

	from memory_profiler import profile
	from xgboost.core import XGBoostError
	from xgboost import XGBClassifier


	@profile
	def iterative_train(n_iterations, X, y):
	    """Fit an XGBoost classifier in repeated rounds, each continuing the last.

	    Every call to ``fit`` receives the booster produced by the previous
	    round through ``xgb_model``; the first round receives ``None``. After
	    each round the number of distinct trees in the booster is printed.

	    Args:
	        n_iterations: Number of times ``fit`` is called.
	        X: Feature matrix handed to ``XGBClassifier.fit``.
	        y: Target values handed to ``XGBClassifier.fit``.

	    Returns:
	        The ``XGBClassifier`` after the final round (never fitted when
	        ``n_iterations`` is 0).
	    """
	    model = XGBClassifier(n_estimators=5, nthread=1)

	    # Track the booster explicitly rather than calling get_booster() on an
	    # unfitted model and catching the failure: that keeps exceptions out of
	    # the loop's control flow and does not depend on which exception class
	    # the installed xgboost raises for an unfitted estimator.
	    # NOTE(review): newer xgboost releases appear to raise scikit-learn's
	    # NotFittedError here instead of XGBoostError -- confirm against the
	    # installed version.
	    booster = None

	    for _ in range(n_iterations):
	        # xgb_model=None trains from scratch; a booster continues training.
	        model.fit(X, y, xgb_model=booster)

	        # Use the public accessor instead of the private ``_Booster``.
	        booster = model.get_booster()
	        n_trees = booster.trees_to_dataframe()["Tree"].nunique()
	        print(f"Number of trees: {n_trees}")

	    return model

	if __name__ == "__main__":
	    # Script entry point: read the CSV, build the model inputs, and run ten
	    # training rounds.
	    data = pd.read_csv("~/data.csv")

	    # Every column except "target" is a feature; one-hot encode them and
	    # pass plain arrays to the training routine.
	    X = pd.get_dummies(data.drop(columns=["target"])).values
	    y = data["target"].values

	    iterative_train(10, X, y)