# Lianne & Justin @ Just into Data liannewriting

## plot-feature-importance.py
from xgboost import plot_importance

# Pull the second (name, estimator) entry out of the best pipeline found by
# the search, take the estimator half of that pair, and plot its feature
# importances.
# NOTE(review): `opt` is not defined in this file; presumably it is the
# fitted BayesSearchCV built around `pipe` -- confirm.
xgboost_step = opt.best_estimator_.steps[1]
xgboost_model = xgboost_step[-1]  # the pair is (name, estimator)
plot_importance(booster=xgboost_model)

## print-best-estimator.py
# Show the steps of the best pipeline found by the search.
# This is a bare expression: it only displays a value in an interactive /
# notebook session and does nothing when run as a script.
# NOTE(review): `opt` is not defined in this file -- presumably the fitted
# BayesSearchCV; confirm.
opt.best_estimator_.steps

## predict-probability.py
# Predictions of the search object for the held-out test features
# (X_test comes from the train/test split snippet).
opt.predict(X_test)

# Predicted probabilities for the same rows.
# NOTE(review): both lines are bare expressions, so the results are only
# displayed in a notebook and are not stored anywhere.
opt.predict_proba(X_test)

## evaluate-score.py
# Best score recorded on the search object.
# NOTE(review): the scoring metric is set where `opt` is constructed, which
# is not visible in this file -- confirm which metric this is.
opt.best_score_

# Score of the search object on the held-out test split.
opt.score(X_test, y_test)

## print-best-estimator.py
# Show the best estimator selected by the search (bare expression: only
# displayed in an interactive / notebook session).
opt.best_estimator_

## fit_xgboost.py
# Fit the search object on the training split produced by train_test_split.
# NOTE(review): `opt` is not defined anywhere in this file; presumably it is
# a BayesSearchCV wrapping `pipe` with `search_space` -- confirm.
opt.fit(X_train, y_train)

## set-up-hyperparameter-tuning.py
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

# Hyperparameter search space for the tuning step. Each key has the form
# '<pipeline step>__<parameter>', so every entry targets the 'clf' step of
# the pipeline; each value is a skopt dimension giving the range to sample.
search_space = {
    'clf__max_depth': Integer(2, 8),
    'clf__learning_rate': Real(0.001, 1.0, prior='log-uniform'),
    'clf__subsample': Real(0.5, 1.0),
    'clf__colsample_bytree': Real(0.5, 1.0),
    'clf__colsample_bylevel': Real(0.5, 1.0),
    'clf__colsample_bynode': Real(0.5, 1.0),
}
# NOTE(review): the original literal was never closed, which looks like the
# snippet was truncated. Further entries and the construction of `opt`
# (used by the other snippets, with BayesSearchCV imported above) are
# presumably missing here -- restore them from the original source.

## set-up-pipeline.py
from sklearn.pipeline import Pipeline
from category_encoders.target_encoder import TargetEncoder
from xgboost import XGBClassifier

# Two-step pipeline: a target encoder followed by an XGBoost classifier.
# The step names ('encoder', 'clf') are what the 'clf__...' keys of the
# search space refer to.
estimators = [
    ('encoder', TargetEncoder()),
    # The objective function can be customized through XGBClassifier's
    # `objective` parameter.
    ('clf', XGBClassifier(random_state=8)),
]
pipe = Pipeline(steps=estimators)
pipe

## split_train_test.py
from sklearn.model_selection import train_test_split

# Separate the target column 'result' from the feature columns.
y = df['result']
X = df.drop(columns='result')

# Hold out 20% of the rows for testing. `stratify=y` splits in a stratified
# fashion using the target as class labels; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=8,
)

## explore_data.py
# Print a summary of the DataFrame.
# NOTE(review): `df` is not created in this file -- presumably loaded
# earlier in the notebook; confirm.
df.info()

# Count the rows per distinct value of the 'result' column (the column the
# split snippet uses as the target `y`).
df['result'].value_counts()
	from xgboost import plot_importance

	# Pull the second (name, estimator) entry out of the best pipeline found
	# by the search, take the estimator half of that pair, and plot its
	# feature importances.
	# NOTE(review): `opt` is not defined in this file; presumably it is the
	# fitted BayesSearchCV -- confirm.
	xgboost_step = opt.best_estimator_.steps[1]
	xgboost_model = xgboost_step[-1]  # the pair is (name, estimator)
	plot_importance(booster=xgboost_model)
	from skopt import BayesSearchCV
	from skopt.space import Real, Categorical, Integer

	# Hyperparameter search space for the tuning step. Each key has the form
	# '<pipeline step>__<parameter>', so every entry targets the 'clf' step;
	# each value is a skopt dimension giving the range to sample.
	search_space = {
		'clf__max_depth': Integer(2, 8),
		'clf__learning_rate': Real(0.001, 1.0, prior='log-uniform'),
		'clf__subsample': Real(0.5, 1.0),
		'clf__colsample_bytree': Real(0.5, 1.0),
		'clf__colsample_bylevel': Real(0.5, 1.0),
		'clf__colsample_bynode': Real(0.5, 1.0),
	}
	# NOTE(review): the original literal was never closed, which looks like
	# the snippet was truncated. Further entries and the construction of
	# `opt` are presumably missing -- restore them from the original source.
	from sklearn.pipeline import Pipeline
	from category_encoders.target_encoder import TargetEncoder
	from xgboost import XGBClassifier

	# Two-step pipeline: a target encoder followed by an XGBoost classifier.
	# The step names ('encoder', 'clf') are what the 'clf__...' keys of the
	# search space refer to.
	estimators = [
		('encoder', TargetEncoder()),
		# The objective function can be customized through XGBClassifier's
		# `objective` parameter.
		('clf', XGBClassifier(random_state=8)),
	]
	pipe = Pipeline(steps=estimators)
	pipe
	from sklearn.model_selection import train_test_split

	# Separate the target column 'result' from the feature columns.
	y = df['result']
	X = df.drop(columns='result')

	# Hold out 20% of the rows for testing. `stratify=y` splits in a
	# stratified fashion using the target as class labels; the fixed
	# random_state makes the split reproducible.
	X_train, X_test, y_train, y_test = train_test_split(
		X,
		y,
		test_size=0.2,
		stratify=y,
		random_state=8,
	)