Skip to content

Instantly share code, notes, and snippets.

@humamfauzi
Created August 21, 2018 12:21
Show Gist options
  • Save humamfauzi/31cdd60ea9f9c94e4b11f6bbe8645477 to your computer and use it in GitHub Desktop.
Save humamfauzi/31cdd60ea9f9c94e4b11f6bbe8645477 to your computer and use it in GitHub Desktop.
Using Generated Data and Ensemble Learning in Python sklearn
# Please use a Jupyter notebook to test this.
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
# NOTE(review): `sklearn.datasets.samples_generator` was deprecated in
# scikit-learn 0.22 and removed in 0.24; on newer releases this line raises
# ImportError and should be dropped in favour of the import above.
from sklearn.datasets import samples_generator
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, VotingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
# Demo script: generate a synthetic binary classification data set, tune a
# decision tree with a cross-validated grid search, then compare three
# ensembles built on the tuned tree (AdaBoost, bagging, soft voting).

# Single seed shared by every randomised step so the run is repeatable.
rstate = 43

# Synthetic data: 5000 samples, 20 features of which 13 are informative.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=13,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=2,
    random_state=rstate,
)
X, y = pd.DataFrame(X), pd.DataFrame(y)

# Hold out the test rows BEFORE any tuning. Running the grid search on the
# full data set would let the chosen hyper-parameters see the test rows and
# make the scores below optimistic.
# The labels are flattened to 1-D because the estimators expect a vector,
# not a one-column frame.
x_train, x_test, y_train, y_test = train_test_split(
    X, y.values.ravel(), test_size=0.2, random_state=rstate
)

# shuffle=True draws random folds, so it needs the seed as well.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=rstate)

dtc = DecisionTreeClassifier()
# 2 (max_features) x 10 (min_samples_split) = 20 candidates per fold.
dtc_params = {
    'max_depth': [19],
    'max_features': [15, 18],
    'min_samples_leaf': [6],
    # Float values; scikit-learn documents floats for this parameter as a
    # fraction of the number of training samples.
    'min_samples_split': np.logspace(-3, 0, 10),
    'random_state': [rstate],
}
dtcGS = GridSearchCV(dtc, dtc_params, cv=kf, scoring='accuracy', verbose=1)
dtcGS.fit(x_train, y_train)
print(dtcGS.best_score_)

# The figures quoted below are the ones recorded in the original version of
# this script, which tuned on the full data set and did not seed every step;
# expect different numbers from this version.

# Boosting on top of the tuned tree (originally recorded: 0.951).
abc = AdaBoostClassifier(dtcGS.best_estimator_, n_estimators=200, random_state=rstate)
abc_score = abc.fit(x_train, y_train).score(x_test, y_test)
print("AdaBoost test accuracy:", abc_score)

# Bagging on top of the tuned tree (originally recorded: 0.906).
bc = BaggingClassifier(dtcGS.best_estimator_, n_estimators=200, random_state=rstate)
bc_score = bc.fit(x_train, y_train).score(x_test, y_test)
print("Bagging test accuracy:", bc_score)

# Soft vote over both ensembles (originally recorded: 0.948).
vc = VotingClassifier(estimators=[('adaBoost', abc), ('bagging', bc)], voting='soft')
vc_score = vc.fit(x_train, y_train).score(x_test, y_test)
print("Voting test accuracy:", vc_score)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment