theibrr / wisdom_of_the_crowd.py
Created August 13, 2021 14:05
Wisdom of the Crowd -Voting Classifier, Bagging-Pasting, Random Forest and Extra Trees-
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import KFold, cross_val_score, train_test_split

# dataset
cancer = load_breast_cancer()
cancer_data = cancer.data
cancer_target = cancer.target
# classifiers
lr = LogisticRegression()
dt = DecisionTreeClassifier()
svm = SVC(probability=True)  # probability=True is needed for soft voting
knn = KNeighborsClassifier()
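
# The title names a Voting Classifier, but none of the captured snippets
# combine the four models above. A minimal sketch of that step (the estimator
# labels 'lr', 'dt', 'svm', 'knn' are illustrative, not from the gist):
from sklearn.ensemble import VotingClassifier

voting = VotingClassifier(estimators=[('lr', lr), ('dt', dt),
                                      ('svm', svm), ('knn', knn)],
                          voting='soft')  # soft voting averages class probabilities
voting_result = cross_val_score(voting, cancer_data, cancer_target,
                                cv=KFold(n_splits=3), n_jobs=-1)
print("voting results", voting_result)
print("average of voting:", np.mean(voting_result))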

kf = KFold(n_splits=3)
# bagging: each SVC trains on 80% of the instances, sampled WITH replacement
bagging = BaggingClassifier(SVC(), n_estimators=500,
                            max_samples=0.8, bootstrap=True, n_jobs=-1)
bagging_result = cross_val_score(bagging, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("bagging results", bagging_result)
print("average of bagging:", np.mean(bagging_result))
print("********************************************")
# pasting: same setup, but sampling WITHOUT replacement (bootstrap=False)
pasting = BaggingClassifier(SVC(), n_estimators=500,
                            max_samples=0.8, bootstrap=False, n_jobs=-1)
pasting_result = cross_val_score(pasting, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("pasting results", pasting_result)
print("average of pasting:", np.mean(pasting_result))

# dataset (regression data: car prices)
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer

carset = pd.read_csv('CarPrice_Assignment.csv')
carset = carset.drop(['car_ID','symboling','CarName','doornumber','carbody','enginelocation'], axis=1)
car_target = carset['price']
car_data = carset.iloc[:, 0:19]
# data preprocessing
kf = KFold(n_splits=4)
bins = KBinsDiscretizer(n_bins=5, encode='onehot-dense', strategy='uniform')
numeric_cols = car_data.select_dtypes(include=np.number).columns
print("numeric_cols", numeric_cols)

# out-of-bag evaluation: with bootstrap sampling, roughly a third of the
# instances are left out of each estimator's sample and can act as validation data
kf = KFold(n_splits=3)
bagging_oob = BaggingClassifier(DecisionTreeClassifier(), n_estimators=250,
                                bootstrap=True, n_jobs=-1, oob_score=True)
oob_result = cross_val_score(bagging_oob, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("bagging oob results:", oob_result)
print("average of oob:", np.mean(oob_result))

# random subspaces: keep all training instances, sample the features
x_train, x_test, y_train, y_test = train_test_split(cancer_data, cancer_target,
                                                    test_size=0.2, random_state=2021)
randomsub_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=250,
                                  max_features=0.6, bootstrap_features=True,
                                  bootstrap=False, n_jobs=-1)
randomsub_clf.fit(x_train, y_train)
randomsub_score = randomsub_clf.score(x_test, y_test)
print("randomsub", randomsub_score)

# random patches: sample both training instances and features
patch_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=250,
                              max_samples=0.5, bootstrap=False,
                              bootstrap_features=True, n_jobs=-1)
patch_clf.fit(x_train, y_train)
patch_score = patch_clf.score(x_test, y_test)
print("patch", patch_score)

kf = KFold(n_splits=3)
random_forest = RandomForestClassifier(n_estimators=250, max_depth=7, n_jobs=-1)
rf_results = cross_val_score(random_forest, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("random forest results:", rf_results)
print("average of rf:", np.mean(rf_results))
print("********************************************")
# bagging an already-bagged model: 250 forests of 250 trees each -- slow, mostly illustrative
bagging_rf = BaggingClassifier(random_forest, n_estimators=250, max_samples=0.8, bootstrap=True, n_jobs=-1)
bagging_rf_result = cross_val_score(bagging_rf, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("bagging rf results:", bagging_rf_result)
print("average of bagging rf:", np.mean(bagging_rf_result))

kf = KFold(n_splits=3)
# extra trees: random split thresholds instead of searching for the best split
extra_tree = ExtraTreesClassifier(n_estimators=250, max_depth=7, bootstrap=True, n_jobs=-1)
extra_tree_result = cross_val_score(extra_tree, cancer_data, cancer_target, cv=kf, n_jobs=-1)
print("extra tree results:", extra_tree_result)
print("average of extra tree:", np.mean(extra_tree_result))

theibrr / boosting.py
Created August 16, 2021 13:01
An Overview of Boosting Methods: CatBoost, XGBoost, AdaBoost, LightBoost, Histogram-Based Gradient Boost
import numpy as np
from time import time
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.model_selection import KFold

x, y = make_classification(n_samples=100000, n_features=30, n_informative=10,
                           n_redundant=5, random_state=2021)

# AdaBoost: fit weak learners sequentially, reweighting misclassified samples
from sklearn.ensemble import AdaBoostClassifier
start_ada = time()
ada = AdaBoostClassifier()
kf = KFold(n_splits=5, shuffle=True, random_state=2021)
ada_score = cross_val_score(ada, x, y, cv=kf, n_jobs=-1)
print("ada", np.round(time()-start_ada, 5), "sec")
print("acc", np.mean(ada_score).round(3))
print("***************************")