Created
November 16, 2016 14:43
-
-
Save TomHortons/dc11b7e2c86619c1123a04358abb94d7 to your computer and use it in GitHub Desktop.
アンサンブル学習(Stacked generalization)のサンプルプログラムと実行例 ref: http://qiita.com/TomHortons/items/2a05b72be180eb83a204
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the predictions written for the test set by the final blending stage
# and score them against the true labels held in the last column of `test`.
ans = np.loadtxt('./3GB_0.338917307945_16482164617e7c9d188bc75bafc06a08_test.txt')
# A single-argument print(...) is valid on both Python 2 and Python 3; the two
# spaces reproduce the output of the former `print "Acc: ", value` statement.
print("Acc:  %s" % (accuracy_score(test[:, -1], ans),))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array([[-0.96155185, -0.49879683, 0.65487916, 1. ], | |
[-0.95225926, -1.00853786, -0.97598077, 0. ], | |
[-0.11578056, 2.51579129, -1.23724233, 0. ], | |
..., | |
[-0.93715662, 0.41894292, -1.56002152, 0. ], | |
[-0.69759832, -0.20810317, -0.01283087, 0. ], | |
[ 0.31519506, -1.75498218, 0.89115054, 1. ]]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2GB_0.37311622448_16482164617e7c9d188bc75bafc06a08_test.npy | |
2GB_0.37311622448_16482164617e7c9d188bc75bafc06a08_train.npy | |
2Ne_0.784523345103_cddd24af66706c9fa26f6601910c92c5_test.npy | |
2Ne_0.784523345103_cddd24af66706c9fa26f6601910c92c5_train.npy | |
2an_0.421335902473_825e1ad5956801c2225da656822caebb_test.npy | |
2an_0.421335902473_825e1ad5956801c2225da656822caebb_train.npy | |
2au_1.9348828025_4b57dac04bbc037494cb592143a1c09c_test.npy | |
2au_1.9348828025_4b57dac04bbc037494cb592143a1c09c_train.npy | |
2ra_0.292331269114_a0cb35c894f0ad378f6bb824e1019748_test.npy | |
2ra_0.292331269114_a0cb35c894f0ad378f6bb824e1019748_train.npy | |
2xt_0.451990280749_e130a295809821efc1db2f64c228169c_test.npy | |
2xt_0.451990280749_e130a295809821efc1db2f64c228169c_train.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acc: 0.90 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acc: 0.90 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1GB_0.303855837305_16482164617e7c9d188bc75bafc06a08_test.npy | |
1GB_0.303855837305_16482164617e7c9d188bc75bafc06a08_train.npy | |
1Ne_0.455167671362_cddd24af66706c9fa26f6601910c92c5_test.npy | |
1Ne_0.455167671362_cddd24af66706c9fa26f6601910c92c5_train.npy | |
1an_0.249015612417_825e1ad5956801c2225da656822caebb_test.npy | |
1an_0.249015612417_825e1ad5956801c2225da656822caebb_train.npy | |
1au_0.22545173232_4b57dac04bbc037494cb592143a1c09c_test.npy | |
1au_0.22545173232_4b57dac04bbc037494cb592143a1c09c_train.npy | |
1ra_0.207753858339_a0cb35c894f0ad378f6bb824e1019748_test.npy | |
1ra_0.207753858339_a0cb35c894f0ad378f6bb824e1019748_train.npy | |
1xt_0.270981174382_e130a295809821efc1db2f64c228169c_test.npy | |
1xt_0.270981174382_e130a295809821efc1db2f64c228169c_train.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array([[ 1.07884407e-04, 9.99892116e-01, 0.00000000e+00, ..., | |
9.93333333e-01, 2.50875433e-04, 9.99749125e-01], | |
[ 9.96784627e-01, 3.21540073e-03, 9.76666667e-01, ..., | |
2.00000000e-02, 9.53099981e-01, 4.69000190e-02], | |
[ 5.11407852e-05, 9.99948859e-01, 5.33333333e-02, ..., | |
9.06666667e-01, 1.66652470e-06, 9.99998333e-01], | |
..., | |
[ 4.93575096e-01, 5.06424904e-01, 6.30000000e-01, ..., | |
4.03333333e-01, 9.49199952e-01, 5.08000478e-02], | |
[ 3.96782160e-03, 9.96032178e-01, 2.66666667e-02, ..., | |
9.46666667e-01, 2.46422552e-06, 9.99997536e-01], | |
[ 9.99466836e-01, 5.33185899e-04, 9.03333333e-01, ..., | |
8.00000000e-02, 9.54109081e-01, 4.58909185e-02]]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import accuracy_score | |
# Baseline for comparison: a single default k-nearest-neighbours classifier
# fitted on the feature columns (all but the last) with the last column as label.
nbrs = KNeighborsClassifier().fit(train[:, :-1], train[:, -1])
# A single-argument print(...) is valid on both Python 2 and Python 3; the two
# spaces reproduce the output of the former `print "Acc: ", value` statement.
print("Acc:  %s" % (accuracy_score(test[:, -1], nbrs.predict(test[:, :-1])),))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import make_classification | |
import pandas as pd | |
import numpy as np | |
# Build a synthetic binary classification set and split it into train/test
# arrays whose last column is the class label.
n_features = 3
n_samples = 2000
# NOTE(review): n_informative=10 plus n_redundant=1 exceeds n_features=3;
# scikit-learn documents that informative + redundant + repeated features must
# fit within n_features -- confirm the intended values before running.
data = np.c_[make_classification(n_samples=n_samples, n_features=n_features, n_redundant=1, n_informative=10, n_clusters_per_class=2, n_classes=2, flip_y=0)]
train = test = np.empty((0, n_features + 1), float)
# Split each class separately so both classes contribute to train and test.
for d in [data[data[:, n_features] == 0], data[data[:, n_features] == 1]]:
    np.random.shuffle(d)
    # Floor division keeps the slice bound an integer on Python 3 as well.
    train = np.append(train, d[:n_samples // 4], axis=0)
    test = np.append(test, d[n_samples // 4:], axis=0)
# Mix the two classes together again. An explicit loop is used because map()
# is lazy on Python 3 and would never perform the in-place shuffles.
for subset in (train, test):
    np.random.shuffle(subset)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys,os | |
def _stack_columns(arrays):
    """Join arrays side by side; a single array is returned unchanged."""
    if len(arrays) == 1:
        return arrays[0]
    return np.concatenate(arrays, axis=1)


def read_npy(tr_p, te_p):
    """Load every array file in two directories and stack them column-wise.

    Args:
        tr_p: Directory holding the train-side files readable by ``np.load``.
        te_p: Directory holding the test-side files readable by ``np.load``.

    Returns:
        A ``(l_train, l_test)`` tuple: the train arrays concatenated along
        axis 1 and the test arrays concatenated along axis 1, both in sorted
        file-name order.

    Raises:
        IndexError: If either directory yields no files to load.
    """
    # os.listdir() returns names in arbitrary order. Sorting both listings
    # makes the column order deterministic and keeps the i-th train file
    # paired with the i-th test file.
    train_file_names = [os.path.join(tr_p, name) for name in sorted(os.listdir(tr_p))]
    test_file_names = [os.path.join(te_p, name) for name in sorted(os.listdir(te_p))]

    # zip() stops at the shorter listing, so only paired files are loaded.
    list_train, list_test = [], []
    for path_train, path_test in zip(train_file_names, test_file_names):
        list_train.append(np.load(path_train))
        list_test.append(np.load(path_test))

    if not list_train:
        # Same exception type as indexing the empty list, with a clear message.
        raise IndexError("no array files found in %r / %r" % (tr_p, te_p))

    return _stack_columns(list_train), _stack_columns(list_test)
# Gather the arrays saved under ./first/ into one train and one test matrix.
first_train, first_test = read_npy('./first/train/', './first/test/')
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(first_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.naive_bayes import GaussianNB | |
from xgboost import XGBClassifier | |
from sklearn.neighbors import KNeighborsClassifier | |
# Base learners; each one is handed to bp.blend_proba in the blending stages.
clfs = [
    RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
    ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
    GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=50),
    KNeighborsClassifier(n_neighbors=10, n_jobs=-1),
    GaussianNB(),
    XGBClassifier(
        learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=1,
        gamma=0, subsample=0.8, colsample_bytree=0.5, objective='binary:logistic',
        scale_pos_weight=1, seed=0,
    ),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import blend_proba as bp | |
# Stage 1: run every base learner on the raw features (all columns but the
# last) with the last column of `train` as the label.
# NOTE(review): presumably save_preds="1" tags the prediction files that
# blend_proba writes -- confirm against the blend_proba module.
# A plain loop is used because the calls matter only for their side effects.
for clf in clfs:
    bp.blend_proba(clf, X_train=train[:, :-1], y=train[:, -1], X_test=test[:, :-1], save_preds="1", nfolds=3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stage 2: run every base learner again, this time on the stacked stage-1
# arrays (first_train / first_test), still labelled by the last column of `train`.
# NOTE(review): presumably save_preds="2" tags the prediction files that
# blend_proba writes -- confirm against the blend_proba module.
# A plain loop is used because the calls matter only for their side effects.
for clf in clfs:
    bp.blend_proba(clf, X_train=first_train, y=train[:, -1], X_test=first_test, save_preds="2", nfolds=3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stage 3: fit one final XGBoost model on the stacked stage-2 arrays.
# The loader defined in this file is read_npy; `read_data` does not exist.
second_train, second_test = read_npy('./second/train/', './second/test/')
clf = XGBClassifier(
    learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=1,
    gamma=0, subsample=0.8, colsample_bytree=0.5, objective='binary:logistic',
    scale_pos_weight=1, seed=0,
)
# Labels come from the last column of `train`, as in stage 2. The last column
# of second_train is a stacked prediction column, not the class label.
bp.blend_proba(clf, X_train=second_train, y=train[:, -1], X_test=second_test, save_test_only="3", nfolds=3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment