@TomHortons
Created November 16, 2016 14:43
Sample program and execution example for ensemble learning (stacked generalization) ref: http://qiita.com/TomHortons/items/2a05b72be180eb83a204
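Since the gist itself contains little prose, a brief orientation: stacked generalization trains several first-stage models with K-fold cross validation, collects their out-of-fold class probabilities as new features, and feeds those features to the next stage. The blend_proba helper imported further down automates this (including saving the .npy files listed below). The sketch here is an independent illustration only; the helper name stack_oof and its details are assumptions, not part of the gist or of blend_proba.

# Minimal out-of-fold stacking sketch (illustrative; not the gist's blend_proba).
import numpy as np
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold

def stack_oof(clf, X_train, y_train, X_test, n_folds=3, seed=0):
    """Hypothetical helper: out-of-fold probabilities for X_train,
    fold-averaged probabilities for X_test."""
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    n_classes = len(np.unique(y_train))
    oof_train = np.zeros((X_train.shape[0], n_classes))
    oof_test = np.zeros((X_test.shape[0], n_classes))
    for fit_idx, val_idx in skf.split(X_train, y_train):
        model = clone(clf).fit(X_train[fit_idx], y_train[fit_idx])
        # Each training row is predicted by the fold that did NOT see it.
        oof_train[val_idx] = model.predict_proba(X_train[val_idx])
        # Test predictions are averaged over all folds.
        oof_test += model.predict_proba(X_test) / n_folds
    return oof_train, oof_test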
# Load the stage-3 predictions saved at the end of this gist and score them.
ans = np.loadtxt('./3GB_0.338917307945_16482164617e7c9d188bc75bafc06a08_test.txt')
print("Acc: ", accuracy_score(test[:, -1], ans))
array([[-0.96155185, -0.49879683,  0.65487916,  1.        ],
       [-0.95225926, -1.00853786, -0.97598077,  0.        ],
       [-0.11578056,  2.51579129, -1.23724233,  0.        ],
       ...,
       [-0.93715662,  0.41894292, -1.56002152,  0.        ],
       [-0.69759832, -0.20810317, -0.01283087,  0.        ],
       [ 0.31519506, -1.75498218,  0.89115054,  1.        ]])
2GB_0.37311622448_16482164617e7c9d188bc75bafc06a08_test.npy
2GB_0.37311622448_16482164617e7c9d188bc75bafc06a08_train.npy
2Ne_0.784523345103_cddd24af66706c9fa26f6601910c92c5_test.npy
2Ne_0.784523345103_cddd24af66706c9fa26f6601910c92c5_train.npy
2an_0.421335902473_825e1ad5956801c2225da656822caebb_test.npy
2an_0.421335902473_825e1ad5956801c2225da656822caebb_train.npy
2au_1.9348828025_4b57dac04bbc037494cb592143a1c09c_test.npy
2au_1.9348828025_4b57dac04bbc037494cb592143a1c09c_train.npy
2ra_0.292331269114_a0cb35c894f0ad378f6bb824e1019748_test.npy
2ra_0.292331269114_a0cb35c894f0ad378f6bb824e1019748_train.npy
2xt_0.451990280749_e130a295809821efc1db2f64c228169c_test.npy
2xt_0.451990280749_e130a295809821efc1db2f64c228169c_train.npy
1GB_0.303855837305_16482164617e7c9d188bc75bafc06a08_test.npy
1GB_0.303855837305_16482164617e7c9d188bc75bafc06a08_train.npy
1Ne_0.455167671362_cddd24af66706c9fa26f6601910c92c5_test.npy
1Ne_0.455167671362_cddd24af66706c9fa26f6601910c92c5_train.npy
1an_0.249015612417_825e1ad5956801c2225da656822caebb_test.npy
1an_0.249015612417_825e1ad5956801c2225da656822caebb_train.npy
1au_0.22545173232_4b57dac04bbc037494cb592143a1c09c_test.npy
1au_0.22545173232_4b57dac04bbc037494cb592143a1c09c_train.npy
1ra_0.207753858339_a0cb35c894f0ad378f6bb824e1019748_test.npy
1ra_0.207753858339_a0cb35c894f0ad378f6bb824e1019748_train.npy
1xt_0.270981174382_e130a295809821efc1db2f64c228169c_test.npy
1xt_0.270981174382_e130a295809821efc1db2f64c228169c_train.npy
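The saved file names above appear to follow the pattern <save_preds tag><two letters, seemingly characters 2-3 of the classifier class name>_<a cross-validation score>_<a hash identifying the input matrix>_{train,test}.npy. That reading is an inference from the listing, not documented blend_proba behavior; a small parsing sketch under that assumption:

# Decode the (assumed) prediction-file naming pattern, e.g.
# "1xt_0.270981174382_e130a295809821efc1db2f64c228169c_train.npy".
def parse_pred_name(fname):
    stem = fname[: -len('.npy')]
    head, score, data_hash, split = stem.split('_')
    return {
        'stage': head[0],       # value passed to save_preds ("1" or "2")
        'model': head[1:],      # e.g. "xt" ~ ExtraTreesClassifier (assumed)
        'score': float(score),  # presumably a CV metric for that model
        'hash': data_hash,      # presumably identifies the input data
        'split': split,         # "train" or "test"
    }

print(parse_pred_name('1xt_0.270981174382_e130a295809821efc1db2f64c228169c_train.npy'))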
array([[  1.07884407e-04,   9.99892116e-01,   0.00000000e+00, ...,
          9.93333333e-01,   2.50875433e-04,   9.99749125e-01],
       [  9.96784627e-01,   3.21540073e-03,   9.76666667e-01, ...,
          2.00000000e-02,   9.53099981e-01,   4.69000190e-02],
       [  5.11407852e-05,   9.99948859e-01,   5.33333333e-02, ...,
          9.06666667e-01,   1.66652470e-06,   9.99998333e-01],
       ...,
       [  4.93575096e-01,   5.06424904e-01,   6.30000000e-01, ...,
          4.03333333e-01,   9.49199952e-01,   5.08000478e-02],
       [  3.96782160e-03,   9.96032178e-01,   2.66666667e-02, ...,
          9.46666667e-01,   2.46422552e-06,   9.99997536e-01],
       [  9.99466836e-01,   5.33185899e-04,   9.03333333e-01, ...,
          8.00000000e-02,   9.54109081e-01,   4.58909185e-02]])
# Baseline: a plain k-NN classifier on the raw features, for comparison
# with the stacked result scored at the top of the gist.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

nbrs = KNeighborsClassifier().fit(train[:, :-1], train[:, -1])
print("Acc: ", accuracy_score(test[:, -1], nbrs.predict(test[:, :-1])))
from sklearn.datasets import make_classification
import numpy as np

n_features = 3
n_samples = 2000
# make_classification requires n_informative + n_redundant <= n_features,
# so use 2 informative + 1 redundant features for the 3-feature setup.
X, y = make_classification(n_samples=n_samples, n_features=n_features,
                           n_informative=2, n_redundant=1,
                           n_clusters_per_class=2, n_classes=2, flip_y=0)
data = np.c_[X, y]

# Split each class in half so train and test stay balanced, then shuffle.
train = test = np.empty((0, n_features + 1), float)
for d in [data[data[:, n_features] == 0], data[data[:, n_features] == 1]]:
    np.random.shuffle(d)
    train = np.append(train, d[:n_samples // 4], axis=0)
    test = np.append(test, d[n_samples // 4:], axis=0)
for x in [train, test]:
    np.random.shuffle(x)
import os

def read_npy(tr_p, te_p):
    # Sort so train/test prediction files pair up deterministically
    # (os.listdir makes no ordering guarantee).
    train_file_names = [tr_p + x for x in sorted(os.listdir(tr_p))]
    test_file_names = [te_p + x for x in sorted(os.listdir(te_p))]
    list_train, list_test = [], []
    for path_train, path_test in zip(train_file_names, test_file_names):
        list_train.append(np.load(path_train))
        list_test.append(np.load(path_test))
    # Stack each model's predicted probabilities side by side.
    l_train = np.concatenate(list_train, axis=1)
    l_test = np.concatenate(list_test, axis=1)
    return l_train, l_test

first_train, first_test = read_npy('./first/train/', './first/test/')
print(first_train)
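A quick sanity check, under the assumption that each of the six stage-1 models saved two class-probability columns for the 1000 training rows:

# Assumed: 6 models x 2 probability columns = 12 features per row.
print(first_train.shape)  # expected (1000, 12) under that assumption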
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

# First-stage generalizers: six diverse base models.
clfs = [RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
        ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
        GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=50),
        KNeighborsClassifier(n_neighbors=10, n_jobs=-1),
        GaussianNB(),
        XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=1,
                      gamma=0, subsample=0.8, colsample_bytree=0.5, objective='binary:logistic',
                      scale_pos_weight=1, seed=0)]
import blend_proba as bp

# Stage 1: out-of-fold predictions from the raw features.
[bp.blend_proba(clf, X_train=train[:, :-1], y=train[:, -1], X_test=test[:, :-1],
                save_preds="1", nfolds=3) for clf in clfs]
# Stage 2: the same models retrained on the stage-1 prediction matrix.
[bp.blend_proba(clf, X_train=first_train, y=train[:, -1], X_test=first_test,
                save_preds="2", nfolds=3) for clf in clfs]
second_train, second_test = read_npy('./second/train/', './second/test/')

# Stage 3: a single XGBoost model blends the stage-2 predictions. The labels
# are still the original training labels; second_train holds predicted
# probabilities only, so its last column is not a label vector.
clf = XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=1,
                    gamma=0, subsample=0.8, colsample_bytree=0.5, objective='binary:logistic',
                    scale_pos_weight=1, seed=0)
bp.blend_proba(clf, X_train=second_train, y=train[:, -1], X_test=second_test,
               save_test_only="3", nfolds=3)
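With save_test_only="3", the final averaged test-set predictions end up in a 3-prefixed text file (the 3GB_..._test.txt seen earlier); the np.loadtxt / accuracy_score snippet at the top of this gist scores that file against test[:, -1] and compares it with the plain k-NN baseline.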