Skip to content

Instantly share code, notes, and snippets.

@sh16ma
Last active January 19, 2022 10:13
Show Gist options
  • Save sh16ma/c8ca409a18aa05a49471700eaae28151 to your computer and use it in GitHub Desktop.
Save sh16ma/c8ca409a18aa05a49471700eaae28151 to your computer and use it in GitHub Desktop.
#🐍 #Python #評価指標 #二値分類 #混同行列 #confusion_matrix
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
# algorithm
from sklearn.svm import SVC # サポートベクトルマシン
from sklearn.linear_model import LogisticRegression # ロジスティック回帰
from sklearn.neighbors import KNeighborsClassifier # K近傍方
from sklearn.ensemble import RandomForestClassifier # ランダムフォレスト
from sklearn.ensemble import GradientBoostingClassifier # 勾配ブースティング
from sklearn.neural_network import MLPClassifier
# evaluation
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
'''
# Datasets
データセットは省略(以降のコードは、特徴量 X と正解ラベル y が定義済みであることを前提とする)
'''
# (Setup) Split the data into training and test sets: 20% is held out for
# testing, and the split is seeded with random_state=1.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)


def _scaled(classifier):
    """Return a pipeline that standardizes features, then applies *classifier*."""
    return Pipeline([("scl", StandardScaler()), ("clf", classifier)])


# (Setup) One pipeline per algorithm; each one standardizes the features
# before handing them to its classifier.
pipe_svc = _scaled(SVC(random_state=1))
pipe_knn = _scaled(KNeighborsClassifier(n_neighbors=10))
pipe_logistic = _scaled(LogisticRegression())
pipe_rf = _scaled(RandomForestClassifier(random_state=1))
pipe_gb = _scaled(GradientBoostingClassifier(random_state=1))
pipe_mlp = _scaled(
    MLPClassifier(hidden_layer_sizes=(5, 2), max_iter=500, random_state=1)
)

# Display names and pipelines are parallel lists: entry n of one belongs to
# entry n of the other.
pipe_names = [
    "SVC",
    "KNeighbors",
    "LogisticRegression",
    "RandomForest",
    "GradientBoosting",
    "MLP",
]
pipe_lines = [
    pipe_svc,
    pipe_knn,
    pipe_logistic,
    pipe_rf,
    pipe_gb,
    pipe_mlp,
]
# For each algorithm: fit the pipeline, predict on the test split, draw the
# confusion matrix, then print accuracy and the precision / recall / F1 report.
for name, pipe in zip(pipe_names, pipe_lines):
    # Train the model on the training split.
    pipe.fit(X_train, y_train)
    # Predict labels for the held-out test split.
    y_pred = pipe.predict(X_test)

    # Confusion matrix comparing the actual classes with the predicted ones.
    confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)

    # Draw the matrix as a light heatmap and write each count in its cell.
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
    for j in range(confmat.shape[0]):
        for k in range(confmat.shape[1]):
            # Row index j goes on the y axis, column index k on the x axis.
            ax.text(x=k, y=j, s=confmat[j, k], va="center", ha="center", fontsize=25)
    ax.set_title(name, fontsize=20)
    ax.set_xlabel("predicted label", fontsize=17)
    ax.set_ylabel("true label", fontsize=17)
    plt.show()

    # Accuracy of this algorithm on the test split.
    print(f"{name} accuracy : {accuracy_score(y_test, y_pred):.3f}")
    print()
    # Precision, recall, F1 score and support for each class.
    # NOTE(review): target_names assumes exactly two classes whose sorted
    # label order is (benign, malignant) -- confirm against the dataset.
    print(classification_report(y_test, y_pred, target_names=["良性", "悪性"]))
    print("_"*40)
    print()
@sh16ma
Copy link
Author

sh16ma commented Mar 1, 2021

スクリーンショット 2021-03-01 15 11 06

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment