Implementing Coursera Machine Learning in Python - [Week 3] Logistic Regression (4): Regularized logistic regression, built-in (scikit-learn)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
# Data: 118 microchip QA samples with two test scores each (the ex2data2 set from the course)
X_data = np.array([[0.051267,0.69956],[-0.092742,0.68494],[-0.21371,0.69225],[-0.375,0.50219],[-0.51325,0.46564],[-0.52477,0.2098],[-0.39804,0.034357],[-0.30588,-0.19225],[0.016705,-0.40424],[0.13191,-0.51389],[0.38537,-0.56506],[0.52938,-0.5212],[0.63882,-0.24342],[0.73675,-0.18494],[0.54666,0.48757],[0.322,0.5826],[0.16647,0.53874],[-0.046659,0.81652],[-0.17339,0.69956],[-0.47869,0.63377],[-0.60541,0.59722],[-0.62846,0.33406],[-0.59389,0.005117],[-0.42108,-0.27266],[-0.11578,-0.39693],[0.20104,-0.60161],[0.46601,-0.53582],[0.67339,-0.53582],[-0.13882,0.54605],[-0.29435,0.77997],[-0.26555,0.96272],[-0.16187,0.8019],[-0.17339,0.64839],[-0.28283,0.47295],[-0.36348,0.31213],[-0.30012,0.027047],[-0.23675,-0.21418],[-0.06394,-0.18494],[0.062788,-0.16301],[0.22984,-0.41155],[0.2932,-0.2288],[0.48329,-0.18494],[0.64459,-0.14108],[0.46025,0.012427],[0.6273,0.15863],[0.57546,0.26827],[0.72523,0.44371],[0.22408,0.52412],[0.44297,0.67032],[0.322,0.69225],[0.13767,0.57529],[-0.0063364,0.39985],[-0.092742,0.55336],[-0.20795,0.35599],[-0.20795,0.17325],[-0.43836,0.21711],[-0.21947,-0.016813],[-0.13882,-0.27266],[0.18376,0.93348],[0.22408,0.77997],[0.29896,0.61915],[0.50634,0.75804],[0.61578,0.7288],[0.60426,0.59722],[0.76555,0.50219],[0.92684,0.3633],[0.82316,0.27558],[0.96141,0.085526],[0.93836,0.012427],[0.86348,-0.082602],[0.89804,-0.20687],[0.85196,-0.36769],[0.82892,-0.5212],[0.79435,-0.55775],[0.59274,-0.7405],[0.51786,-0.5943],[0.46601,-0.41886],[0.35081,-0.57968],[0.28744,-0.76974],[0.085829,-0.75512],[0.14919,-0.57968],[-0.13306,-0.4481],[-0.40956,-0.41155],[-0.39228,-0.25804],[-0.74366,-0.25804],[-0.69758,0.041667],[-0.75518,0.2902],[-0.69758,0.68494],[-0.4038,0.70687],[-0.38076,0.91886],[-0.50749,0.90424],[-0.54781,0.70687],[0.10311,0.77997],[0.057028,0.91886],[-0.10426,0.99196],[-0.081221,1.1089],[0.28744,1.087],[0.39689,0.82383],[0.63882,0.88962],[0.82316,0.66301],[0.67339,0.64108],[1.0709,0.10015],[-0.046659,-0.57968],[-0.23675,-0.63816],[-0.15035,-0.36769],[-0.49021,-0.3019],[-0.46717,-0.13377],[-0.28859,-0.060673],[-0.61118,-0.067982],[-0.66302,-0.21418],[-0.59965,-0.41886],[-0.72638,-0.082602],[-0.83007,0.31213],[-0.72062,0.53874],[-0.59389,0.49488],[-0.48445,0.99927],[-0.0063364,0.99927],[0.63265,-0.030612]])
y = np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0])
## The following is used to examine overfitting
# Build the polynomial feature matrix that lets the model overfit:
# all terms X1**(i-j) * X2**j up to total degree 6, plus a leading column of ones
def map_feature(X1, X2):
    degree = 6
    out = np.ones(len(X1))
    for i in range(1, degree+1):
        for j in range(i+1):
            out = np.c_[out, X1**(i-j) * X2**j]
    return out
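# Sanity check: degree=6 should yield 1 + (2+3+4+5+6+7) = 28 columns, e.g.
#   map_feature(np.array([0.5]), np.array([-0.5])).shape  # -> (1, 28)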
# Plot the data points and the fitted decision boundary
# (X here is the raw two-column data, not the mapped feature matrix)
def plot_decision_boundary(regr, X, y, title):
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    z = np.zeros((len(v), len(u)))
    for i in range(len(u)):
        for j in range(len(v)):
            # Note: assign in transposed order, z[j, i], so the rows of z run
            # along the v (y) axis as contour() expects
            z[j, i] = regr.predict_proba(map_feature(np.array([u[i]]), np.array([v[j]])))[0][1]
    plt.plot(X[y==1, 0], X[y==1, 1], "b+", label="y = 1")
    plt.plot(X[y==0, 0], X[y==0, 1], "yo", label="y = 0")
    # contour() ignores a label= kwarg, so set the legend label on the contour line itself
    cs = plt.contour(u, v, z, levels=[0.5])
    cs.collections[0].set_label("Decision boundary")
    plt.xlabel("Microchip Test 1")
    plt.ylabel("Microchip Test 2")
    plt.legend()
    plt.title(title)
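# The double loop above could also be vectorized; a sketch, assuming the same
# map_feature as above (it accepts 1-D arrays of equal length):
#   uu, vv = np.meshgrid(u, v)
#   z = regr.predict_proba(map_feature(uu.ravel(), vv.ravel()))[:, 1].reshape(uu.shape)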
plt.figure(figsize=(10,8))
plt.subplots_adjust(wspace=0.2, hspace=0.3)
## Main script starts here
X = map_feature(X_data[:,0], X_data[:,1])
# Four values of C = 1/λ, from almost no regularization (1e10) to strong regularization (0.01).
# Note: X already contains a bias column, but LogisticRegression also fits its own
# (unpenalized) intercept, so the redundant column is harmless here.
CC = np.array([1e10, 100, 1, 0.01])
for i, c in enumerate(CC):
    regr = LogisticRegression(C=c, solver="newton-cg")
    regr.fit(X, y)
    accur = np.around(regr.score(X, y) * 100, 2)
    # One subplot per value of C, in a 2x2 grid
    plt.subplot(2, 2, i+1)
    plot_decision_boundary(regr, X_data, y, f"built-in, 1/λ={c}, accuracy={accur}")
plt.show()
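For reference, scikit-learn's C corresponds to 1/λ in the course's regularized cost, J(θ) = -(1/m)·Σ[y·log(h) + (1-y)·log(1-h)] + (λ/2m)·Σ_{j≥1} θ_j². Below is a minimal sketch (not part of the original gist) that evaluates this cost for a fitted model, assuming the regr, X, and y defined above and taking λ = 1/C:

# Minimal sketch: evaluate the course's regularized cost J(theta), with lam = 1/C
def regularized_cost(regr, X, y, lam):
    m = len(y)
    theta = regr.coef_.ravel()                      # weights for the mapped features
    h = regr.predict_proba(X)[:, 1]                 # hypothesis h = sigmoid(X·θ + intercept)
    h = np.clip(h, 1e-12, 1 - 1e-12)                # guard against log(0)
    cost = -(y @ np.log(h) + (1 - y) @ np.log(1 - h)) / m
    reg = lam / (2 * m) * np.sum(theta[1:]**2)      # skip the bias column's weight, like θ0
    return cost + reg

# e.g. regularized_cost(regr, X, y, lam=1/0.01) for the most regularized model above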