Created
May 8, 2018 17:03
-
-
Save koshian2/9969f09e987c7d6b54519fb5d4a4a3b2 to your computer and use it in GitHub Desktop.
Coursera Machine LearningをPythonで実装 - [Week3]ロジスティック回帰 (3)正則化ありロジスティック回帰、自分で実装
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy.optimize import fmin_bfgs | |
# Training data (Coursera ML ex2data2): two microchip QA test scores per sample.
X_data = np.array([
    [0.051267, 0.69956], [-0.092742, 0.68494], [-0.21371, 0.69225], [-0.375, 0.50219], [-0.51325, 0.46564], [-0.52477, 0.2098],
    [-0.39804, 0.034357], [-0.30588, -0.19225], [0.016705, -0.40424], [0.13191, -0.51389], [0.38537, -0.56506], [0.52938, -0.5212],
    [0.63882, -0.24342], [0.73675, -0.18494], [0.54666, 0.48757], [0.322, 0.5826], [0.16647, 0.53874], [-0.046659, 0.81652],
    [-0.17339, 0.69956], [-0.47869, 0.63377], [-0.60541, 0.59722], [-0.62846, 0.33406], [-0.59389, 0.005117], [-0.42108, -0.27266],
    [-0.11578, -0.39693], [0.20104, -0.60161], [0.46601, -0.53582], [0.67339, -0.53582], [-0.13882, 0.54605], [-0.29435, 0.77997],
    [-0.26555, 0.96272], [-0.16187, 0.8019], [-0.17339, 0.64839], [-0.28283, 0.47295], [-0.36348, 0.31213], [-0.30012, 0.027047],
    [-0.23675, -0.21418], [-0.06394, -0.18494], [0.062788, -0.16301], [0.22984, -0.41155], [0.2932, -0.2288], [0.48329, -0.18494],
    [0.64459, -0.14108], [0.46025, 0.012427], [0.6273, 0.15863], [0.57546, 0.26827], [0.72523, 0.44371], [0.22408, 0.52412],
    [0.44297, 0.67032], [0.322, 0.69225], [0.13767, 0.57529], [-0.0063364, 0.39985], [-0.092742, 0.55336], [-0.20795, 0.35599],
    [-0.20795, 0.17325], [-0.43836, 0.21711], [-0.21947, -0.016813], [-0.13882, -0.27266], [0.18376, 0.93348], [0.22408, 0.77997],
    [0.29896, 0.61915], [0.50634, 0.75804], [0.61578, 0.7288], [0.60426, 0.59722], [0.76555, 0.50219], [0.92684, 0.3633],
    [0.82316, 0.27558], [0.96141, 0.085526], [0.93836, 0.012427], [0.86348, -0.082602], [0.89804, -0.20687], [0.85196, -0.36769],
    [0.82892, -0.5212], [0.79435, -0.55775], [0.59274, -0.7405], [0.51786, -0.5943], [0.46601, -0.41886], [0.35081, -0.57968],
    [0.28744, -0.76974], [0.085829, -0.75512], [0.14919, -0.57968], [-0.13306, -0.4481], [-0.40956, -0.41155], [-0.39228, -0.25804],
    [-0.74366, -0.25804], [-0.69758, 0.041667], [-0.75518, 0.2902], [-0.69758, 0.68494], [-0.4038, 0.70687], [-0.38076, 0.91886],
    [-0.50749, 0.90424], [-0.54781, 0.70687], [0.10311, 0.77997], [0.057028, 0.91886], [-0.10426, 0.99196], [-0.081221, 1.1089],
    [0.28744, 1.087], [0.39689, 0.82383], [0.63882, 0.88962], [0.82316, 0.66301], [0.67339, 0.64108], [1.0709, 0.10015],
    [-0.046659, -0.57968], [-0.23675, -0.63816], [-0.15035, -0.36769], [-0.49021, -0.3019], [-0.46717, -0.13377], [-0.28859, -0.060673],
    [-0.61118, -0.067982], [-0.66302, -0.21418], [-0.59965, -0.41886], [-0.72638, -0.082602], [-0.83007, 0.31213], [-0.72062, 0.53874],
    [-0.59389, 0.49488], [-0.48445, 0.99927], [-0.0063364, 0.99927], [0.63265, -0.030612],
])
# Labels: the original literal is 58 ones followed by 60 zeros (118 samples total).
y = np.array([1] * 58 + [0] * 60)
# Scatter the two classes: blue crosses for accepted chips, yellow dots for rejected.
positive = (y == 1)
negative = (y == 0)
plt.plot(X_data[positive, 0], X_data[positive, 1], "b+", label="y = 1")
plt.plot(X_data[negative, 0], X_data[negative, 1], "yo", label="y = 0")
plt.xlabel("Microchip Test 1")
plt.ylabel("Microchip Test 2")
plt.legend()
plt.show()
# Build a polynomial feature matrix (rich enough to overfit without regularization).
def map_feature(X1, X2, degree=6):
    """Map two 1-D feature vectors to all polynomial terms up to `degree`.

    Parameters
    ----------
    X1, X2 : 1-D array-like of equal length.
    degree : int, optional
        Maximum total polynomial degree (default 6, as in the course exercise).

    Returns
    -------
    ndarray of shape (len(X1), (degree + 1) * (degree + 2) / 2)
        Columns ordered as 1, X1, X2, X1^2, X1*X2, X2^2, X1^3, ...
        The leading all-ones column is the bias term.
    """
    # Collect columns in a list and stack once: avoids the O(cols^2) copying
    # of repeatedly growing the array with np.c_.
    cols = [np.ones(len(X1))]
    for total in range(1, degree + 1):
        for j in range(total + 1):
            cols.append(X1 ** (total - j) * X2 ** j)
    return np.column_stack(cols)
# Expand the raw 2-D features into the full polynomial design matrix
# (bias column included), then set up the optimization starting point.
X = map_feature(X_data[:, 0], X_data[:, 1])
initial_theta = np.zeros(X.shape[1])
lambda_ = 1  # regularization strength
# Regularized cost function (the original comment said "normalization"; it is
# regularization).
def cost_function_reg(theta, X, y, lambda_):
    """Regularized logistic-regression cost and gradient.

    Parameters
    ----------
    theta : ndarray, shape (n,)
    X : ndarray, shape (m, n) — design matrix whose first column is the bias.
    y : ndarray, shape (m,) — 0/1 labels.
    lambda_ : float — regularization strength.

    Returns
    -------
    (J, grad) : cost (float) and gradient ndarray of shape (n,).

    The bias term theta[0] is excluded from regularization, per convention.
    """
    m = len(y)
    # Hypothesis: sigmoid of the linear scores.
    h_theta = 1 / (1 + np.exp(-np.dot(X, theta)))
    # Cross-entropy loss plus L2 penalty on the non-bias coefficients.
    J = np.sum(-y * np.log(h_theta) - (1 - y) * np.log(1 - h_theta)) / m
    J += lambda_ / (2 * m) * np.sum(theta[1:] ** 2)
    # Vectorized gradient (replaces the original per-coefficient Python loop,
    # identical math): X^T (h - y) / m, plus the penalty term except at index 0.
    grad = np.dot(X.T, h_theta - y) / m
    grad[1:] += lambda_ / m * theta[1:]
    return J, grad
# Sanity check 1: cost and gradient at the all-zeros starting point (lambda = 1).
cost, grad = cost_function_reg(initial_theta, X, y, lambda_)
print("Cost at initial theta (zeros):", cost)
print("Expected cost (approx): 0.693")
print("Gradient at initial theta (zeros) - first five values only:")
print(grad[:5])
print("Expected gradients (approx) - first five values only:")
print(" 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n")
# Sanity check 2: all-ones theta with lambda = 10 (the original comment
# mistakenly said lambda = 1).
test_theta = np.ones(X.shape[1])
cost, grad = cost_function_reg(test_theta, X, y, 10)
print("Cost at test theta (with lambda = 10):", cost)
print("Expected cost (approx): 3.16")
print("Gradient at test theta - first five values only:")
print(grad[:5])
print("Expected gradients (approx) - first five values only:")
print(" 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n")
# Fit theta by unconstrained BFGS.
# Adjust lambda_ here to explore under-/over-fitting.
lambda_ = 1
# Objective-only wrapper: fmin_bfgs needs a scalar cost, so drop the gradient.
def cost_wrap(theta, X, y, lambda_):
    return cost_function_reg(theta, X, y, lambda_)[0]
# Set disp=True for per-iteration convergence details.
result = fmin_bfgs(cost_wrap, initial_theta, args=(X, y, lambda_),
                   full_output=True, disp=False)
theta, cost = result[:2]
# Decision-boundary plot over the raw two-feature plane.
def plot_decision_boundary(theta, X, y, title):
    """Scatter the training data and draw the contour where
    map_feature(u, v) . theta == 0 (the fitted decision boundary).

    NOTE(review): `X` is accepted for signature compatibility but unused —
    the scatter reads the module-level raw features `X_data`. Confirm intent.
    """
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    # Evaluate the model on the whole grid with one vectorized map_feature
    # call (replaces the original 50x50 Python double loop; same values).
    # meshgrid's default "xy" indexing gives uu[j, i] = u[i], vv[j, i] = v[j],
    # which matches what contour(u, v, z) expects (u varies along columns).
    uu, vv = np.meshgrid(u, v)
    z = np.dot(map_feature(uu.ravel(), vv.ravel()), theta).reshape(vv.shape)
    plt.plot(X_data[y == 1, 0], X_data[y == 1, 1], "b+", label="y = 1")
    plt.plot(X_data[y == 0, 0], X_data[y == 0, 1], "yo", label="y = 0")
    # contour() has no `label` kwarg (the original passed one to no effect);
    # draw only the zero level, i.e. the boundary itself.
    plt.contour(u, v, z, levels=[0])
    plt.xlabel("Microchip Test 1")
    plt.ylabel("Microchip Test 2")
    plt.legend()
    plt.title(title)
# Logistic sigmoid.
def sigmoid(z):
    """Elementwise logistic function 1 / (1 + e^-z)."""
    denom = 1 + np.exp(-z)
    return 1 / denom
# Class prediction by thresholding the hypothesis at 0.5.
def predict(theta, X):
    """Return boolean predictions: True where sigmoid(X . theta) >= 0.5."""
    scores = np.dot(X, theta)
    return sigmoid(scores) >= 0.5
# Training-set accuracy of the fitted model.
p = predict(theta, X)
accur = np.around((p == y).mean() * 100, 2)
print("Train Accuracy:", accur)
print("Expected accuracy (with lambda = 1, Octave): 83.1 (approx)")
# Show the fitted decision boundary over the data.
plot_decision_boundary(theta, X, y, f"algo=bfgs, lambda={lambda_}, accuracy={accur}")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment