darden1/StudyMachineLearning_Perceptron.py

## StudyMachineLearning_Perceptron.py
# -*- coding: utf-8 -*-
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def main():
    """Train a perceptron on two Iris classes and save four diagnostic plots.

    Reads the Iris CSV from the UCI repository, keeps the first 100 rows
    (labelled -1 for "Iris-setosa" and 1 otherwise) with columns 0 and 2 as
    the two features, fits a ``Perceptron`` and writes four PNG files to the
    current directory: the raw samples, the weight history per update, the
    correct-answer rate per epoch, and the decision regions.
    """
    # --- Load the Iris data
    df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == "Iris-setosa", -1, 1)  # teacher labels: setosa -> -1, anything else -> 1
    X = df.iloc[0:100, [0, 2]].values

    # --- Train
    eta = 0.01  # learning rate
    numEpoch = 10  # number of epochs
    ppn = Perceptron(eta, numEpoch)
    ppn.fit(X, y)

    # --- Shared plot properties
    markers = ('s', 'o', 'x', '^', 'v')
    colors = ('green', 'yellow', 'red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])
    labels = ('setosa', 'versicolor')

    def scatterSamples():
        """Scatter every class with its own marker, colour and legend label."""
        for idx, cl in enumerate(classes):
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous with a sequence of values to colour-map.
            plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                        alpha=1.0, color=cmap(idx),
                        marker=markers[idx], label=labels[idx])

    # --- Plot of the Iris samples
    plt.clf()
    scatterSamples()
    plt.title("Sepal length and Petal length")
    plt.xlabel("Sepal length [cm]")
    plt.ylabel("Petal length [cm]")
    plt.legend(loc="upper left")
    plt.grid()
    plt.savefig(u"アヤメデータ.png", dpi=100)

    # --- Weights versus number of updates
    weightHistory = np.array(ppn.W_)  # local array; ppn.W_ itself is left untouched
    numUpdates = len(ppn.W0_) - 1
    trainTimes = range(len(ppn.W0_))
    plt.clf()
    plt.plot(trainTimes, ppn.W0_, color="blue", label="w0")
    plt.plot(trainTimes, weightHistory[:, 0], color="red", label="w1")
    plt.plot(trainTimes, weightHistory[:, 1], color="green", label="w2")
    plt.title("Train times and weight")
    plt.xlabel('Train times')
    plt.ylabel('Weight')
    plt.legend(loc="upper left")
    # Axis range follows the recorded history instead of a fixed 0..1000.
    plt.xlim(0, numUpdates)
    plt.xticks(range(0, numUpdates + 1, 100))
    plt.grid()
    plt.savefig(u"学習回数と重みの推移.png", dpi=100)

    # --- Correct-answer rate versus epoch
    epochTimes = range(1, numEpoch + 1)
    plt.clf()
    plt.plot(epochTimes, np.array(ppn.correctAnswerRateEachEpoch_) * 100, color="blue", marker="o", label="Correct answer rate")
    plt.title("Epochs and correct answer rate")
    plt.xlabel('Epochs')
    plt.ylabel('Correct answer rate[%]')
    plt.grid()
    # Axis range follows numEpoch instead of a fixed 0..10.
    plt.xlim(0, numEpoch)
    plt.xticks(range(0, numEpoch + 1, 1))
    plt.savefig(u"エポック数と正答率の推移.png", dpi=100)

    # --- Decision regions: classify every point of a grid covering the data
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    dx = 0.02  # grid spacing
    X1 = np.arange(x1_min, x1_max, dx)
    X2 = np.arange(x2_min, x2_max, dx)
    X1, X2 = np.meshgrid(X1, X2)
    Z = ppn.predict(np.array([X1.ravel(), X2.ravel()]).T)
    Z = Z.reshape(X1.shape)

    plt.clf()
    plt.contourf(X1, X2, Z, alpha=0.5, cmap=cmap)
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    scatterSamples()  # overlay the training samples on the regions
    plt.title("Decision regions")
    plt.xlabel("Sepal length [cm]")
    plt.ylabel("Petal length [cm]")
    plt.legend(loc="upper left")
    plt.grid()
    plt.savefig(u"決定領域.png", dpi=100)

class Perceptron(object):
    """Binary perceptron classifier for labels -1 / 1, trained sample by sample.

    After ``fit`` the instance exposes:
      w0_  -- current threshold (bias) term
      w_   -- current weight vector, one entry per feature
      W0_  -- list of the threshold after every update (initial value first)
      W_   -- list of the weight vector (as a list) after every update
      correctAnswerRateEachEpoch_ -- fraction of correct predictions per epoch
    """

    def __init__(self, eta=0.01, numEpoch=10):
        """Store the learning rate ``eta`` and the number of epochs ``numEpoch``."""
        self.eta = eta  # learning rate
        self.numEpoch = numEpoch  # number of passes over the training set

        self.W0_ = []  # threshold recorded after every update
        self.W_ = []  # weights recorded after every update
        self.correctAnswerRateEachEpoch_ = []  # correct-answer rate of every epoch

    def __actFunc(self, z):
        """Step activation: 1 where ``z >= 0``, otherwise -1."""
        return np.where(np.array(z) >= 0.0, 1, -1)

    def predict(self, X):
        """Return the predicted label(s) (-1 or 1) for sample(s) ``X``."""
        return self.__actFunc(np.dot(X, self.w_) + self.w0_)

    def fit(self, X, y):
        """Train on samples ``X`` (rows) with labels ``y`` (-1 or 1).

        Weights start from zero on every call, and the recorded histories are
        restarted as well, so calling ``fit`` again never mixes two runs (and
        still works if a caller replaced ``W_`` / ``W0_`` with another type).
        """
        numFeature = np.shape(X)[1]  # number of features

        self.w0_ = 0  # threshold, initialised to 0
        self.w_ = np.zeros(numFeature)  # weights, initialised to 0

        # Restart the histories with the initial threshold and weights.
        self.W0_ = [self.w0_]
        self.W_ = [self.w_.tolist()]
        self.correctAnswerRateEachEpoch_ = []

        for _ in range(self.numEpoch):  # epoch loop
            correctAnswer = []  # 1 per correctly predicted sample, 0 otherwise
            for xi, yi in zip(X, y):  # sample loop
                # Weighted sum of the features plus threshold, then activation.
                yiHat = self.__actFunc(np.dot(xi, self.w_) + self.w0_)
                correctAnswer.append(int(yi == yiHat))

                # The update is zero when the prediction was right.
                step = self.eta * (yi - yiHat)
                self.w0_ = self.w0_ + step
                self.w_ = self.w_ + step * xi

                # Record threshold and weights after this update.
                self.W_.append(self.w_.tolist())
                self.W0_.append(self.w0_)

            # Correct-answer rate of this epoch.
            self.correctAnswerRateEachEpoch_.append(float(sum(correctAnswer)) / len(correctAnswer))

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
	# -*- coding: utf-8 -*-
	import sys
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from matplotlib.colors import ListedColormap

	def main():
	    """Train a perceptron on two Iris classes and save four diagnostic plots.

	    Reads the Iris CSV from the UCI repository, keeps the first 100 rows
	    (labelled -1 for "Iris-setosa" and 1 otherwise) with columns 0 and 2 as
	    the two features, fits a ``Perceptron`` and writes four PNG files to the
	    current directory: the raw samples, the weight history per update, the
	    correct-answer rate per epoch, and the decision regions.
	    """
	    # --- Load the Iris data
	    df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", header=None)
	    y = df.iloc[0:100, 4].values
	    y = np.where(y == "Iris-setosa", -1, 1)  # teacher labels: setosa -> -1, anything else -> 1
	    X = df.iloc[0:100, [0, 2]].values

	    # --- Train
	    eta = 0.01  # learning rate
	    numEpoch = 10  # number of epochs
	    ppn = Perceptron(eta, numEpoch)
	    ppn.fit(X, y)

	    # --- Shared plot properties
	    markers = ('s', 'o', 'x', '^', 'v')
	    colors = ('green', 'yellow', 'red', 'blue', 'lightgreen', 'gray', 'cyan')
	    classes = np.unique(y)
	    cmap = ListedColormap(colors[:len(classes)])
	    labels = ('setosa', 'versicolor')

	    def scatterSamples():
	        """Scatter every class with its own marker, colour and legend label."""
	        for idx, cl in enumerate(classes):
	            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
	            # is ambiguous with a sequence of values to colour-map.
	            plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
	                        alpha=1.0, color=cmap(idx),
	                        marker=markers[idx], label=labels[idx])

	    # --- Plot of the Iris samples
	    plt.clf()
	    scatterSamples()
	    plt.title("Sepal length and Petal length")
	    plt.xlabel("Sepal length [cm]")
	    plt.ylabel("Petal length [cm]")
	    plt.legend(loc="upper left")
	    plt.grid()
	    plt.savefig(u"アヤメデータ.png", dpi=100)

	    # --- Weights versus number of updates
	    weightHistory = np.array(ppn.W_)  # local array; ppn.W_ itself is left untouched
	    numUpdates = len(ppn.W0_) - 1
	    trainTimes = range(len(ppn.W0_))
	    plt.clf()
	    plt.plot(trainTimes, ppn.W0_, color="blue", label="w0")
	    plt.plot(trainTimes, weightHistory[:, 0], color="red", label="w1")
	    plt.plot(trainTimes, weightHistory[:, 1], color="green", label="w2")
	    plt.title("Train times and weight")
	    plt.xlabel('Train times')
	    plt.ylabel('Weight')
	    plt.legend(loc="upper left")
	    # Axis range follows the recorded history instead of a fixed 0..1000.
	    plt.xlim(0, numUpdates)
	    plt.xticks(range(0, numUpdates + 1, 100))
	    plt.grid()
	    plt.savefig(u"学習回数と重みの推移.png", dpi=100)

	    # --- Correct-answer rate versus epoch
	    epochTimes = range(1, numEpoch + 1)
	    plt.clf()
	    plt.plot(epochTimes, np.array(ppn.correctAnswerRateEachEpoch_) * 100, color="blue", marker="o", label="Correct answer rate")
	    plt.title("Epochs and correct answer rate")
	    plt.xlabel('Epochs')
	    plt.ylabel('Correct answer rate[%]')
	    plt.grid()
	    # Axis range follows numEpoch instead of a fixed 0..10.
	    plt.xlim(0, numEpoch)
	    plt.xticks(range(0, numEpoch + 1, 1))
	    plt.savefig(u"エポック数と正答率の推移.png", dpi=100)

	    # --- Decision regions: classify every point of a grid covering the data
	    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
	    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

	    dx = 0.02  # grid spacing
	    X1 = np.arange(x1_min, x1_max, dx)
	    X2 = np.arange(x2_min, x2_max, dx)
	    X1, X2 = np.meshgrid(X1, X2)
	    Z = ppn.predict(np.array([X1.ravel(), X2.ravel()]).T)
	    Z = Z.reshape(X1.shape)

	    plt.clf()
	    plt.contourf(X1, X2, Z, alpha=0.5, cmap=cmap)
	    plt.xlim(X1.min(), X1.max())
	    plt.ylim(X2.min(), X2.max())
	    scatterSamples()  # overlay the training samples on the regions
	    plt.title("Decision regions")
	    plt.xlabel("Sepal length [cm]")
	    plt.ylabel("Petal length [cm]")
	    plt.legend(loc="upper left")
	    plt.grid()
	    plt.savefig(u"決定領域.png", dpi=100)

	class Perceptron(object):
	    """Binary perceptron classifier for labels -1 / 1, trained sample by sample.

	    After ``fit`` the instance exposes:
	      w0_  -- current threshold (bias) term
	      w_   -- current weight vector, one entry per feature
	      W0_  -- list of the threshold after every update (initial value first)
	      W_   -- list of the weight vector (as a list) after every update
	      correctAnswerRateEachEpoch_ -- fraction of correct predictions per epoch
	    """

	    def __init__(self, eta=0.01, numEpoch=10):
	        """Store the learning rate ``eta`` and the number of epochs ``numEpoch``."""
	        self.eta = eta  # learning rate
	        self.numEpoch = numEpoch  # number of passes over the training set

	        self.W0_ = []  # threshold recorded after every update
	        self.W_ = []  # weights recorded after every update
	        self.correctAnswerRateEachEpoch_ = []  # correct-answer rate of every epoch

	    def __actFunc(self, z):
	        """Step activation: 1 where ``z >= 0``, otherwise -1."""
	        return np.where(np.array(z) >= 0.0, 1, -1)

	    def predict(self, X):
	        """Return the predicted label(s) (-1 or 1) for sample(s) ``X``."""
	        return self.__actFunc(np.dot(X, self.w_) + self.w0_)

	    def fit(self, X, y):
	        """Train on samples ``X`` (rows) with labels ``y`` (-1 or 1).

	        Weights start from zero on every call, and the recorded histories are
	        restarted as well, so calling ``fit`` again never mixes two runs (and
	        still works if a caller replaced ``W_`` / ``W0_`` with another type).
	        """
	        numFeature = np.shape(X)[1]  # number of features

	        self.w0_ = 0  # threshold, initialised to 0
	        self.w_ = np.zeros(numFeature)  # weights, initialised to 0

	        # Restart the histories with the initial threshold and weights.
	        self.W0_ = [self.w0_]
	        self.W_ = [self.w_.tolist()]
	        self.correctAnswerRateEachEpoch_ = []

	        for _ in range(self.numEpoch):  # epoch loop
	            correctAnswer = []  # 1 per correctly predicted sample, 0 otherwise
	            for xi, yi in zip(X, y):  # sample loop
	                # Weighted sum of the features plus threshold, then activation.
	                yiHat = self.__actFunc(np.dot(xi, self.w_) + self.w0_)
	                correctAnswer.append(int(yi == yiHat))

	                # The update is zero when the prediction was right.
	                step = self.eta * (yi - yiHat)
	                self.w0_ = self.w0_ + step
	                self.w_ = self.w_ + step * xi

	                # Record threshold and weights after this update.
	                self.W_.append(self.w_.tolist())
	                self.W0_.append(self.w0_)

	            # Correct-answer rate of this epoch.
	            self.correctAnswerRateEachEpoch_.append(float(sum(correctAnswer)) / len(correctAnswer))

	# Run the demo only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	    main()