# oiehot/pydl_mnist_train.py

## pydl_mnist_train.py
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import *
from common.gradient import *
from dataset.mnist import load_mnist

class TwoLayerNet:
    """Two-layer fully connected classifier.

    The forward pass is affine -> ``sigmoid`` -> affine -> ``softmax``.
    Parameters are kept in ``w1``/``b1``/``w2``/``b2`` and the most recently
    computed gradients in the matching ``grad_*`` attributes.
    """

    def __init__(self, inputSize, hiddenSize, outputSize, weight=0.01):
        """Create the parameters and zero-filled gradient buffers.

        Weight matrices are standard-normal samples scaled by ``weight``;
        biases start at zero.
        """
        # The first-layer weights are drawn before the second-layer weights.
        self.w1 = np.random.randn(inputSize, hiddenSize) * weight
        self.b1 = np.zeros(hiddenSize)
        self.w2 = np.random.randn(hiddenSize, outputSize) * weight
        self.b2 = np.zeros(outputSize)

        # One gradient buffer per parameter, same shape and dtype.
        for name in ("w1", "b1", "w2", "b2"):
            setattr(self, "grad_" + name, np.zeros_like(getattr(self, name)))

    def predict(self, x):
        """Run the forward pass on input ``x`` and return the ``softmax`` output."""
        hidden = sigmoid(np.dot(x, self.w1) + self.b1)
        scores = np.dot(hidden, self.w2) + self.b2
        return softmax(scores)

    def loss(self, x, t):
        """Return ``crossEntropyError`` of the prediction for ``x`` against targets ``t``."""
        prediction = self.predict(x)
        return crossEntropyError(prediction, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along axis 1,
        so ``t`` must carry one column per class.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        # Summing the boolean comparison counts the matching rows.
        hits = np.sum(predicted == expected)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Recompute all four ``grad_*`` attributes for the batch ``(x, t)``."""
        def lossFunc(W):
            # W is ignored: the loss is always evaluated from the current
            # parameters. Presumably the gradient helpers perturb the passed
            # array in place -- TODO confirm against common.gradient.
            return self.loss(x, t)

        # Evaluated in this order: w1, b1, w2, b2.
        estimators = (
            ("w1", numerical_gradient_2d),
            ("b1", numerical_gradient_1d),
            ("w2", numerical_gradient_2d),
            ("b2", numerical_gradient_1d),
        )
        for name, estimate in estimators:
            setattr(self, "grad_" + name, estimate(lossFunc, getattr(self, name)))

# 1. Load the MNIST data.
(trainX, trainT), (testX, testT) = load_mnist(
    normalize=True,
    flatten=True,
    one_hot_label=True,
)

# 2. Build the two-layer network; layer sizes are read from the data.
#    inputSize is the flattened image length (28 * 28 = 784 for MNIST),
#    outputSize the number of label columns (10).
inputSize, outputSize = trainX.shape[1], trainT.shape[1]
hiddenSize = 50
net = TwoLayerNet(inputSize, hiddenSize, outputSize)

# 3. Train the network.
batchSize = 100
iterationNumber = 1000  # number of training iterations
lr = 0.1  # step size applied to the weights and biases
for i in range(iterationNumber):
    print("iteration " + str(i))

    # Draw a fresh random mini-batch of images for every iteration.
    batchMask = np.random.choice(len(trainX), size=batchSize)
    batchX, batchT = trainX[batchMask], trainT[batchMask]

    # Compute the gradient of the loss with respect to each weight and bias.
    net.numerical_gradient(batchX, batchT)

    # Move each weight and bias in the direction that lowers the loss.
    net.w1 -= lr * net.grad_w1
    net.b1 -= lr * net.grad_b1
    net.w2 -= lr * net.grad_w2
    net.b2 -= lr * net.grad_b2

    # Report loss and accuracy on the batch that was just trained on.
    print("loss: " + str(net.loss(batchX, batchT)))
    print("accuracy: " + str(net.accuracy(batchX, batchT)))

# End
	import sys, os
	sys.path.append(os.pardir)
	import numpy as np
	from common.functions import *
	from common.gradient import *
	from dataset.mnist import load_mnist

	class TwoLayerNet:
		"""Two-layer fully connected classifier (affine -> sigmoid -> affine -> softmax).

		Holds the parameters ``w1``/``b1``/``w2``/``b2`` and, next to them, the
		most recently computed gradients ``grad_w1``/``grad_b1``/``grad_w2``/``grad_b2``.
		"""

		def __init__(self, inputSize, hiddenSize, outputSize, weight=0.01):
			"""Initialise weights with scaled Gaussian noise, biases and gradients with zeros."""
			self.w1 = weight * np.random.randn(inputSize, hiddenSize)
			self.b1 = np.zeros(hiddenSize)
			self.w2 = weight * np.random.randn(hiddenSize, outputSize)
			self.b2 = np.zeros(outputSize)
			self.grad_w1 = np.zeros_like(self.w1)
			self.grad_b1 = np.zeros_like(self.b1)
			self.grad_w2 = np.zeros_like(self.w2)
			self.grad_b2 = np.zeros_like(self.b2)

		def predict(self, x):
			"""Run the forward pass.

			x: input.
			Returns the ``softmax`` of the second affine layer.
			"""
			a1 = np.dot(x, self.w1) + self.b1
			z1 = sigmoid(a1)
			a2 = np.dot(z1, self.w2) + self.b2
			y = softmax(a2)
			return y

		def loss(self, x, t):
			"""Loss (evaluation) function.

			x: input, t: correct labels.
			Returns ``crossEntropyError`` of the prediction against ``t``.
			"""
			return crossEntropyError(self.predict(x), t)

		def accuracy(self, x, t):
			"""Compute the accuracy.

			x: input, t: correct labels (one column per class, reduced with argmax).
			Returns the number of correct rows divided by the number of rows in ``x``.
			"""
			y = self.predict(x)
			y = np.argmax(y, axis=1)  # index of the largest value along axis 1, i.e. the predicted digit
			t = np.argmax(t, axis=1)
			tn = np.sum(y == t)  # the comparison yields booleans; summing counts True as 1, i.e. the hit count
			return tn / float(x.shape[0])  # accuracy = correct rows / total rows

		def numerical_gradient(self, x, t):
			"""Compute the gradient of the loss with respect to every weight and bias.

			The results are stored in the ``grad_*`` attributes; nothing is returned.
			"""
			# The argument W is ignored: the loss is evaluated from the current
			# parameters. Presumably the helpers perturb the passed array in
			# place -- TODO confirm against common.gradient.
			lossFunc = lambda W: self.loss(x, t)
			self.grad_w1 = numerical_gradient_2d(lossFunc, self.w1)
			self.grad_b1 = numerical_gradient_1d(lossFunc, self.b1)
			self.grad_w2 = numerical_gradient_2d(lossFunc, self.w2)
			self.grad_b2 = numerical_gradient_1d(lossFunc, self.b2)

	# 1. Load the MNIST data.
	(trainX,trainT),(testX,testT) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

	# 2. Build the two-layer network.
	inputSize = trainX.shape[1] # mnist image size 28 * 28 = 784
	outputSize = trainT.shape[1] # 10
	hiddenSize = 50
	net = TwoLayerNet(inputSize, hiddenSize, outputSize)

	# 3. Train the network.
	batchSize = 100
	iterationNumber = 1000 # number of training iterations
	lr = 0.1 # step size applied to the weights and biases
	for i in range(iterationNumber):
		print( "iteration " + str(i) )

		# Use a different sample of images for every training step.
		batchMask = np.random.choice(trainX.shape[0], batchSize)
		batchX = trainX[batchMask]
		batchT = trainT[batchMask]

		# Compute the gradient of the loss for the weights and biases.
		net.numerical_gradient(batchX, batchT)

		# Adjust the weights and biases in the direction that reduces the loss.
		net.w1 -= net.grad_w1 * lr
		net.b1 -= net.grad_b1 * lr
		net.w2 -= net.grad_w2 * lr
		net.b2 -= net.grad_b2 * lr

		# Report loss and accuracy on the batch that was just trained on.
		print( "loss: " + str(net.loss(batchX, batchT)) )
		print( "accuracy: " + str(net.accuracy(batchX, batchT)) )

	# End