Last active
November 11, 2015 07:18
-
-
Save nzw0301/363b803268c2ece127f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import numpy as np | |
import sys | |
from sklearn.feature_extraction.text import CountVectorizer | |
# Activation: element-wise logistic sigmoid.
def logistic_function(mat):
    """Return 1 / (1 + exp(-mat)) element-wise, without overflow.

    The naive form ``1/(1+np.exp(-mat))`` overflows inside ``np.exp`` for
    large negative inputs (RuntimeWarning; the value still rounds to 0.0).
    Splitting on the sign keeps the exponent argument non-positive in both
    branches, so no warning is ever raised and the values are unchanged.
    """
    m = np.asarray(mat, dtype=float)
    out = np.empty_like(m)
    pos = m >= 0
    # Non-negative inputs: exp(-m) <= 1, safe.
    out[pos] = 1 / (1 + np.exp(-m[pos]))
    # Negative inputs: use the algebraically equivalent e^m / (1 + e^m).
    exp_m = np.exp(m[~pos])
    out[~pos] = exp_m / (1 + exp_m)
    return out
# Derivative of the unit activation, applied element-wise to a matrix.
# (The original comment said "tanh", but the activation used throughout
# this file is the logistic sigmoid.)
def df(u_l):
    """Return sigma(u_l) * (1 - sigma(u_l)) element-wise.

    The sigmoid is evaluated once and reused; the original evaluated
    ``logistic_function(u_l)`` twice per call.
    """
    s = 1 / (1 + np.exp(-u_l))
    return s * (1 - s)
def forward(network, X):
    """Forward pass through the whole network.

    network: list of [W, b] pairs, one per layer.
    X:       (n_features, n_samples) design matrix; only ``.toarray()``
             and ``.shape`` are used, so any scipy sparse matrix works.

    Returns (U, Z, predict_Y): pre-activations per layer, activations per
    layer (Z[0] is the dense input), and the output-layer activation.

    Fixes over the original: ``X.toarray()`` is densified once instead of
    twice, and the bias-broadcast row of ones is hoisted out of the loop
    (it only depends on the batch size). U[0] and Z[0] now alias the same
    array; no caller in this file mutates either list's entries.
    """
    dense_input = X.toarray()
    U = [dense_input]
    Z = [dense_input]
    # Loop-invariant: row of ones used to broadcast the bias over the batch.
    ones_row = np.ones([1, X.shape[1]])
    for i, (w, b) in enumerate(network):
        u_l = w.dot(Z[i]) + b.dot(ones_row)
        U.append(u_l)
        # Logistic activation (inlined): 1 / (1 + e^-u).
        Z.append(1 / (1 + np.exp(-u_l)))
    predict_Y = Z[-1]
    return U, Z, predict_Y
# Backward pass: propagate the output-layer error down the network.
def backpropagation(network, delta_L, U):
    """Return the per-layer error terms [delta_0, ..., delta_L].

    network: list of [W, b] pairs per layer.
    delta_L: output-layer error, shape (output_units, n_samples).
    U:       pre-activations from the forward pass (U[0] is the raw input).

    NOTE(review): delta_0 is computed from the raw input U[0] but
    update_network only reads delta[1:], so the first entry is unused.
    """
    deltas = [delta_L]
    for layer in reversed(range(len(network))):
        weight = network[layer][0]
        propagated = weight.transpose().dot(deltas[0])
        deltas.insert(0, df(U[layer]) * propagated)
    return deltas
# Could be parallelized, but the layers are updated sequentially here.
def update_network(network, N, delta, Z):
    """Apply one averaged gradient step (learning rate 0.1) to every layer.

    network: list of [W, b] pairs (mutated in place and returned).
    N:       batch size used to average the gradients.
    delta:   error terms from backpropagation; delta[l+1] pairs with layer l.
    Z:       activations from the forward pass; Z[l] feeds layer l.
    """
    learning_rate = 0.1
    for layer in reversed(range(len(network))):
        weight, bias = network[layer]
        weight -= (1/N) * (learning_rate * delta[layer + 1].dot(Z[layer].transpose()))
        bias -= (1/N) * (learning_rate * delta[layer + 1].dot(np.ones([N, 1])))
        network[layer] = [weight, bias]
    return network
# Log-likelihood: mean binary cross-entropy (base-2 logs) of the predictions.
def err(D, Y):
    """Return the mean base-2 cross-entropy of predictions Y against labels D.

    D: 1-D array of 0/1 labels, length == Y.shape[1].
    Y: (1, n_samples) array of predicted probabilities.

    Probabilities are clipped away from exactly 0 and 1: a saturated
    sigmoid output (0.0 or 1.0 in float64) would otherwise produce
    log2(0) = -inf and 0 * -inf = NaN. The per-sample Python loop was
    replaced by an equivalent vectorized sum.
    """
    eps = 1e-12
    y = np.clip(Y[0, :], eps, 1 - eps)
    return -np.sum(D * np.log2(y) + (1 - D) * np.log2(1 - y)) / len(D)
# --- Training script: mini-batch SGD for the multilayer perceptron ---
# Input file: one document per line — a 0/1 label, a space, then the text
# (bag-of-words features are built with sklearn's CountVectorizer).
fname = sys.argv[1]
D = []  # 0/1 labels, one per document
with open(fname) as f:
    cv = CountVectorizer()
    doc = []
    for l in f:
        # Split at the first space only: [label, document text].
        data = l.strip().split(" ", 1)
        D.append(int(data[0]))
        doc.append(data[1])
D = np.array(D)
# Term-count matrix, transposed to (vocabulary_size, n_documents).
Docs = cv.fit_transform(doc).transpose()
hidden_layer = [200, 100, 1]  # units per layer: two hidden layers + 1 output unit
# Initialize every weight matrix and bias vector uniformly in [-0.5, 0.5).
net = []
for h in range(len(hidden_layer)):
    if h:
        W = np.random.rand(hidden_layer[h], hidden_layer[h-1]) - 0.5
    else:
        # First layer maps the vocabulary-sized input vector.
        W = np.random.rand(hidden_layer[h], Docs.shape[0])-0.5
    b = np.random.rand(hidden_layer[h], 1) - 0.5
    net.append([W, b])
# 600 iterations of mini-batch gradient descent
# (batch size 100, sampled with replacement).
for i in range(600):
    # create minibatch
    randints = np.random.randint(0, Docs.shape[1], size=100)
    X = Docs[:, randints]
    mini_D = D[randints]
    U, Z, predict_Y = forward(net, X)
    # Output-layer error: prediction minus target (sigmoid + cross-entropy).
    delta_L = np.array(-mini_D + predict_Y)
    delta = backpropagation(net, delta_L, U)
    print(err(mini_D, predict_Y))  # monitor the training loss each step
    net = update_network(net, X.shape[1], delta, Z)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import numpy as np | |
import sys | |
from sklearn.feature_extraction.text import CountVectorizer | |
# Activation function: element-wise logistic sigmoid.
def logistic_function(mat):
    """Return the logistic sigmoid 1 / (1 + e^-x) applied element-wise."""
    denominator = np.exp(-mat) + 1
    return 1 / denominator
# Apply the derivative of the unit activation (logistic sigmoid) to a matrix.
def df(u_l):
    """Return sigma(u_l) * (1 - sigma(u_l)) element-wise.

    Evaluates the sigmoid once and reuses it; the original called
    ``logistic_function(u_l)`` twice per invocation.
    """
    activation = 1 / (1 + np.exp(-u_l))
    return activation * (1 - activation)
# Forward propagation
def forward(network, X):
    """Run the forward pass and collect per-layer values.

    network: list of [W, b] pairs, one per layer.
    X:       (n_features, n_samples) matrix; only ``.toarray()`` and
             ``.shape`` are used, so any scipy sparse matrix works.

    Returns (U, Z, predict_Y): pre-activations, activations (Z[0] is the
    dense input), and the final-layer output.

    Fixes over the original: the input is densified once instead of twice,
    and the bias-broadcast row of ones (batch-size dependent only) is
    hoisted out of the layer loop. U[0] and Z[0] now share one array;
    nothing in this file mutates the stored matrices.
    """
    x_dense = X.toarray()
    U = [x_dense]
    Z = [x_dense]
    batch_ones = np.ones([1, X.shape[1]])  # loop-invariant bias broadcaster
    for i, (w, b) in enumerate(network):
        u_l = w.dot(Z[i]) + b.dot(batch_ones)
        U.append(u_l)
        # Logistic activation, inlined: 1 / (1 + e^-u).
        Z.append(1 / (1 + np.exp(-u_l)))
    predict_Y = Z[-1]
    return U, Z, predict_Y
# Backward propagation
def backpropagation(network, delta_L, U):
    """Return the per-layer error terms [delta_0, ..., delta_L].

    network: list of [W, b] pairs per layer.
    delta_L: output-layer error, shape (output_units, n_samples).
    U:       pre-activations from the forward pass (U[0] is the raw input).

    NOTE(review): delta_0 comes from the raw input U[0] and is never read
    by update_network (which uses delta[1:]).
    """
    deltas = [delta_L]
    layer = len(network) - 1
    while layer >= 0:
        weight = network[layer][0]
        pre_activation = U[layer]
        deltas.insert(0, df(pre_activation) * weight.transpose().dot(deltas[0]))
        layer -= 1
    return deltas
# Weight update; could be parallelized, but done sequentially here.
def update_network(network, N, delta, Z):
    """Apply one averaged gradient step (learning rate 0.2) per layer.

    network: list of [W, b] pairs (mutated in place and returned).
    N:       batch size used to average the gradients.
    delta:   error terms from backpropagation; delta[l+1] pairs with layer l.
    Z:       activations from the forward pass; Z[l] feeds layer l.

    Layer updates are mutually independent (delta and Z are precomputed),
    so iteration order does not affect the result.
    """
    learning_rate = 0.2
    for idx, (weight, bias) in enumerate(network):
        weight -= (1/N) * (learning_rate * delta[idx + 1].dot(Z[idx].transpose()))
        bias -= (1/N) * (learning_rate * delta[idx + 1].dot(np.ones([N, 1])))
        network[idx] = [weight, bias]
    return network
# Log-likelihood: total binary cross-entropy (base-2 logs) of the predictions.
def err(D, Y):
    """Return the summed base-2 cross-entropy of predictions Y vs labels D.

    D: 1-D array of 0/1 labels, length == Y.shape[1].
    Y: (1, n_samples) array of predicted probabilities.

    Unlike the mini-batch script's version, this one returns the sum, not
    the mean (preserved from the original). Probabilities are clipped away
    from exactly 0 and 1 so a saturated sigmoid output cannot produce
    log2(0) = -inf and 0 * -inf = NaN; the loop is vectorized.
    """
    eps = 1e-12
    y = np.clip(Y[0, :], eps, 1 - eps)
    return -np.sum(D * np.log2(y) + (1 - D) * np.log2(1 - y))
# --- Training script: full-batch gradient descent for the multilayer perceptron ---
# Input file: one document per line — a 0/1 label, a space, then the text
# (bag-of-words features are built with sklearn's CountVectorizer).
fname = sys.argv[1]
D = []  # 0/1 labels, one per document
with open(fname) as f:
    cv = CountVectorizer()
    doc = []
    for l in f:
        # Split at the first space only: [label, document text].
        data = l.strip().split(" ", 1)
        D.append(int(data[0]))
        doc.append(data[1])
D = np.array(D)
# Term-count matrix, transposed to (vocabulary_size, n_documents).
X = cv.fit_transform(doc).transpose()
hidden_layer = [100, 75, 1]  # units per layer: 100 in layer 2, 75 in layer 3, 1 output
# Initialize every weight matrix and bias vector uniformly in [-0.5, 0.5).
net = []
for h in range(len(hidden_layer)):
    if h:
        W = np.random.rand(hidden_layer[h], hidden_layer[h-1]) - 0.5
    else:
        # First layer maps the vocabulary-sized input vector.
        W = np.random.rand(hidden_layer[h], X.shape[0])-0.5
    b = np.random.rand(hidden_layer[h], 1) - 0.5
    net.append([W, b])
# 1000 training iterations over the full batch.
# NOTE(review): `result` is never written or read below — dead variable?
result = [[],[]]
for i in range(1000):
    U, Z, predict_Y = forward(net, X)
    # Output-layer error: prediction minus target (sigmoid + cross-entropy).
    delta_L = np.array(-D + predict_Y)
    delta = backpropagation(net, delta_L, U)
    print(err(D, predict_Y))  # monitor the training loss each iteration
    net = update_network(net, X.shape[1], delta, Z)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python 3で動きます.
nn.pyが確率的勾配法の多層パーセプトロンです.
入力データが以下のような文書のように0か1を先頭,それ以降が単語とし,2値分類を行います.
実行
$ python nn.py training_data.txt