Feedforward neural network in Nim

import math, random, sugar

type
  # M x N matrix with 1-based indices: rows run 1..M, columns 1..N
  Matrix[T; M, N: static[int]] = array[1..M, array[1..N, T]]

# Addition (matrix + scalar)
proc `+`[T, I, J](a: Matrix[T, I, J], b: T): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = a[i][j] + b
  result = c

# Subtraction (matrix - scalar)
proc `-`[T, I, J](a: Matrix[T, I, J], b: T): Matrix[T, I, J] =
  result = a + (-b)

# Multiplication (matrix * scalar)
proc `*`[T, I, J](a: Matrix[T, I, J], b: T): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = a[i][j] * b
  result = c

# Addition (scalar + matrix)
proc `+`[T, I, J](a: T, b: Matrix[T, I, J]): Matrix[T, I, J] =
  result = b + a

# Subtraction (scalar - matrix); unlike + and *, the operands do not
# commute, so negate (b - a) elementwise rather than reusing it directly
proc `-`[T, I, J](a: T, b: Matrix[T, I, J]): Matrix[T, I, J] =
  result = (b - a) * T(-1)

# Multiplication (scalar * matrix)
proc `*`[T, I, J](a: T, b: Matrix[T, I, J]): Matrix[T, I, J] =
  result = b * a

# Addition (elementwise)
proc `+`[T, I, J](a, b: Matrix[T, I, J]): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = a[i][j] + b[i][j]
  result = c

# Subtraction (elementwise)
proc `-`[T, I, J](a, b: Matrix[T, I, J]): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = a[i][j] - b[i][j]
  result = c

# Matrix multiplication
proc `*`[T, I, K, J](a: Matrix[T, I, K], b: Matrix[T, K, J]):
    Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      for k in 1..K:
        c[i][j] += a[i][k] * b[k][j]
  result = c

# Hadamard (elementwise) product
proc `|*|`[T, I, J](a, b: Matrix[T, I, J]): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = a[i][j] * b[i][j]
  result = c

# Transpose
proc t[T, I, J](a: Matrix[T, I, J]): Matrix[T, J, I] =
  var c: Matrix[T, J, I]
  for i in 1..I:
    for j in 1..J:
      c[j][i] = a[i][j]
  result = c

randomize()

# Fill a in place with uniform random values in [min, max]
proc toRandom[T, I, J](a: var Matrix[T, I, J], min: T = 0, max: T = 1):
    Matrix[T, I, J] {.discardable.} =
  for i in 1..I:
    for j in 1..J:
      a[i][j] = rand(max - min) + min
  result = a
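
# A quick demo of the Matrix operators above (not in the original gist);
# change `when false` to `when true` to run it:
when false:
  var a: Matrix[float, 2, 2] = [1: [1: 1.0, 2.0], [1: 3.0, 4.0]]
  echo a + 1.0   # elementwise scalar addition
  echo a * a     # matrix product
  echo a |*| a   # Hadamard (elementwise) product
  echo a.t       # transpose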

################################################################################

const
  dim_in = 1          # the input is 1-dimensional
  dim_out = 1         # the output is 1-dimensional
  hidden_count = 1024 # 1024 nodes in the hidden layer
  learn_rate = 0.005  # learning rate

# Training data: x ranges over -1 .. 1 and y = 2 * x ^ 2 - 1
const train_count = 64 # number of training samples
let
  train_x = collect(newSeq):
    for i in 0..<train_count: 2 * i / (train_count - 1) - 1
  train_y = collect(newSeq):
    for x in train_x: 2 * x ^ 2 - 1

# Weight parameters: these matrices are what the network learns.
var
  w1: Matrix[float, hidden_count, dim_in]
  w2: Matrix[float, dim_out, hidden_count]
  b1: Matrix[float, hidden_count, 1]
  b2: Matrix[float, dim_out, 1]

# Randomly initialised in -0.5 .. 0.5.
w1.toRandom(-0.5, 0.5)
w2.toRandom(-0.5, 0.5)
b1.toRandom(-0.5, 0.5)
b2.toRandom(-0.5, 0.5)

# The activation function is ReLU
proc activation[T](x: T): T =
  x.max(0)
proc activation[T, I, J](x: Matrix[T, I, J]): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = x[i][j].activation
  result = c

# Derivative of the activation: 1 for x > 0, else 0
# (branch explicitly to avoid 0/0 at x = 0)
proc activation_dash[T](x: T): T =
  if x > T(0): T(1) else: T(0)
proc activation_dash[T, I, J](x: Matrix[T, I, J]): Matrix[T, I, J] =
  var c: Matrix[T, I, J]
  for i in 1..I:
    for j in 1..J:
      c[i][j] = x[i][j].activation_dash
  result = c

# Forward pass: apply the learned weights to compute y from x.
proc forward(x: float): float =
  (b2 + w2 * activation(w1 * x + b1))[1][1]
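# Shapes, for reference: w1 * x + b1 is (hidden_count, 1); after the ReLU,
# w2 * (...) + b2 is (dim_out, 1) = (1, 1), and [1][1] extracts the scalar.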

# Backward pass: one gradient-descent step on the error.
proc backward(x: float, diff: float) =
  let
    v1 = (w2.t * diff) |*| activation_dash(w1 * x + b1)
    v2 = activation(w1 * x + b1)
  w1 = w1 - v1 * x * learn_rate
  b1 = b1 - v1 * learn_rate
  w2 = w2 - v2.t * diff * learn_rate
  b2 = b2 - diff * learn_rate
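
# Where these updates come from (derived from the code above): with
# E = diff^2 / 2 and y = w2 * relu(w1 * x + b1) + b2, gradient descent gives
#   dE/db2 = diff                 dE/dw2 = diff * relu(w1 * x + b1).t
#   dE/db1 = v1                   dE/dw1 = v1 * x
# where v1 = (w2.t * diff) |*| relu'(w1 * x + b1).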

# Main loop
var
  idxes = collect(newSeq):
    for i in 0..<train_count: i # idxes is 0 .. 63
  error, y, diff: float
for epoch in 1..1000: # 1000 epochs
  idxes.shuffle # reshuffle every epoch for stochastic gradient descent
  error = 0 # sum of squared errors
  for idx in idxes:
    y = forward(train_x[idx]) # forward pass: compute y from x
    diff = y - train_y[idx] # error against the training data
    error += diff ^ 2 # accumulate the squared error
    backward(train_x[idx], diff) # learn from the error
  echo error # print the sum of squared errors after each epoch
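
# A minimal fit check (not in the original gist): sample the trained network
# and compare against the target curve 2 * x ^ 2 - 1.
for x in [-1.0, -0.5, 0.0, 0.5, 1.0]:
  echo x, " -> ", forward(x), " (target ", 2 * x ^ 2 - 1, ")"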