import numpy as np


## Function
def softmax(x):
    """Numerically stable softmax.

    Args:
        x np.array: (batch_size, num_of_classes) or (num_of_classes, )
    """
    if x.ndim == 2:
        x = x - x.max(axis=1, keepdims=True)
        x = np.exp(x)
        x /= x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - np.max(x)
        x = np.exp(x) / np.sum(np.exp(x))
    return x


def cross_entropy_error(y, t):
    """Mean cross-entropy error over the batch.

    Args:
        y np.array: softmax outputs, (batch_size, num_of_classes) or (num_of_classes, )
        t array like object: correct labels, (batch_size, ) or int
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    # the small constant 1e-7 avoids log(0)
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
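# A minimal sanity check for the two helpers above (illustrative values only,
# not part of the original gist).
if __name__ == "__main__":
    _logits = np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]])
    _probs = softmax(_logits)
    assert np.allclose(_probs.sum(axis=1), 1.0)  # each softmax row sums to 1
    print("sanity-check loss:", cross_entropy_error(_probs, np.array([0, 1])))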
## Loss layer
class CrossEntropyLoss:
    def __init__(self):
        self.y = None  # softmax output
        self.t = None  # teacher labels

    def forward(self, x, t):
        y = softmax(x)
        # accumulate inputs so that several forward calls can share one backward
        if self.y is None:
            self.t = t
            self.y = y
        else:
            self.t = np.vstack([self.t, t]).reshape(-1, )
            self.y = np.vstack([self.y, y])
        loss = cross_entropy_error(y, t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y.copy()
        dx[np.arange(batch_size), self.t] -= 1  # d(loss)/d(logits) = softmax - one_hot(t)
        dx *= dout
        dx /= batch_size
        self.reset()
        return dx

    def reset(self):
        self.y = None
        self.t = None
## Layer
class Linear:
    def __init__(self, input_dim, output_dim):
        self.W = np.random.randn(input_dim, output_dim)
        self.b = np.random.randn(output_dim)
        self.params = [self.W, self.b]
        self.grads = [np.zeros_like(self.W), np.zeros_like(self.b)]
        self.x = None

    def forward(self, x):
        W, b = self.params
        # keep the inputs so backward can compute dW; stack them when forward
        # is called several times before backward
        if self.x is None:
            self.x = x
        else:
            self.x = np.vstack([self.x, x])
        return np.dot(x, W) + b

    def backward(self, dout):
        W, b = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        db = np.sum(dout, axis=0)
        self.grads[0][...] += dW  # accumulate gradients in place
        self.grads[1][...] += db
        self.reset()
        return dx

    def reset(self):
        self.x = None
## Activations
class ReLU:
    def __init__(self):
        self.params = []
        self.grads = []
        self.mask = None

    def forward(self, x):
        mask = (x >= 0)
        if self.mask is None:
            self.mask = mask
        else:
            self.mask = np.vstack([self.mask, mask])
        return np.maximum(x, 0)

    def backward(self, dout):
        dout = dout * self.mask
        self.reset()
        return dout

    def reset(self):
        self.mask = None
## Optimizer
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(np.zeros_like(param))
                self.v.append(np.zeros_like(param))
        self.iter += 1
        # learning rate with the bias correction terms folded in
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
            params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)
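# A minimal optimizer sketch (not part of the original gist): one SGD step on a
# toy parameter, assuming its gradient has already been accumulated elsewhere.
if __name__ == "__main__":
    _params = [np.array([1.0, -2.0])]
    _grads = [np.array([0.5, 0.5])]
    SGD(lr=0.1).update(_params, _grads)
    print("after SGD step:", _params[0])  # -> [ 0.95 -2.05]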
## Embedding
class Embedding:
    def __init__(self, vocab):
        self.vocab = vocab
        self.dim = len(vocab)

    def get_dim(self):
        return self.dim

    def get_vecs_by_tokens(self, tokens):
        """Return one-hot vectors directly.

        Args:
            tokens np.array: (batch_size, length)
        """
        batch_size, length = tokens.shape
        one_hots = np.zeros((batch_size, length, self.dim))
        for i, _tokens in enumerate(tokens):
            indices = self.vocab.lookup_indices(_tokens.tolist())
            one_hots[i][np.arange(length), indices] = 1
        return one_hots
## Encoder
class Encoder:
    def __init__(self, vocab, length):
        self.length = length
        self.embedding = Embedding(vocab)
        self.params = []
        self.grads = []

    def forward(self, tokens):
        """
        Args:
            tokens: (batch_size, length)
        """
        batch_size, length = tokens.shape
        x = self.embedding.get_vecs_by_tokens(tokens)  # (batch_size, length, embedding_dim)
        x = x.reshape(batch_size, -1)  # (batch_size, length*embedding_dim)
        return x

    def backward(self, dout):
        pass  # the one-hot embedding has no trainable parameters

    def zero_grad(self):
        for grads in self.grads:
            grads[...] = np.zeros_like(grads)

    def reset(self):
        self.zero_grad()
## Decoder
class Decoder:
    def __init__(self, vocab, length):
        self.length = length
        self.embedding = Embedding(vocab)
        # input: decoder one-hots (length*dim) concatenated with the encoder output (length*dim)
        self.layer = Linear(
            input_dim=2*self.length*self.embedding.get_dim(),
            output_dim=len(vocab),
        )
        self.params = self.layer.params
        self.grads = self.layer.grads

    def forward(self, tokens, encoder_output):
        """
        Args:
            tokens: (batch_size, length)
            encoder_output: (batch_size, feature_size)
        """
        batch_size, length = tokens.shape
        x = self.embedding.get_vecs_by_tokens(tokens)
        x = x.reshape(batch_size, -1)
        x = np.hstack([x, encoder_output])
        x = self.layer.forward(x)
        return x

    def backward(self, dout):
        dout = self.layer.backward(dout)

    def zero_grad(self):
        for grads in self.grads:
            grads[...] = np.zeros_like(grads)

    def reset(self):
        self.zero_grad()
        self.layer.reset()
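# End-to-end sketch (not part of the original gist): one training step wiring
# Encoder, Decoder, CrossEntropyLoss and Adam together. ToyVocab is a
# hypothetical stand-in mimicking the torchtext-style interface this code
# assumes (len() and lookup_indices()); all values below are made up.
if __name__ == "__main__":
    class ToyVocab:
        def __init__(self, tokens):
            self.itos = list(tokens)
            self.stoi = {tok: i for i, tok in enumerate(self.itos)}

        def __len__(self):
            return len(self.itos)

        def lookup_indices(self, tokens):
            return [self.stoi[tok] for tok in tokens]

    vocab = ToyVocab(["<pad>", "a", "b", "c"])
    length = 3
    encoder = Encoder(vocab, length)
    decoder = Decoder(vocab, length)
    criterion = CrossEntropyLoss()
    optimizer = Adam(lr=0.001)

    # a toy batch of 2 sequences; labels are the vocab indices to predict
    src = np.array([["a", "b", "c"], ["b", "a", "<pad>"]])
    tgt = np.array([["c", "b", "a"], ["a", "c", "<pad>"]])
    labels = np.array(vocab.lookup_indices(["c", "a"]))

    enc_out = encoder.forward(src)            # (2, length*vocab_size)
    logits = decoder.forward(tgt, enc_out)    # (2, vocab_size)
    loss = criterion.forward(logits, labels)
    dx = criterion.backward()                 # (2, vocab_size)
    decoder.backward(dx)
    optimizer.update(decoder.params, decoder.grads)
    decoder.reset()
    print("loss:", loss)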