@nariaki3551
Created May 22, 2022 07:28
import numpy as np


## Function
def softmax(x):
    """Numerically stable softmax (the row max is subtracted before exponentiation).

    Args:
        x (np.ndarray): (batch_size, num_of_classes) or (num_of_classes, )
    """
    if x.ndim == 2:
        x = x - x.max(axis=1, keepdims=True)
        x = np.exp(x)
        x /= x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - np.max(x)
        x = np.exp(x) / np.sum(np.exp(x))
    return x


def cross_entropy_error(y, t):
    """Cross-entropy error computed from softmax probabilities.

    Args:
        y (np.ndarray): softmax output, (batch_size, num_of_classes) or (num_of_classes, )
        t (array-like): ground-truth labels, (batch_size, ) or int
    """
    if y.ndim == 1:
        t = np.array(t).reshape(1, -1)  # accept a bare int as well as an array
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


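# Minimal usage sketch for the two functions above; the logits and labels are
# illustrative values only.
def _demo_loss_functions():
    """Illustrative sketch: compute softmax probabilities and their cross-entropy loss."""
    logits = np.array([[2.0, 1.0, 0.1],
                       [0.5, 2.5, 0.3]])       # (batch_size=2, num_of_classes=3)
    probs = softmax(logits)                    # each row sums to 1
    labels = np.array([0, 1])                  # correct class index per sample
    return cross_entropy_error(probs, labels)  # scalar loss averaged over the batch

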
## Loss layer
class CrossEntropyLoss:
    def __init__(self):
        self.y = None  # softmax output
        self.t = None  # teacher labels

    def forward(self, x, t):
        # Inputs are accumulated over successive forward calls until backward
        # consumes them and resets the layer.
        y = softmax(x)
        if self.y is None:
            self.t = t
            self.y = y
        else:
            self.t = np.vstack([self.t, t]).reshape(-1, )
            self.y = np.vstack([self.y, y])
        loss = cross_entropy_error(y, t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y.copy()
        dx[np.arange(batch_size), self.t] -= 1  # gradient of softmax + cross-entropy w.r.t. x
        dx *= dout
        dx /= batch_size
        self.reset()
        return dx

    def reset(self):
        self.y = None
        self.t = None


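# Minimal sketch of the loss layer in isolation; the shapes and values are
# illustrative only.
def _demo_cross_entropy_layer():
    """Illustrative sketch: one forward/backward pass through CrossEntropyLoss."""
    criterion = CrossEntropyLoss()
    scores = np.random.randn(4, 3)             # raw scores for 4 samples, 3 classes
    targets = np.array([0, 2, 1, 1])
    loss = criterion.forward(scores, targets)  # softmax + cross-entropy
    dscores = criterion.backward()             # (4, 3) gradient w.r.t. scores
    return loss, dscores

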
## Layer
class Linear:
    def __init__(self, input_dim, output_dim):
        self.W = np.random.randn(input_dim, output_dim)
        self.b = np.random.randn(output_dim)
        self.params = [self.W, self.b]
        self.grads = [np.zeros_like(self.W), np.zeros_like(self.b)]
        self.x = None

    def forward(self, x):
        W, b = self.params
        # Inputs are stacked over successive forward calls so that a single
        # backward covers all of them.
        if self.x is None:
            self.x = x
        else:
            self.x = np.vstack([self.x, x])
        return np.dot(x, W) + b

    def backward(self, dout):
        W, b = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        db = np.sum(dout, axis=0)
        self.grads[0][...] += dW  # gradients accumulate; the owner clears them via zero_grad
        self.grads[1][...] += db
        self.reset()
        return dx

    def reset(self):
        self.x = None


## Activations
class ReLU:
    def __init__(self):
        self.params = []
        self.grads = []
        self.mask = None

    def forward(self, x):
        mask = (x >= 0)  # positions where the gradient passes through
        if self.mask is None:
            self.mask = mask
        else:
            self.mask = np.vstack([self.mask, mask])
        return np.maximum(x, 0)

    def backward(self, dout):
        dout = dout * self.mask
        self.reset()
        return dout

    def reset(self):
        self.mask = None


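# Minimal sketch of chaining Linear and ReLU; the forward caches are consumed
# and cleared by the corresponding backward calls. Sizes are illustrative only.
def _demo_linear_relu():
    """Illustrative sketch: forward and backward through Linear followed by ReLU."""
    linear = Linear(input_dim=5, output_dim=3)
    relu = ReLU()
    x = np.random.randn(2, 5)                # (batch_size=2, input_dim=5)
    h = relu.forward(linear.forward(x))      # (2, 3)
    dh = np.ones_like(h)                     # upstream gradient
    dx = linear.backward(relu.backward(dh))  # (2, 5); linear.grads now hold dW, db
    return dx

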
## Optimizer
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(np.zeros_like(param))
                self.v.append(np.zeros_like(param))
        self.iter += 1
        # bias correction folded into the effective learning rate
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
            params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)


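# Minimal sketch of one optimization step wiring Linear, CrossEntropyLoss and
# Adam together; the layer sizes and data are illustrative only.
def _demo_training_step():
    """Illustrative sketch: forward, backward and a single Adam update."""
    layer = Linear(input_dim=4, output_dim=3)
    criterion = CrossEntropyLoss()
    optimizer = Adam(lr=0.001)
    x = np.random.randn(8, 4)
    t = np.random.randint(0, 3, size=8)
    loss = criterion.forward(layer.forward(x), t)
    layer.backward(criterion.backward())         # fills layer.grads
    optimizer.update(layer.params, layer.grads)
    for grad in layer.grads:                     # grads accumulate, so clear them
        grad[...] = 0
    return loss

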
## Embedding
class Embedding:
    def __init__(self, vocab):
        self.vocab = vocab
        self.dim = len(vocab)

    def get_dim(self):
        return self.dim

    def get_vecs_by_tokens(self, tokens):
        """Return one-hot vectors directly.

        Args:
            tokens (np.ndarray): (batch_size, length)
        """
        batch_size, length = tokens.shape
        one_hots = np.zeros((batch_size, length, self.dim))
        for i, _tokens in enumerate(tokens):
            indices = self.vocab.lookup_indices(_tokens.tolist())
            one_hots[i][np.arange(length), indices] = 1
        return one_hots


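# Embedding only assumes that `vocab` supports len() and lookup_indices()
# (torchtext's Vocab provides both). An illustrative stand-in with that
# interface could look like this.
class _SimpleVocab:
    """Illustrative stand-in vocabulary, not a torchtext Vocab."""

    def __init__(self, tokens):
        self.itos = list(tokens)
        self.stoi = {token: i for i, token in enumerate(self.itos)}

    def __len__(self):
        return len(self.itos)

    def lookup_indices(self, tokens):
        return [self.stoi[token] for token in tokens]

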
## Encoder
class Encoder:
    def __init__(self, vocab, length):
        self.length = length
        self.embedding = Embedding(vocab)
        self.params = []
        self.grads = []

    def forward(self, tokens):
        """
        Args:
            tokens: (batch_size, length)
        """
        batch_size, length = tokens.shape
        x = self.embedding.get_vecs_by_tokens(tokens)  # (batch_size, length, embedding_dim)
        x = x.reshape(batch_size, -1)                  # (batch_size, length*embedding_dim)
        return x

    def backward(self, dout):
        pass

    def zero_grad(self):
        for grads in self.grads:
            grads[...] = np.zeros_like(grads)

    def reset(self):
        self.zero_grad()


## Decoder
class Decoder:
    def __init__(self, vocab, length):
        self.length = length
        self.embedding = Embedding(vocab)
        self.layer = Linear(
            input_dim=2*self.length*self.embedding.get_dim(),
            output_dim=len(vocab),
        )
        self.params = self.layer.params
        self.grads = self.layer.grads

    def forward(self, tokens, encoder_output):
        """
        Args:
            tokens: (batch_size, length)
            encoder_output: (batch_size, feature_size)
        """
        batch_size, length = tokens.shape
        x = self.embedding.get_vecs_by_tokens(tokens)
        x = x.reshape(batch_size, -1)
        x = np.hstack([x, encoder_output])  # concatenate decoder input with encoder features
        x = self.layer.forward(x)
        return x

    def backward(self, dout):
        # The gradient w.r.t. the concatenated input is discarded because the
        # one-hot embeddings carry no trainable parameters.
        dout = self.layer.backward(dout)

    def zero_grad(self):
        for grads in self.grads:
            grads[...] = np.zeros_like(grads)

    def reset(self):
        self.zero_grad()
        self.layer.reset()
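

# End-to-end sketch wiring Encoder, Decoder, CrossEntropyLoss and Adam together;
# it assumes the illustrative _SimpleVocab stand-in above, and that encoder and
# decoder share the same vocabulary and sequence length so the concatenated
# feature size matches the decoder's Linear layer. All data are illustrative.
def _demo_encoder_decoder_step():
    """Illustrative sketch: one forward/backward/update step of the encoder-decoder."""
    vocab = _SimpleVocab(["<pad>", "a", "b", "c"])
    length = 3
    encoder = Encoder(vocab, length)
    decoder = Decoder(vocab, length)
    criterion = CrossEntropyLoss()
    optimizer = Adam(lr=0.001)

    src = np.array([["a", "b", "c"], ["b", "c", "a"]])             # (batch_size=2, length=3)
    tgt = np.array([["<pad>", "a", "b"], ["<pad>", "b", "c"]])     # decoder input tokens
    labels = np.array([1, 2])                                      # next-token indices

    enc_out = encoder.forward(src)              # (2, length*vocab_size)
    scores = decoder.forward(tgt, enc_out)      # (2, vocab_size)
    loss = criterion.forward(scores, labels)

    decoder.backward(criterion.backward())      # fills decoder.grads
    optimizer.update(decoder.params, decoder.grads)
    decoder.zero_grad()
    return loss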