Skip to content

Instantly share code, notes, and snippets.

@matheusmv
Created May 21, 2023 18:46
Show Gist options
  • Save matheusmv/c074232680f7f3550d232cce7f9ba510 to your computer and use it in GitHub Desktop.
LSTM study
from math import exp, tanh
from typing import List, Tuple
# Type alias: a 2-D matrix stored row-major as a list of rows of floats.
Matrix = List[List[float]]
def build_matrix(number_of_rows: int, number_of_columns: int) -> Matrix:
    """Return a zero-filled matrix with the requested dimensions.

    Each row is an independent list, so mutating one entry never
    affects another row.
    """
    return [
        [0.0 for _ in range(number_of_columns)]
        for _ in range(number_of_rows)
    ]
def matrix_sum(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the element-wise sum of two equally-shaped matrices.

    Raises:
        ValueError: if the two matrices differ in dimensions.
    """
    shape_a = (len(matrix_a), len(matrix_a[0]))
    shape_b = (len(matrix_b), len(matrix_b[0]))
    if shape_a != shape_b:
        raise ValueError("The matrices must have the same dimensions.")
    return [
        [a + b for a, b in zip(row_a, row_b)]
        for row_a, row_b in zip(matrix_a, matrix_b)
    ]
def matrix_sum_all(matrices: List[Matrix]) -> Matrix:
    """Fold a non-empty list of equally-shaped matrices into their element-wise sum."""
    first = matrices[0]
    # Start from a zero matrix so matrix_sum's shape check applies to every operand.
    accumulated = build_matrix(len(first), len(first[0]))
    for matrix in matrices:
        accumulated = matrix_sum(accumulated, matrix)
    return accumulated
def matrix_product(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the matrix product A @ B via the classic row-by-column rule.

    Raises:
        ValueError: if A's column count does not match B's row count.
    """
    cols_a = len(matrix_a[0])
    rows_b, cols_b = len(matrix_b), len(matrix_b[0])
    if cols_a != rows_b:
        raise ValueError("The number of columns in matrix A must be equal to the number of rows in matrix B.")
    # sum(..., 0.0) keeps the float accumulator of the original implementation.
    return [
        [
            sum((row_a[k] * matrix_b[k][j] for k in range(cols_a)), 0.0)
            for j in range(cols_b)
        ]
        for row_a in matrix_a
    ]
def sigmoid(x: float) -> float:
    """Logistic sigmoid 1 / (1 + e^-x), numerically stable for any float x.

    The naive form exp(-x) overflows (OverflowError) for x below about
    -709; branching on the sign keeps the exponent non-positive so the
    exponential can only underflow harmlessly to 0.0.
    """
    if x >= 0:
        return 1.0 / (1.0 + exp(-x))
    z = exp(x)  # x < 0 here, so exp(x) is in (0, 1) — no overflow possible
    return z / (1.0 + z)
def eval_sigmoid_on_row(row: List[float]) -> List[float]:
    """Apply the sigmoid to each value of a single row."""
    return list(map(sigmoid, row))
def matrix_sigmoid(matrix: Matrix) -> Matrix:
    """Apply the sigmoid element-wise over the whole matrix."""
    return list(map(eval_sigmoid_on_row, matrix))
def hadamard_product(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the Hadamard (element-wise) product of two equally-shaped matrices.

    Raises:
        ValueError: if the two matrices differ in dimensions.
    """
    shape_a = (len(matrix_a), len(matrix_a[0]))
    shape_b = (len(matrix_b), len(matrix_b[0]))
    if shape_a != shape_b:
        raise ValueError("The matrices must have the same dimensions.")
    return [
        [a * b for a, b in zip(row_a, row_b)]
        for row_a, row_b in zip(matrix_a, matrix_b)
    ]
def matrix_tanh(matrix: Matrix) -> Matrix:
    """Apply tanh element-wise over the whole matrix."""
    squashed = []
    for row in matrix:
        squashed.append([tanh(value) for value in row])
    return squashed
def lstm_cell(
    input_vector: Matrix,
    previous_hidden_state: Matrix,
    previous_cell_state: Matrix,
    weight_forget_gate: Matrix,
    weight_input_gate: Matrix,
    weight_output_gate: Matrix,
    weight_candidate_state: Matrix,
    recurrent_weight_forget_gate: Matrix,
    recurrent_weight_input_gate: Matrix,
    recurrent_weight_output_gate: Matrix,
    recurrent_weight_candidate_state: Matrix,
    bias_forget_gate: Matrix,
    bias_input_gate: Matrix,
    bias_output_gate: Matrix,
    bias_candidate_state: Matrix
) -> Tuple[Matrix, Matrix]:
    """Run one LSTM time step and return (hidden_state, cell_state).

    Implements the standard gate equations: each gate is an affine map of
    the input and previous hidden state (W @ x + U @ h_prev + b) passed
    through sigmoid (gates) or tanh (candidate state).
    """

    def preactivation(weight: Matrix, recurrent_weight: Matrix, bias: Matrix) -> Matrix:
        # W @ x_t + U @ h_{t-1} + b — shared shape for every gate.
        return matrix_sum_all([
            matrix_product(weight, input_vector),
            matrix_product(recurrent_weight, previous_hidden_state),
            bias,
        ])

    forget_gate = matrix_sigmoid(preactivation(
        weight_forget_gate, recurrent_weight_forget_gate, bias_forget_gate))
    input_gate = matrix_sigmoid(preactivation(
        weight_input_gate, recurrent_weight_input_gate, bias_input_gate))
    output_gate = matrix_sigmoid(preactivation(
        weight_output_gate, recurrent_weight_output_gate, bias_output_gate))
    candidate_state = matrix_tanh(preactivation(
        weight_candidate_state, recurrent_weight_candidate_state, bias_candidate_state))

    # c_t = f ⊙ c_{t-1} + i ⊙ c̃_t
    current_cell_state = matrix_sum_all([
        hadamard_product(forget_gate, previous_cell_state),
        hadamard_product(input_gate, candidate_state),
    ])
    # h_t = o ⊙ tanh(c_t)
    current_hidden_state = hadamard_product(
        output_gate, matrix_tanh(current_cell_state))

    return current_hidden_state, current_cell_state
def make_matrix(number_of_rows: int, number_of_columns: int, elements: List[float]) -> Matrix:
    """Reshape a flat row-major list into a rows x columns matrix.

    Raises:
        ValueError: if len(elements) != number_of_rows * number_of_columns.
    """
    if number_of_rows * number_of_columns != len(elements):
        raise ValueError(
            "The number of elements does not match the dimensions of the matrix.")
    # Consume the flat list left to right, one row at a time.
    flat = iter(elements)
    return [
        [next(flat) for _ in range(number_of_columns)]
        for _ in range(number_of_rows)
    ]
class LSTMConfig:
    """Bundle of all LSTM parameters: one (W, U, b) triple per gate.

    Naming convention: f = forget, i = input, o = output, c = candidate;
    W* multiply the input vector, U* multiply the previous hidden state,
    b* are the additive biases.
    """

    def __init__(self, Wf, Wi, Wo, Wc, Uf, Ui, Uo, Uc, bf, bi, bo, bc):
        # Input-to-gate weights.
        self.Wf, self.Wi, self.Wo, self.Wc = Wf, Wi, Wo, Wc
        # Recurrent (hidden-to-gate) weights.
        self.Uf, self.Ui, self.Uo, self.Uc = Uf, Ui, Uo, Uc
        # Gate biases.
        self.bf, self.bi, self.bo, self.bc = bf, bi, bo, bc
class LSTMNetwork:
    """Unrolled LSTM: applies `num_cells` steps over `inputs` with shared weights."""

    def __init__(self,
                 num_cells: int,
                 inputs: List[Matrix],
                 config: LSTMConfig,
                 enable_log=False) -> None:
        self.num_cells = num_cells      # number of time steps to unroll
        self.inputs = inputs            # one input column vector per step
        self.config = config            # shared weights/biases for every step
        self.enable_log = enable_log    # print states after each step if True

    def forward(self, hprev: Matrix, cprev: Matrix) -> Tuple[Matrix, Matrix]:
        """Run all time steps and return the final (hidden, cell) states."""
        cfg = self.config
        hidden, cell = hprev, cprev
        for step in range(self.num_cells):
            hidden, cell = lstm_cell(
                self.inputs[step], hidden, cell,
                cfg.Wf, cfg.Wi, cfg.Wo, cfg.Wc,
                cfg.Uf, cfg.Ui, cfg.Uo, cfg.Uc,
                cfg.bf, cfg.bi, cfg.bo, cfg.bc,
            )
            if self.enable_log:
                self.log(hidden, cell)
        return hidden, cell

    def log(self, ht: Matrix, ct: Matrix) -> None:
        """Print both states rounded to two decimal places (cell state first)."""
        print("ct:", [[round(value, 2) for value in row] for row in ct])
        print("ht:", [[round(value, 2) for value in row] for row in ht])
if __name__ == "__main__":
    # Demo: a 2-step LSTM with input size 2 and hidden size 3.
    # Every gate shares the same initial values for W (3x2), U (3x3) and b (3x1).
    shared_w = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
    Wf, Wi, Wc, Wo = (make_matrix(3, 2, shared_w) for _ in range(4))

    shared_u = [0.07, 0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15]
    Uf, Ui, Uc, Uo = (make_matrix(3, 3, shared_u) for _ in range(4))

    shared_b = [0.16, 0.17, 0.18]
    bf, bi, bc, bo = (make_matrix(3, 1, shared_b) for _ in range(4))

    lstm_network = LSTMNetwork(
        num_cells=2,
        inputs=[
            make_matrix(2, 1, [1.0, 2.0]),
            make_matrix(2, 1, [3.0, 4.0]),
        ],
        config=LSTMConfig(
            Wf, Wi, Wo, Wc,
            Uf, Ui, Uo, Uc,
            bf, bi, bo, bc,
        ),
        enable_log=True,
    )

    # Start from zero hidden and cell states; per-step states are logged.
    lstm_network.forward(
        make_matrix(3, 1, [0.0, 0.0, 0.0]),
        make_matrix(3, 1, [0.0, 0.0, 0.0]),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment