Created
May 21, 2023 18:46
-
-
Save matheusmv/c074232680f7f3550d232cce7f9ba510 to your computer and use it in GitHub Desktop.
LSTM study
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import exp, tanh
from typing import List, Tuple

# A matrix is represented as a list of rows, each row a list of floats.
Matrix = List[List[float]]
def build_matrix(number_of_rows: int, number_of_columns: int) -> Matrix:
    """Return a zero-filled matrix with the given dimensions.

    Each row is an independent list, so mutating one row never
    affects another.
    """
    return [
        [0.0 for _ in range(number_of_columns)]
        for _ in range(number_of_rows)
    ]
def matrix_sum(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the element-wise sum of two equally-sized matrices.

    Raises:
        ValueError: if the matrices do not share the same dimensions.
    """
    same_rows = len(matrix_a) == len(matrix_b)
    same_cols = len(matrix_a[0]) == len(matrix_b[0])
    if not (same_rows and same_cols):
        raise ValueError("The matrices must have the same dimensions.")
    return [
        [value_a + value_b for value_a, value_b in zip(row_a, row_b)]
        for row_a, row_b in zip(matrix_a, matrix_b)
    ]
def matrix_sum_all(matrices: List[Matrix]) -> Matrix:
    """Return the element-wise sum of a non-empty list of matrices.

    All matrices must share the same dimensions; the shape check is
    delegated to matrix_sum, which raises ValueError on mismatch.

    Args:
        matrices: one or more equally-sized matrices.

    Returns:
        A new matrix; the inputs are not modified.

    Raises:
        ValueError: if `matrices` is empty or shapes disagree.
    """
    # Fail loudly on empty input instead of the original's opaque
    # IndexError from `matrices[0]`.
    if not matrices:
        raise ValueError("matrix_sum_all requires at least one matrix.")
    # Start from a copy of the first matrix (rather than summing every
    # matrix against a freshly built zero matrix) so the inputs stay
    # untouched and one addition pass is saved.
    result = [row[:] for row in matrices[0]]
    for matrix in matrices[1:]:
        result = matrix_sum(result, matrix)
    return result
def matrix_product(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the matrix product ``matrix_a @ matrix_b``.

    Raises:
        ValueError: when the inner dimensions do not agree.
    """
    rows_a, cols_a = len(matrix_a), len(matrix_a[0])
    rows_b, cols_b = len(matrix_b), len(matrix_b[0])
    if cols_a != rows_b:
        raise ValueError("The number of columns in matrix A must be equal to the number of rows in matrix B.")
    # Start each dot product from 0.0 so the result entries are floats
    # regardless of the input element types.
    return [
        [
            sum((matrix_a[i][k] * matrix_b[k][j] for k in range(cols_a)), 0.0)
            for j in range(cols_b)
        ]
        for i in range(rows_a)
    ]
def sigmoid(x: float) -> float:
    """Return the logistic sigmoid 1 / (1 + e^(-x)).

    The naive formula overflows (`math.exp` raises OverflowError) once
    -x exceeds roughly 709, i.e. for strongly negative inputs.
    Branching on the sign keeps the exponent non-positive, so `exp`
    only ever underflows harmlessly toward 0.0.
    """
    if x >= 0:
        return 1.0 / (1.0 + exp(-x))
    positive_exp = exp(x)  # x < 0, so this is in (0, 1) — no overflow
    return positive_exp / (1.0 + positive_exp)
def eval_sigmoid_on_row(row: List[float]) -> List[float]:
    """Apply the sigmoid function to every value of a single row."""
    return list(map(sigmoid, row))
def matrix_sigmoid(matrix: Matrix) -> Matrix:
    """Apply the sigmoid function element-wise to a matrix."""
    return list(map(eval_sigmoid_on_row, matrix))
def hadamard_product(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
    """Return the element-wise (Hadamard) product of two matrices.

    Raises:
        ValueError: if the matrices do not share the same dimensions.
    """
    same_rows = len(matrix_a) == len(matrix_b)
    same_cols = len(matrix_a[0]) == len(matrix_b[0])
    if not (same_rows and same_cols):
        raise ValueError("The matrices must have the same dimensions.")
    return [
        [value_a * value_b for value_a, value_b in zip(row_a, row_b)]
        for row_a, row_b in zip(matrix_a, matrix_b)
    ]
def matrix_tanh(matrix: Matrix) -> Matrix:
    """Apply the hyperbolic tangent element-wise to a matrix."""
    return [list(map(tanh, row)) for row in matrix]
def lstm_cell(
    input_vector: Matrix,
    previous_hidden_state: Matrix,
    previous_cell_state: Matrix,
    weight_forget_gate: Matrix,
    weight_input_gate: Matrix,
    weight_output_gate: Matrix,
    weight_candidate_state: Matrix,
    recurrent_weight_forget_gate: Matrix,
    recurrent_weight_input_gate: Matrix,
    recurrent_weight_output_gate: Matrix,
    recurrent_weight_candidate_state: Matrix,
    bias_forget_gate: Matrix,
    bias_input_gate: Matrix,
    bias_output_gate: Matrix,
    bias_candidate_state: Matrix
) -> Tuple[Matrix, Matrix]:
    """Run one LSTM step and return (hidden_state, cell_state).

    Every gate shares the same shape of computation:
    activation(W @ x + U @ h_prev + b), with sigmoid for the forget,
    input and output gates and tanh for the candidate state.
    """
    def gate(weight, recurrent_weight, bias, activation):
        # Pre-activation: W @ x_t + U @ h_{t-1} + b
        return activation(matrix_sum_all([
            matrix_product(weight, input_vector),
            matrix_product(recurrent_weight, previous_hidden_state),
            bias,
        ]))

    forget_gate = gate(weight_forget_gate,
                       recurrent_weight_forget_gate,
                       bias_forget_gate,
                       matrix_sigmoid)
    input_gate = gate(weight_input_gate,
                      recurrent_weight_input_gate,
                      bias_input_gate,
                      matrix_sigmoid)
    output_gate = gate(weight_output_gate,
                       recurrent_weight_output_gate,
                       bias_output_gate,
                       matrix_sigmoid)
    candidate_state = gate(weight_candidate_state,
                           recurrent_weight_candidate_state,
                           bias_candidate_state,
                           matrix_tanh)

    # c_t = f ⊙ c_{t-1} + i ⊙ c~
    current_cell_state = matrix_sum_all([
        hadamard_product(forget_gate, previous_cell_state),
        hadamard_product(input_gate, candidate_state),
    ])
    # h_t = o ⊙ tanh(c_t)
    current_hidden_state = hadamard_product(
        output_gate, matrix_tanh(current_cell_state))
    return current_hidden_state, current_cell_state
def make_matrix(number_of_rows: int, number_of_columns: int, elements: List[float]) -> Matrix:
    """Reshape a flat list of values into a rows-by-columns matrix.

    Values fill the matrix row by row (row-major order).

    Raises:
        ValueError: if len(elements) != rows * columns.
    """
    if number_of_rows * number_of_columns != len(elements):
        raise ValueError(
            "The number of elements does not match the dimensions of the matrix.")
    return [
        list(elements[row * number_of_columns:(row + 1) * number_of_columns])
        for row in range(number_of_rows)
    ]
class LSTMConfig:
    """Parameter bundle for an LSTM cell.

    Attributes follow the usual LSTM naming: W* are the input weights,
    U* the recurrent weights and b* the biases, with the suffix picking
    the forget (f), input (i), output (o) gate or candidate state (c).
    """

    def __init__(self, Wf, Wi, Wo, Wc, Uf, Ui, Uo, Uc, bf, bi, bo, bc):
        self.Wf, self.Wi, self.Wo, self.Wc = Wf, Wi, Wo, Wc
        self.Uf, self.Ui, self.Uo, self.Uc = Uf, Ui, Uo, Uc
        self.bf, self.bi, self.bo, self.bc = bf, bi, bo, bc
class LSTMNetwork:
    """An unrolled chain of LSTM cells sharing a single parameter set."""

    def __init__(self,
                 num_cells: int,
                 inputs: List[Matrix],
                 config: LSTMConfig,
                 enable_log=False) -> None:
        self.num_cells = num_cells    # number of time steps to unroll
        self.inputs = inputs          # one input column vector per step
        self.config = config          # shared weights/biases for all steps
        self.enable_log = enable_log  # print states after every step

    def forward(self, hprev: Matrix, cprev: Matrix) -> Tuple[Matrix, Matrix]:
        """Run the chain and return the final (hidden, cell) states."""
        cfg = self.config
        hidden_state, cell_state = hprev, cprev
        for step in range(self.num_cells):
            hidden_state, cell_state = lstm_cell(
                self.inputs[step], hidden_state, cell_state,
                cfg.Wf, cfg.Wi, cfg.Wo, cfg.Wc,
                cfg.Uf, cfg.Ui, cfg.Uo, cfg.Uc,
                cfg.bf, cfg.bi, cfg.bo, cfg.bc
            )
            if self.enable_log:
                self.log(hidden_state, cell_state)
        return hidden_state, cell_state

    def log(self, ht: Matrix, ct: Matrix) -> None:
        """Print both states rounded to two decimals, cell state first."""
        print("ct:", [[round(value, 2) for value in row] for row in ct])
        print("ht:", [[round(value, 2) for value in row] for row in ht])
if __name__ == "__main__":
    # The four gates deliberately share identical parameter values so the
    # demo stays small; only the shapes differ between W (3x2), U (3x3)
    # and b (3x1).
    W_elements = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
    U_elements = [0.07, 0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15]
    b_elements = [0.16, 0.17, 0.18]

    Wf, Wi, Wc, Wo = (make_matrix(3, 2, W_elements) for _ in range(4))
    Uf, Ui, Uc, Uo = (make_matrix(3, 3, U_elements) for _ in range(4))
    bf, bi, bc, bo = (make_matrix(3, 1, b_elements) for _ in range(4))

    lstm_network = LSTMNetwork(
        num_cells=2,
        inputs=[
            make_matrix(2, 1, [1.0, 2.0]),
            make_matrix(2, 1, [3.0, 4.0]),
        ],
        config=LSTMConfig(
            Wf, Wi, Wo, Wc,
            Uf, Ui, Uo, Uc,
            bf, bi, bo, bc
        ),
        enable_log=True
    )

    # Start from zero hidden and cell states and run the two steps,
    # logging ct/ht after each one.
    hprev = make_matrix(3, 1, [0.0] * 3)
    cprev = make_matrix(3, 1, [0.0] * 3)
    lstm_network.forward(hprev, cprev)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment