
@piEsposito
Created May 24, 2020 22:02
import math
import torch
import torch.nn as nn
@fatalfeel

import math
import torch
import torch.nn as nn
from torch.autograd import Variable

https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html

https://towardsdatascience.com/building-a-lstm-by-hand-on-pytorch-59c02a4ec091

https://zhuanlan.zhihu.com/p/144132609

class CustomLSTM(nn.Module):
    def __init__(self, i_size: int, h_size: int):
        super().__init__()
        self.input_size = i_size
        self.hidden_size = h_size

        # i_t
        self.U_i = nn.Parameter(torch.Tensor(i_size, h_size))
        self.V_i = nn.Parameter(torch.Tensor(h_size, h_size))
        self.b_i = nn.Parameter(torch.Tensor(h_size))

        # f_t
        self.U_f = nn.Parameter(torch.Tensor(i_size, h_size))
        self.V_f = nn.Parameter(torch.Tensor(h_size, h_size))
        self.b_f = nn.Parameter(torch.Tensor(h_size))

        # c_t
        self.U_c = nn.Parameter(torch.Tensor(i_size, h_size))
        self.V_c = nn.Parameter(torch.Tensor(h_size, h_size))
        self.b_c = nn.Parameter(torch.Tensor(h_size))

        # o_t
        self.U_o = nn.Parameter(torch.Tensor(i_size, h_size))
        self.V_o = nn.Parameter(torch.Tensor(h_size, h_size))
        self.b_o = nn.Parameter(torch.Tensor(h_size))

        #self.init_weights_old()
        self.apply(self.init_weights)

    def init_weights_old(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def init_weights(self, module):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        # module.parameters() yields the same tensors as self.parameters() here
        for weight in module.parameters():
            weight.data.uniform_(1.0, 1.0)  # test: force every parameter to 1.0
            #weight.data.uniform_(-stdv, stdv)

    def forward(self, x, init_states=None):
        bs, seq_sz0, seq_sz1 = x.size()
        hidden_seq = []

        if init_states is None:
            h_t, c_t = (torch.zeros(seq_sz0, bs, self.hidden_size),
                        torch.zeros(seq_sz0, bs, self.hidden_size))
        else:
            h_t, c_t = init_states

        for t in range(seq_sz0):
            x_t = x[:, t, :]

            # @ is matrix multiplication; * is elementwise (with broadcasting)
            '''from /usr/lib/python3.7/site-packages/torch/nn/modules/rnn.py:
            i_t = sigmoid(W_ii x_t + b_ii + W_hi h_{t-1} + b_hi)
            f_t = sigmoid(W_if x_t + b_if + W_hf h_{t-1} + b_hf)
            g_t = tanh(   W_ig x_t + b_ig + W_hg h_{t-1} + b_hg)
            o_t = sigmoid(W_io x_t + b_io + W_ho h_{t-1} + b_ho)
            c_t = f_t * c_{t-1} + i_t * g_t      (* = elementwise)
            h_t = o_t * tanh(c_t)'''
            i_t = torch.sigmoid(self.U_i*x_t + self.V_i*h_t + self.b_i)
            f_t = torch.sigmoid(self.U_f*x_t + self.V_f*h_t + self.b_f)
            g_t = torch.tanh(   self.U_c*x_t + self.V_c*h_t + self.b_c)
            o_t = torch.sigmoid(self.U_o*x_t + self.V_o*h_t + self.b_o)
            c_t = f_t.view(1,-1) @ c_t + i_t.view(1,-1) @ g_t
            h_t = o_t @ torch.tanh(c_t)
            hidden_seq.append(h_t)

        hidden_seq = torch.stack(hidden_seq).reshape(-1, 1, 1)

        return hidden_seq, (h_t, c_t)
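# Note on shapes: with the (3, 1, 1) input built in example_1 below, bs = 3 and
# seq_sz0 = 1, so the time loop runs once over a "batch" of 3 identical elements.
# The .view(1,-1) @ products then contract over that batch dimension, which is
# why c_t prints as the single value 2.5473 in the output further down.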

def example_1():
    testModel = CustomLSTM(1, 1)
    input = Variable(torch.FloatTensor([1.0, 1.0, 1.0]), requires_grad=True).view(-1, 1, 1)
    output = testModel(input)
    print(output, '\n')

##########################################################################################
##########################################################################################
##########################################################################################

class StandardLSTM(nn.Module):
    def __init__(self, i_size: int, h_size: int):
        super().__init__()
        self.input_size = i_size
        self.hidden_size = h_size

        self.rnn = nn.LSTM(input_size=i_size, hidden_size=h_size, num_layers=1, batch_first=True)
        self.rnn.apply(self.init_weights)

    '''self.rnn.all_weights includes the following:
    self.rnn.weight_ih_l0
    self.rnn.weight_hh_l0
    self.rnn.bias_ih_l0
    self.rnn.bias_hh_l0'''
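    # shapes, for reference: weight_ih_l0 is (4*h_size, i_size) and weight_hh_l0
    # is (4*h_size, h_size); note there are TWO bias vectors, bias_ih_l0 and
    # bias_hh_l0, each of shape (4*h_size,) and each covering all four gates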
    def init_weights(self, module):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in module.parameters():
            weight.data.uniform_(1.0, 1.0)  # test: force every parameter to 1.0
            #weight.data.uniform_(-stdv, stdv)

    def forward(self, x, init_states=None):
        bs, seq_sz0, seq_sz1 = x.size()

        if init_states is None:
            h_t, c_t = (torch.zeros(seq_sz0, bs, self.hidden_size),
                        torch.zeros(seq_sz0, bs, self.hidden_size))
        else:
            h_t, c_t = init_states

        result = self.rnn(x, (h_t.detach(), c_t.detach()))
        return result

def example_2():
    testModel = StandardLSTM(1, 1)
    input = Variable(torch.FloatTensor([1.0, 1.0, 1.0]), requires_grad=True).view(-1, 1, 1)
    output = testModel(input)
    print(output, '\n')

if __name__ == "__main__":
    example_1()
    example_2()
///////////////////////////////////
(tensor([[[0.8701]],

         [[0.8701]],

         [[0.8701]]], grad_fn=<ViewBackward>), (tensor([[[0.8701],
          [0.8701],
          [0.8701]]], grad_fn=<UnsafeViewBackward>), tensor([[[2.5473]]], grad_fn=<AddBackward0>)))

(tensor([[[0.7038]],

         [[0.7038]],

         [[0.7038]]], grad_fn=<TransposeBackward0>), (tensor([[[0.7038],
          [0.7038],
          [0.7038]]], grad_fn=<StackBackward>), tensor([[[0.9479],
          [0.9479],
          [0.9479]]], grad_fn=<StackBackward>)))

Why are the results different? Please help.
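For reference, the gap between the two printouts can be reproduced by hand; this is a minimal check (hand-computed, assuming the all-ones init_weights used above):

import torch

# custom cell: ONE bias per gate, so each gate pre-activation is
# 1*1 + 1*0 + 1 = 2, and the .view(1,-1) @ products sum over the
# batch of 3 identical elements
gate = torch.sigmoid(torch.tensor(2.0))    # 0.8808
g    = torch.tanh(torch.tensor(2.0))       # 0.9640
c    = 3 * gate * g                        # 2.5473, the c_t printed above
h    = gate * torch.tanh(c)                # 0.8701, the h_t printed above

# nn.LSTM: TWO biases per gate, so each pre-activation is
# 1*1 + 1 + 1*0 + 1 = 3, and the cell update stays elementwise
gate2 = torch.sigmoid(torch.tensor(3.0))   # 0.9526
g2    = torch.tanh(torch.tensor(3.0))      # 0.9951
c2    = gate2 * g2                         # 0.9479, the c_t printed above
h2    = gate2 * torch.tanh(c2)             # 0.7038, the h_t printed above
print(h, h2)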

@fatalfeel

OK, I got it! Done. Adding the second bias to each gate fixes it:

i_t = torch.sigmoid(self.U_i*x_t + self.ub_i + self.V_i*h_t + self.vb_i)
f_t = torch.sigmoid(self.U_f*x_t + self.ub_f + self.V_f*h_t + self.vb_f)
g_t = torch.tanh(   self.U_c*x_t + self.ub_c + self.V_c*h_t + self.vb_c)
o_t = torch.sigmoid(self.U_o*x_t + self.ub_o + self.V_o*h_t + self.vb_o)
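So each gate now carries an input-side bias (ub_*) and a hidden-side bias (vb_*), matching nn.LSTM's bias_ih_l0 / bias_hh_l0 split. A minimal sketch of that idea (the names W and b below are made up for illustration, not taken from the code above), checking that a two-bias gate reproduces the nn.LSTM number:

import torch
import torch.nn as nn

# one step, hidden size 1, every weight and bias forced to 1.0, x = 1, h0 = c0 = 0
x_t, h_t, c_t = torch.ones(1, 1), torch.zeros(1, 1), torch.zeros(1, 1)
W, b = torch.ones(1, 1), torch.ones(1)

# hand-rolled gates with TWO biases each, mirroring nn.LSTM's b_ih / b_hh split
i_t = torch.sigmoid(x_t @ W + b + h_t @ W + b)
f_t = torch.sigmoid(x_t @ W + b + h_t @ W + b)
g_t = torch.tanh(   x_t @ W + b + h_t @ W + b)
o_t = torch.sigmoid(x_t @ W + b + h_t @ W + b)
c_t = f_t * c_t + i_t * g_t          # elementwise, as in the rnn.py equations
h_t = o_t * torch.tanh(c_t)
print(h_t)                           # tensor([[0.7038]])

# the same step through nn.LSTM with all parameters forced to 1.0
rnn = nn.LSTM(input_size=1, hidden_size=1, batch_first=True)
for p in rnn.parameters():
    nn.init.constant_(p, 1.0)
out, _ = rnn(torch.ones(1, 1, 1))
print(out)                           # also 0.7038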
