Last active
March 12, 2021 07:29
-
-
Save jacKlinc/714678896770b6e798ebcb86bd258b02 to your computer and use it in GitHub Desktop.
The first language model declares each of the three steps explicitly; the second performs the same kind of computation with a loop over the input words.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LanguageModel(Module):
    """
    Predict the fourth word of a sequence from the previous three.

    The 1st step feeds the 1st word's embedding through the hidden layer.
    The 2nd step adds the 2nd word's embedding to the 1st step's activations.
    The 3rd step adds the 3rd word's embedding to the 2nd step's activations.

    forward() returns raw logits over the vocabulary (not probabilities);
    apply softmax / cross-entropy downstream.
    """
    def __init__(self, vocab_sz, n_hidden):
        # Explicit super().__init__() so the class also works when Module is
        # plain nn.Module (fastai's Module calls it automatically, where the
        # second call is a harmless re-init before any submodule is assigned).
        super().__init__()
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # token index -> embedding vector
        self.h_h = nn.Linear(n_hidden, n_hidden)     # hidden -> hidden transition
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden -> vocabulary logits

    def forward(self, x):
        # NOTE: the additions must be out-of-place (h = h + ...). The original
        # `h += ...` mutated the output of F.relu in place; F.relu's backward
        # saves that output, so the in-place write raises "a variable needed
        # for gradient computation has been modified by an inplace operation"
        # on backward().
        h = F.relu(self.h_h(self.i_h(x[:, 0])))  # 1st word's embedding
        h = h + F.relu(self.i_h(x[:, 1]))        # add 2nd word's embedding
        h = F.relu(self.h_h(h))                  # 1st word's output
        h = h + F.relu(self.i_h(x[:, 2]))        # add 3rd word's embedding
        h = F.relu(self.h_h(h))                  # 2nd word's output
        return self.h_o(h)                       # logits for the 4th word
class LanguageModelRecurrent(Module):
    """
    Three-word language model written as a loop instead of declaring the
    same computation three times.

    Each iteration adds the next word's embedding to the running hidden
    state, then passes it through the hidden layer. forward() returns raw
    logits over the vocabulary for the fourth word.
    """
    def __init__(self, vocab_sz, n_hidden):
        # Explicit for compatibility with plain nn.Module; fastai's Module
        # calls it automatically, where the second call is harmless.
        super().__init__()
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # token index -> embedding vector
        self.h_h = nn.Linear(n_hidden, n_hidden)     # hidden -> hidden transition
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden -> vocabulary logits

    def forward(self, x):
        h = 0  # int 0 broadcasts against the first embedding
        for i in range(3):
            # Out-of-place add: after the first iteration h is the output of
            # F.relu, and `h +=` would mutate it in place, breaking backward()
            # ("a variable needed for gradient computation has been modified
            # by an inplace operation").
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
        return self.h_o(h)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment