NMTModel(
  (encoder): TransformerEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
  (decoder): TransformerDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer_layers): ModuleList(
      (0): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
)
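
The dump above is the module tree of an OpenNMT-py Transformer NMT model: a 6-layer encoder and 6-layer decoder, d_model=512, feed-forward size 2048, dropout 0.1, a shared vocabulary size of 50004 with padding index 1, and pre-attention LayerNorm. For comparison, below is a minimal sketch of the same-shaped architecture built from stock torch.nn modules; it is an illustration under stated assumptions, not OpenNMT-py's implementation. In particular, the number of attention heads is not visible in the printout (8, the Transformer-base default, is assumed here), nn.Transformer defaults to post-norm rather than OpenNMT-py's pre-norm placement, and the Embeddings/Elementwise wrappers are not reproduced.

import math
import torch
import torch.nn as nn

# Hyperparameters read off the dump above; N_HEADS is an assumption,
# since the head count is not part of MultiHeadedAttention's repr.
VOCAB, D_MODEL, D_FF, N_LAYERS, N_HEADS, PAD_IDX = 50004, 512, 2048, 6, 8, 1


class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout, as in the dump."""

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float()
                             * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, max_len, d_model)

    def forward(self, x):  # x: (batch, seq, d_model)
        return self.dropout(x + self.pe[:, : x.size(1)])


class NMTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.src_emb = nn.Embedding(VOCAB, D_MODEL, padding_idx=PAD_IDX)
        self.tgt_emb = nn.Embedding(VOCAB, D_MODEL, padding_idx=PAD_IDX)
        self.pe = PositionalEncoding(D_MODEL, dropout=0.1)
        self.transformer = nn.Transformer(
            d_model=D_MODEL, nhead=N_HEADS,
            num_encoder_layers=N_LAYERS, num_decoder_layers=N_LAYERS,
            dim_feedforward=D_FF, dropout=0.1, batch_first=True)
        self.generator = nn.Linear(D_MODEL, VOCAB)  # projection to vocab logits

    def forward(self, src, tgt):  # src: (B, S), tgt: (B, T), token ids
        # Causal mask so each target position attends only to earlier ones.
        tgt_mask = self.transformer.generate_square_subsequent_mask(
            tgt.size(1)).to(tgt.device)
        scale = math.sqrt(D_MODEL)  # standard embedding scaling
        out = self.transformer(self.pe(self.src_emb(src) * scale),
                               self.pe(self.tgt_emb(tgt) * scale),
                               tgt_mask=tgt_mask)
        return self.generator(out)  # (B, T, VOCAB) logits


model = NMTModel()
print(model)  # prints a nested module tree analogous to the dump above

Printing any nn.Module instance this way is exactly how the dump above would have been produced: each submodule's registered children are rendered recursively with their constructor arguments, which is why the six encoder and six decoder layers repeat verbatim.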