@zhpmatrix
Created January 11, 2020 06:19
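The listing below is the module structure printed by PyTorch for what appears to be an OpenNMT-py NMTModel: a Transformer with 6 encoder and 6 decoder layers, model dimension 512, feed-forward dimension 2048, source and target embeddings over a 50,004-token vocabulary (padding index 1), and dropout 0.1 throughout.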
NMTModel(
  (encoder): TransformerEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
  (decoder): TransformerDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer_layers): ModuleList(
      (0): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
)
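For reference, below is a minimal PyTorch sketch of the MultiHeadedAttention sub-module that appears repeatedly above (as self_attn and context_attn). The printed repr only exposes the four 512-to-512 projections, the softmax, and the dropout; the head count is not shown, so the 8 heads (64 dimensions per head) assumed here are the standard Transformer-base setting, and the code is a simplified sketch rather than OpenNMT-py's exact implementation.

import math
import torch
import torch.nn as nn

class MultiHeadedAttention(nn.Module):
    """Simplified sketch of the self_attn / context_attn blocks in the dump above.

    Assumption: 8 heads of 64 dims each (d_model=512); the head count is not
    visible in the printed module structure.
    """

    def __init__(self, d_model=512, heads=8, dropout=0.1):
        super().__init__()
        assert d_model % heads == 0
        self.heads = heads
        self.dim_per_head = d_model // heads
        # The four projections named in the repr.
        self.linear_keys = nn.Linear(d_model, d_model)
        self.linear_values = nn.Linear(d_model, d_model)
        self.linear_query = nn.Linear(d_model, d_model)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        self.final_linear = nn.Linear(d_model, d_model)

    def forward(self, key, value, query, mask=None):
        b = query.size(0)

        def split(x):  # (B, T, 512) -> (B, 8, T, 64)
            return x.view(b, -1, self.heads, self.dim_per_head).transpose(1, 2)

        q = split(self.linear_query(query)) / math.sqrt(self.dim_per_head)
        k = split(self.linear_keys(key))
        v = split(self.linear_values(value))

        scores = torch.matmul(q, k.transpose(-2, -1))   # (B, 8, Tq, Tk)
        if mask is not None:
            scores = scores.masked_fill(mask, float("-inf"))
        attn = self.dropout(self.softmax(scores))
        context = torch.matmul(attn, v)                 # (B, 8, Tq, 64)
        context = context.transpose(1, 2).contiguous().view(b, -1, self.heads * self.dim_per_head)
        return self.final_linear(context)               # (B, Tq, 512)

# Usage: self-attention over a toy batch.
attn = MultiHeadedAttention()
x = torch.randn(2, 7, 512)   # (batch, seq_len, d_model)
out = attn(x, x, x)          # -> (2, 7, 512)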