Created
September 13, 2018 08:20
-
-
Save tobyyouup/8426bb216d05482efd0bbdc8dcbd04e5 to your computer and use it in GitHub Desktop.
Running log of a multilingual (de/en/fr) unsupervised NMT training experiment (shared-encoder/decoder transformer, experiment "endefr").
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
INFO - 09/13/18 10:01:49 - 0:00:00 - ============ Initialized logger ============ | |
INFO - 09/13/18 10:01:49 - 0:00:00 - attention: True | |
attention_dropout: 0 | |
back_dataset: {} | |
back_directions: [] | |
batch_size: 32 | |
beam_size: 0 | |
clip_grad_norm: 5 | |
command: python ../main.py --exp_name 'endefr' --transformer 'True' --n_enc_layers '4' --n_dec_layers '4' --share_enc '3' --share_dec '3' --share_lang_emb 'True' --share_output_emb 'True' --langs 'de,en,fr' --n_mono '-1' --mono_dataset 'de:./data/mono/de.train.tok.bpe.pth,,;en:./data/mono/en.train.tok.bpe.pth,,;fr:./data/mono/fr.train.tok.bpe.pth,,' --para_dataset 'en-fr:,./data/para/dev/newstest2013-ref.XX.60000.pth,./data/para/dev/newstest2014-fren-src.XX.60000.pth;de-en:,./data/para/dev/newstest2013-ref.XX.60000.pth,./data/para/dev/newstest2014-deen-src.XX.60000.pth' --mono_directions 'de,en,fr' --word_shuffle '3' --word_dropout '0.1' --word_blank '0.2' --pivo_directions 'fr-en-fr,en-fr-en,de-en-de,en-de-en,fr-de-fr,de-fr-de' --pretrained_emb './data/mono/endefr.dim512.vec' --pretrained_out 'True' --lambda_xe_mono '0:1,100000:0.1,300000:0' --lambda_xe_otfd '1' --otf_num_processes '30' --otf_sync_params_every '1000' --enc_optimizer 'adam,lr=0.0001' --epoch_size '500000' --stopping_criterion 'bleu_en_fr_valid,10' --batch_size '32' --exp_id "4qq6ck8npo" | |
dec_optimizer: enc_optimizer | |
decoder_attention_heads: 8 | |
decoder_normalize_before: False | |
dis_clip: 0 | |
dis_dropout: 0 | |
dis_hidden_dim: 128 | |
dis_input_proj: True | |
dis_layers: 3 | |
dis_optimizer: rmsprop,lr=0.0005 | |
dis_smooth: 0 | |
dropout: 0 | |
dump_path: ./dumped/endefr/4qq6ck8npo | |
emb_dim: 512 | |
enc_optimizer: adam,lr=0.0001 | |
encoder_attention_heads: 8 | |
encoder_normalize_before: False | |
epoch_size: 500000 | |
eval_only: False | |
exp_id: 4qq6ck8npo | |
exp_name: endefr | |
freeze_dec_emb: False | |
freeze_enc_emb: False | |
group_by_size: True | |
hidden_dim: 512 | |
id2lang: {0: 'de', 1: 'en', 2: 'fr'} | |
label_smoothing: 0 | |
lambda_dis: 0 | |
lambda_lm: 0 | |
lambda_xe_back: 0 | |
lambda_xe_mono: 0:1,100000:0.1,300000:0 | |
lambda_xe_otfa: 0 | |
lambda_xe_otfd: 1 | |
lambda_xe_para: 0 | |
lang2id: {'de': 0, 'en': 1, 'fr': 2} | |
langs: ['de', 'en', 'fr'] | |
length_penalty: 1.0 | |
lm_after: 0 | |
lm_before: 0 | |
lm_share_dec: 0 | |
lm_share_emb: False | |
lm_share_enc: 0 | |
lm_share_proj: False | |
lstm_proj: False | |
max_epoch: 100000 | |
max_len: 175 | |
max_vocab: -1 | |
mono_dataset: {'de': ('./data/mono/de.train.tok.bpe.pth', '', ''), 'en': ('./data/mono/en.train.tok.bpe.pth', '', ''), 'fr': ('./data/mono/fr.train.tok.bpe.pth', '', '')} | |
mono_directions: ['de', 'en', 'fr'] | |
n_back: 0 | |
n_dec_layers: 4 | |
n_dis: 0 | |
n_enc_layers: 4 | |
n_langs: 3 | |
n_mono: -1 | |
n_para: 0 | |
otf_backprop_temperature: -1 | |
otf_num_processes: 30 | |
otf_sample: -1 | |
otf_sync_params_every: 1000 | |
otf_update_dec: True | |
otf_update_enc: True | |
para_dataset: {('en', 'fr'): ('', './data/para/dev/newstest2013-ref.XX.60000.pth', './data/para/dev/newstest2014-fren-src.XX.60000.pth'), ('de', 'en'): ('', './data/para/dev/newstest2013-ref.XX.60000.pth', './data/para/dev/newstest2014-deen-src.XX.60000.pth')} | |
para_directions: [] | |
pivo_directions: [('fr', 'en', 'fr'), ('en', 'fr', 'en'), ('de', 'en', 'de'), ('en', 'de', 'en'), ('fr', 'de', 'fr'), ('de', 'fr', 'de')] | |
pretrained_emb: ./data/mono/endefr.dim512.vec | |
pretrained_out: True | |
reload_dec: False | |
reload_dis: False | |
reload_enc: False | |
reload_model: | |
relu_dropout: 0 | |
save_periodic: False | |
seed: -1 | |
share_dec: 3 | |
share_decpro_emb: False | |
share_enc: 3 | |
share_encdec_emb: False | |
share_lang_emb: True | |
share_lstm_proj: False | |
share_output_emb: True | |
stopping_criterion: bleu_en_fr_valid,10 | |
transformer: True | |
transformer_ffn_emb_dim: 2048 | |
vocab: {} | |
vocab_min_count: 0 | |
word_blank: 0.2 | |
word_dropout: 0.1 | |
word_shuffle: 3.0 | |
INFO - 09/13/18 10:01:49 - 0:00:00 - The experiment will be stored in ./dumped/endefr/4qq6ck8npo | |
INFO - 09/13/18 10:01:49 - 0:00:00 - Running command: python ../main.py --exp_name 'endefr' --transformer 'True' --n_enc_layers '4' --n_dec_layers '4' --share_enc '3' --share_dec '3' --share_lang_emb 'True' --share_output_emb 'True' --langs 'de,en,fr' --n_mono '-1' --mono_dataset 'de:./data/mono/de.train.tok.bpe.pth,,;en:./data/mono/en.train.tok.bpe.pth,,;fr:./data/mono/fr.train.tok.bpe.pth,,' --para_dataset 'en-fr:,./data/para/dev/newstest2013-ref.XX.60000.pth,./data/para/dev/newstest2014-fren-src.XX.60000.pth;de-en:,./data/para/dev/newstest2013-ref.XX.60000.pth,./data/para/dev/newstest2014-deen-src.XX.60000.pth' --mono_directions 'de,en,fr' --word_shuffle '3' --word_dropout '0.1' --word_blank '0.2' --pivo_directions 'fr-en-fr,en-fr-en,de-en-de,en-de-en,fr-de-fr,de-fr-de' --pretrained_emb './data/mono/endefr.dim512.vec' --pretrained_out 'True' --lambda_xe_mono '0:1,100000:0.1,300000:0' --lambda_xe_otfd '1' --otf_num_processes '30' --otf_sync_params_every '1000' --enc_optimizer 'adam,lr=0.0001' --epoch_size '500000' --stopping_criterion 'bleu_en_fr_valid,10' --batch_size '32' --exp_id "4qq6ck8npo" | |
INFO - 09/13/18 10:01:49 - 0:00:00 - ============ Parallel data (en - fr) | |
INFO - 09/13/18 10:01:49 - 0:00:00 - Loading data from ./data/para/dev/newstest2013-ref.en.60000.pth ... | |
INFO - 09/13/18 10:01:49 - 0:00:00 - 71618 words (61766 unique) in 3000 sentences. 1561 unknown words (7 unique). | |
INFO - 09/13/18 10:01:49 - 0:00:00 - Loading data from ./data/para/dev/newstest2013-ref.fr.60000.pth ... | |
INFO - 09/13/18 10:01:49 - 0:00:00 - 85791 words (61766 unique) in 3000 sentences. 4954 unknown words (5 unique). | |
INFO - 09/13/18 10:01:50 - 0:00:00 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:01:50 - 0:00:00 - Removed 0 too long sentences. | |
INFO - 09/13/18 10:01:50 - 0:00:00 - Loading data from ./data/para/dev/newstest2014-fren-src.en.60000.pth ... | |
INFO - 09/13/18 10:01:50 - 0:00:00 - 78289 words (61766 unique) in 3003 sentences. 2009 unknown words (6 unique). | |
INFO - 09/13/18 10:01:50 - 0:00:00 - Loading data from ./data/para/dev/newstest2014-fren-src.fr.60000.pth ... | |
INFO - 09/13/18 10:01:50 - 0:00:00 - 92906 words (61766 unique) in 3003 sentences. 5294 unknown words (5 unique). | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:01:50 - 0:00:01 - ============ Parallel data (de - en) | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Loading data from ./data/para/dev/newstest2013-ref.de.60000.pth ... | |
INFO - 09/13/18 10:01:50 - 0:00:01 - 77784 words (61766 unique) in 3000 sentences. 1084 unknown words (5 unique). | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Reloading data loaded from ./data/para/dev/newstest2013-ref.en.60000.pth ... | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Removed 0 too long sentences. | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Loading data from ./data/para/dev/newstest2014-deen-src.de.60000.pth ... | |
INFO - 09/13/18 10:01:50 - 0:00:01 - 78145 words (61766 unique) in 3003 sentences. 1200 unknown words (3 unique). | |
INFO - 09/13/18 10:01:50 - 0:00:01 - Loading data from ./data/para/dev/newstest2014-deen-src.en.60000.pth ... | |
INFO - 09/13/18 10:01:51 - 0:00:01 - 74765 words (61766 unique) in 3003 sentences. 1863 unknown words (6 unique). | |
INFO - 09/13/18 10:01:51 - 0:00:02 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:01:51 - 0:00:02 - ============ Monolingual data (de) | |
INFO - 09/13/18 10:01:51 - 0:00:02 - Loading data from ./data/mono/de.train.tok.bpe.pth ... | |
INFO - 09/13/18 10:01:53 - 0:00:04 - 223468998 words (61766 unique) in 10000000 sentences. 0 unknown words (0 unique). | |
INFO - 09/13/18 10:02:02 - 0:00:12 - Removed 2 empty sentences. | |
INFO - 09/13/18 10:02:03 - 0:00:13 - Removed 4404 too long sentences. | |
INFO - 09/13/18 10:02:03 - 0:00:13 - ============ Monolingual data (en) | |
INFO - 09/13/18 10:02:03 - 0:00:13 - Loading data from ./data/mono/en.train.tok.bpe.pth ... | |
INFO - 09/13/18 10:02:10 - 0:00:21 - 254659846 words (61766 unique) in 10000000 sentences. 0 unknown words (0 unique). | |
INFO - 09/13/18 10:02:19 - 0:00:29 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:02:21 - 0:00:31 - Removed 1136 too long sentences. | |
INFO - 09/13/18 10:02:21 - 0:00:31 - ============ Monolingual data (fr) | |
INFO - 09/13/18 10:02:21 - 0:00:31 - Loading data from ./data/mono/fr.train.tok.bpe.pth ... | |
INFO - 09/13/18 10:02:23 - 0:00:33 - 261636793 words (61766 unique) in 10000000 sentences. 0 unknown words (0 unique). | |
INFO - 09/13/18 10:02:28 - 0:00:38 - Removed 0 empty sentences. | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Removed 1212 too long sentences. | |
INFO - 09/13/18 10:02:29 - 0:00:40 - ============ Data summary | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Parallel data - valid - en -> fr: 3000 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Parallel data - test - en -> fr: 3003 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Parallel data - valid - de -> en: 3000 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Parallel data - test - de -> en: 3003 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - train - de: 9995594 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - valid - de: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - test - de: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - train - en: 9998864 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - valid - en: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - test - en: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - train - fr: 9998788 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - valid - fr: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Monolingual data - test - fr: 0 | |
INFO - 09/13/18 10:02:29 - 0:00:40 - ============ Building transformer attention model - Encoder ... | |
INFO - 09/13/18 10:02:29 - 0:00:40 - Sharing encoder input embeddings | |
INFO - 09/13/18 10:02:31 - 0:00:42 - Sharing encoder transformer parameters for layer 1 | |
INFO - 09/13/18 10:02:31 - 0:00:42 - Sharing encoder transformer parameters for layer 2 | |
INFO - 09/13/18 10:02:31 - 0:00:42 - Sharing encoder transformer parameters for layer 3 | |
INFO - 09/13/18 10:02:31 - 0:00:42 - ============ Building transformer attention model - Decoder ... | |
INFO - 09/13/18 10:02:31 - 0:00:42 - Sharing decoder input embeddings | |
INFO - 09/13/18 10:02:32 - 0:00:43 - Sharing decoder transformer parameters for layer 0 | |
INFO - 09/13/18 10:02:33 - 0:00:43 - Sharing decoder transformer parameters for layer 1 | |
INFO - 09/13/18 10:02:33 - 0:00:43 - Sharing decoder transformer parameters for layer 2 | |
INFO - 09/13/18 10:02:35 - 0:00:46 - Sharing decoder projection matrices | |
/home/XXXX/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py:54: UserWarning: size_average and reduce args will be deprecated, please use reduction='elementwise_mean' instead. | |
warnings.warn(warning.format(ret)) | |
INFO - 09/13/18 10:02:38 - 0:00:48 - Reloading embeddings from ./data/mono/endefr.dim512.vec ... | |
INFO - 09/13/18 10:02:47 - 0:00:57 - Reloaded 61289 embeddings. | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Initialized 61151 / 61766 word embeddings for "de" (including 19 after lowercasing). | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Initialized 61151 / 61766 word embeddings for "en" (including 19 after lowercasing). | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Initialized 61151 / 61766 word embeddings for "fr" (including 19 after lowercasing). | |
INFO - 09/13/18 10:02:53 - 0:01:04 - ============ Model summary | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Number of enc+dec parameters: 139072838 | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Encoder: TransformerEncoder( | |
(embeddings): ModuleList( | |
(0): Embedding(61766, 512, padding_idx=2) | |
(1): Embedding(61766, 512, padding_idx=2) | |
(2): Embedding(61766, 512, padding_idx=2) | |
) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
) | |
(1): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
) | |
(2): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
) | |
(3): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
) | |
) | |
) | |
) | |
) | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Decoder: TransformerDecoder( | |
(embeddings): ModuleList( | |
(0): Embedding(61766, 512, padding_idx=2) | |
(1): Embedding(61766, 512, padding_idx=2) | |
(2): Embedding(61766, 512, padding_idx=2) | |
) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
) | |
(1): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
) | |
(2): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
) | |
(3): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=512, out_features=512, bias=True) | |
) | |
(fc1): Linear(in_features=512, out_features=2048, bias=True) | |
(fc2): Linear(in_features=2048, out_features=512, bias=True) | |
(layer_norms): ModuleList( | |
(0): LayerNorm() | |
(1): LayerNorm() | |
(2): LayerNorm() | |
) | |
) | |
) | |
) | |
(proj): ModuleList( | |
(0): Linear(in_features=512, out_features=61766, bias=True) | |
(1): Linear(in_features=512, out_features=61766, bias=True) | |
(2): Linear(in_features=512, out_features=61766, bias=True) | |
) | |
(loss_fn): ModuleList( | |
(0): CrossEntropyLoss() | |
(1): CrossEntropyLoss() | |
(2): CrossEntropyLoss() | |
) | |
) | |
INFO - 09/13/18 10:02:53 - 0:01:04 - Discriminator: None | |
INFO - 09/13/18 10:02:53 - 0:01:04 - LM: None | |
INFO - 09/13/18 10:02:57 - 0:01:07 - Starting subprocesses for OTF generation ... | |
INFO - 09/13/18 10:02:57 - 0:01:08 - Stopping criterion: bleu_en_fr_valid,10 | |
INFO - 09/13/18 10:02:57 - 0:01:08 - Test: Parameters are shared correctly. | |
INFO - 09/13/18 10:03:06 - 0:01:17 - ====================== Starting epoch 0 ... ====================== | |
INFO - 09/13/18 10:03:06 - 0:01:17 - Creating new training encdec,de iterator ... | |
INFO - 09/13/18 10:03:15 - 0:01:26 - Creating new training encdec,en iterator ... | |
INFO - 09/13/18 10:03:19 - 0:01:30 - Creating new training encdec,fr iterator ... | |
INFO - 09/13/18 10:03:28 - 0:01:39 - Populating initial OTF generation cache ... | |
INFO - 09/13/18 10:03:28 - 0:01:39 - Creating new training otf,fr iterator ... | |
INFO - 09/13/18 10:03:33 - 0:01:43 - Creating new training otf,en iterator ... | |
INFO - 09/13/18 10:03:36 - 0:01:47 - Creating new training otf,de iterator ... | |
INFO - 09/13/18 10:07:46 - 0:05:57 - 50 - 49.82 sent/s - 1294.00 words/s - XE-de-de: 8.8901 || XE-en-en: 8.7136 || XE-fr-fr: 8.6850 || XE-fr-en-fr: 8.0422 || XE-en-fr-en: 8.6804 || XE-de-en-de: 9.1853 || XE-en-de-en: 8.3410 || XE-fr-de-fr: 8.3788 || XE-de-fr-de: 9.1030 || ENC-L2-de: 4.0549 || ENC-L2-en: 4.0504 || ENC-L2-fr: 4.0443 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 96.28s (33.31%) | |
INFO - 09/13/18 10:10:32 - 0:08:43 - 100 - 86.58 sent/s - 2237.00 words/s - XE-de-de: 6.6743 || XE-en-en: 6.6406 || XE-fr-fr: 6.2547 || XE-fr-en-fr: 6.1008 || XE-en-fr-en: 6.5419 || XE-de-en-de: 6.8439 || XE-en-de-en: 6.6096 || XE-fr-de-fr: 6.1790 || XE-de-fr-de: 6.8465 || ENC-L2-de: 3.3787 || ENC-L2-en: 3.3580 || ENC-L2-fr: 3.3667 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 14.24s (8.56%) | |
INFO - 09/13/18 10:13:32 - 0:11:42 - 150 - 80.26 sent/s - 2082.00 words/s - XE-de-de: 6.2561 || XE-en-en: 6.2163 || XE-fr-fr: 5.8040 || XE-fr-en-fr: 5.6516 || XE-en-fr-en: 6.2629 || XE-de-en-de: 6.5262 || XE-en-de-en: 6.1712 || XE-fr-de-fr: 5.7679 || XE-de-fr-de: 6.4791 || ENC-L2-de: 3.2320 || ENC-L2-en: 3.1972 || ENC-L2-fr: 3.1214 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 18.99s (10.59%) | |
INFO - 09/13/18 10:16:37 - 0:14:47 - 200 - 77.87 sent/s - 2127.00 words/s - XE-de-de: 6.0646 || XE-en-en: 5.9638 || XE-fr-fr: 5.4925 || XE-fr-en-fr: 5.5546 || XE-en-fr-en: 5.9861 || XE-de-en-de: 6.1658 || XE-en-de-en: 6.0187 || XE-fr-de-fr: 5.6398 || XE-de-fr-de: 6.1942 || ENC-L2-de: 3.1895 || ENC-L2-en: 3.2287 || ENC-L2-fr: 3.1327 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 29.87s (16.15%) | |
INFO - 09/13/18 10:19:44 - 0:17:55 - 250 - 76.78 sent/s - 2038.00 words/s - XE-de-de: 5.8453 || XE-en-en: 5.8409 || XE-fr-fr: 5.3443 || XE-fr-en-fr: 5.3612 || XE-en-fr-en: 5.8776 || XE-de-en-de: 6.0960 || XE-en-de-en: 5.8788 || XE-fr-de-fr: 5.5038 || XE-de-fr-de: 6.0228 || ENC-L2-de: 3.1909 || ENC-L2-en: 3.2264 || ENC-L2-fr: 3.1684 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 32.36s (17.26%) | |
INFO - 09/13/18 10:22:31 - 0:20:41 - 300 - 86.56 sent/s - 2140.00 words/s - XE-de-de: 5.6730 || XE-en-en: 5.5893 || XE-fr-fr: 5.1856 || XE-fr-en-fr: 5.2716 || XE-en-fr-en: 5.8107 || XE-de-en-de: 5.8604 || XE-en-de-en: 5.7802 || XE-fr-de-fr: 5.3309 || XE-de-fr-de: 5.8814 || ENC-L2-de: 3.2137 || ENC-L2-en: 3.2673 || ENC-L2-fr: 3.2072 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 6.37s (3.83%) | |
INFO - 09/13/18 10:25:29 - 0:23:40 - 350 - 80.58 sent/s - 2139.00 words/s - XE-de-de: 5.5069 || XE-en-en: 5.4877 || XE-fr-fr: 5.1344 || XE-fr-en-fr: 5.2188 || XE-en-fr-en: 5.7588 || XE-de-en-de: 5.8482 || XE-en-de-en: 5.6712 || XE-fr-de-fr: 5.3025 || XE-de-fr-de: 5.7880 || ENC-L2-de: 3.3057 || ENC-L2-en: 3.3189 || ENC-L2-fr: 3.1789 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 30.50s (17.07%) | |
INFO - 09/13/18 10:28:27 - 0:26:37 - 400 - 81.22 sent/s - 2081.00 words/s - XE-de-de: 5.3347 || XE-en-en: 5.4069 || XE-fr-fr: 4.8776 || XE-fr-en-fr: 5.1522 || XE-en-fr-en: 5.6761 || XE-de-en-de: 5.7155 || XE-en-de-en: 5.6470 || XE-fr-de-fr: 5.1790 || XE-de-fr-de: 5.7863 || ENC-L2-de: 3.4114 || ENC-L2-en: 3.3871 || ENC-L2-fr: 3.3844 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 15.12s (8.53%) | |
INFO - 09/13/18 10:32:03 - 0:30:13 - 450 - 66.62 sent/s - 1830.00 words/s - XE-de-de: 5.2932 || XE-en-en: 5.2926 || XE-fr-fr: 4.7942 || XE-fr-en-fr: 5.1177 || XE-en-fr-en: 5.6083 || XE-de-en-de: 5.6982 || XE-en-de-en: 5.6095 || XE-fr-de-fr: 5.1948 || XE-de-fr-de: 5.6183 || ENC-L2-de: 3.4797 || ENC-L2-en: 3.5228 || ENC-L2-fr: 3.4050 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 37.36s (17.29%) | |
INFO - 09/13/18 10:35:17 - 0:33:28 - 500 - 73.95 sent/s - 2001.00 words/s - XE-de-de: 5.1607 || XE-en-en: 5.1033 || XE-fr-fr: 4.6967 || XE-fr-en-fr: 5.0802 || XE-en-fr-en: 5.5538 || XE-de-en-de: 5.6648 || XE-en-de-en: 5.5413 || XE-fr-de-fr: 5.0535 || XE-de-fr-de: 5.5970 || ENC-L2-de: 3.6633 || ENC-L2-en: 3.7441 || ENC-L2-fr: 3.6130 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 16.20s (8.32%) | |
INFO - 09/13/18 10:38:48 - 0:36:58 - 550 - 68.51 sent/s - 1879.00 words/s - XE-de-de: 4.8937 || XE-en-en: 4.9019 || XE-fr-fr: 4.5197 || XE-fr-en-fr: 4.9917 || XE-en-fr-en: 5.5631 || XE-de-en-de: 5.5751 || XE-en-de-en: 5.5553 || XE-fr-de-fr: 5.0231 || XE-de-fr-de: 5.5611 || ENC-L2-de: 3.8036 || ENC-L2-en: 3.9735 || ENC-L2-fr: 3.8431 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 40.76s (19.39%) | |
INFO - 09/13/18 10:41:58 - 0:40:09 - 600 - 75.44 sent/s - 2038.00 words/s - XE-de-de: 4.8028 || XE-en-en: 4.7233 || XE-fr-fr: 4.3842 || XE-fr-en-fr: 4.9823 || XE-en-fr-en: 5.4961 || XE-de-en-de: 5.5007 || XE-en-de-en: 5.4986 || XE-fr-de-fr: 4.9895 || XE-de-fr-de: 5.4790 || ENC-L2-de: 4.0057 || ENC-L2-en: 4.1818 || ENC-L2-fr: 4.0520 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 31.15s (16.32%) | |
INFO - 09/13/18 10:45:10 - 0:43:21 - 650 - 75.17 sent/s - 2179.00 words/s - XE-de-de: 4.6469 || XE-en-en: 4.6960 || XE-fr-fr: 4.1445 || XE-fr-en-fr: 4.9446 || XE-en-fr-en: 5.4997 || XE-de-en-de: 5.4824 || XE-en-de-en: 5.4829 || XE-fr-de-fr: 5.0042 || XE-de-fr-de: 5.5506 || ENC-L2-de: 4.2799 || ENC-L2-en: 4.3552 || ENC-L2-fr: 4.3514 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 25.56s (13.34%) | |
INFO - 09/13/18 10:48:14 - 0:46:25 - 700 - 78.10 sent/s - 1992.00 words/s - XE-de-de: 4.2711 || XE-en-en: 4.4212 || XE-fr-fr: 4.0215 || XE-fr-en-fr: 4.9069 || XE-en-fr-en: 5.4045 || XE-de-en-de: 5.4664 || XE-en-de-en: 5.4464 || XE-fr-de-fr: 4.9503 || XE-de-fr-de: 5.4249 || ENC-L2-de: 4.4589 || ENC-L2-en: 4.5450 || ENC-L2-fr: 4.4154 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 25.34s (13.75%) | |
INFO - 09/13/18 10:51:09 - 0:49:19 - 750 - 82.66 sent/s - 2132.00 words/s - XE-de-de: 4.1140 || XE-en-en: 4.3342 || XE-fr-fr: 3.7440 || XE-fr-en-fr: 4.9449 || XE-en-fr-en: 5.3959 || XE-de-en-de: 5.4826 || XE-en-de-en: 5.3269 || XE-fr-de-fr: 5.0057 || XE-de-fr-de: 5.4563 || ENC-L2-de: 4.5799 || ENC-L2-en: 4.6158 || ENC-L2-fr: 4.6007 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 17.70s (10.16%) | |
INFO - 09/13/18 10:52:44 - 0:50:55 - 800 - 150.72 sent/s - 4211.00 words/s - XE-de-de: 3.9479 || XE-en-en: 4.2196 || XE-fr-fr: 3.7239 || XE-fr-en-fr: 4.8764 || XE-en-fr-en: 5.3964 || XE-de-en-de: 5.4079 || XE-en-de-en: 5.3308 || XE-fr-de-fr: 4.8674 || XE-de-fr-de: 5.3820 || ENC-L2-de: 4.6333 || ENC-L2-en: 4.6457 || ENC-L2-fr: 4.5906 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 3.28s (3.44%) | |
INFO - 09/13/18 10:54:08 - 0:52:19 - 850 - 171.56 sent/s - 4612.00 words/s - XE-de-de: 3.8506 || XE-en-en: 4.0473 || XE-fr-fr: 3.5356 || XE-fr-en-fr: 4.8849 || XE-en-fr-en: 5.3637 || XE-de-en-de: 5.3516 || XE-en-de-en: 5.3646 || XE-fr-de-fr: 4.9203 || XE-de-fr-de: 5.4200 || ENC-L2-de: 4.6258 || ENC-L2-en: 4.7140 || ENC-L2-fr: 4.6747 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 2.66s (3.17%) | |
INFO - 09/13/18 10:55:35 - 0:53:45 - 900 - 166.20 sent/s - 4553.00 words/s - XE-de-de: 3.6209 || XE-en-en: 3.8673 || XE-fr-fr: 3.7339 || XE-fr-en-fr: 4.8900 || XE-en-fr-en: 5.3249 || XE-de-en-de: 5.3294 || XE-en-de-en: 5.3512 || XE-fr-de-fr: 4.8599 || XE-de-fr-de: 5.3340 || ENC-L2-de: 4.6400 || ENC-L2-en: 4.7140 || ENC-L2-fr: 4.5952 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 3.13s (3.61%) | |
INFO - 09/13/18 10:57:01 - 0:55:12 - 950 - 167.26 sent/s - 4334.00 words/s - XE-de-de: 3.4321 || XE-en-en: 3.6973 || XE-fr-fr: 3.3935 || XE-fr-en-fr: 4.8307 || XE-en-fr-en: 5.3519 || XE-de-en-de: 5.3140 || XE-en-de-en: 5.2870 || XE-fr-de-fr: 4.7727 || XE-de-fr-de: 5.2901 || ENC-L2-de: 4.6472 || ENC-L2-en: 4.6875 || ENC-L2-fr: 4.6562 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 2.97s (3.45%) | |
INFO - 09/13/18 10:58:29 - 0:56:40 - 1000 - 163.58 sent/s - 4287.00 words/s - XE-de-de: 3.5713 || XE-en-en: 3.5558 || XE-fr-fr: 3.1188 || XE-fr-en-fr: 4.8263 || XE-en-fr-en: 5.3475 || XE-de-en-de: 5.2762 || XE-en-de-en: 5.4129 || XE-fr-de-fr: 4.7637 || XE-de-fr-de: 5.3402 || ENC-L2-de: 4.6211 || ENC-L2-en: 4.6812 || ENC-L2-fr: 4.6744 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 3.57s (4.05%) | |
INFO - 09/13/18 10:59:56 - 0:58:07 - 1050 - 165.50 sent/s - 4427.00 words/s - XE-de-de: 3.4419 || XE-en-en: 3.6319 || XE-fr-fr: 3.2270 || XE-fr-en-fr: 4.7024 || XE-en-fr-en: 5.2101 || XE-de-en-de: 5.3072 || XE-en-de-en: 5.2443 || XE-fr-de-fr: 4.8493 || XE-de-fr-de: 5.2913 || ENC-L2-de: 4.5310 || ENC-L2-en: 4.5651 || ENC-L2-fr: 4.5415 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 2.78s (3.20%) | |
INFO - 09/13/18 11:01:23 - 0:59:34 - 1100 - 165.52 sent/s - 4495.00 words/s - XE-de-de: 3.4210 || XE-en-en: 3.5137 || XE-fr-fr: 3.0194 || XE-fr-en-fr: 4.5383 || XE-en-fr-en: 4.9698 || XE-de-en-de: 5.1637 || XE-en-de-en: 5.0166 || XE-fr-de-fr: 4.5534 || XE-de-fr-de: 5.0715 || ENC-L2-de: 4.5065 || ENC-L2-en: 4.4966 || ENC-L2-fr: 4.4840 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 3.16s (3.63%) | |
INFO - 09/13/18 11:02:52 - 1:01:02 - 1150 - 161.99 sent/s - 4329.00 words/s - XE-de-de: 3.1841 || XE-en-en: 3.5765 || XE-fr-fr: 2.9718 || XE-fr-en-fr: 4.4606 || XE-en-fr-en: 4.9266 || XE-de-en-de: 5.0740 || XE-en-de-en: 4.9775 || XE-fr-de-fr: 4.5375 || XE-de-fr-de: 5.0666 || ENC-L2-de: 4.4863 || ENC-L2-en: 4.5357 || ENC-L2-fr: 4.4629 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 3.35s (3.76%) | |
INFO - 09/13/18 11:04:43 - 1:02:53 - 1200 - 130.00 sent/s - 3452.00 words/s - XE-de-de: 3.1064 || XE-en-en: 3.1424 || XE-fr-fr: 2.8894 || XE-fr-en-fr: 4.4022 || XE-en-fr-en: 4.9726 || XE-de-en-de: 4.9774 || XE-en-de-en: 4.9623 || XE-fr-de-fr: 4.5384 || XE-de-fr-de: 5.0221 || ENC-L2-de: 4.4941 || ENC-L2-en: 4.5096 || ENC-L2-fr: 4.4971 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 4.19s (3.79%) | |
INFO - 09/13/18 11:07:33 - 1:05:43 - 1250 - 84.66 sent/s - 2244.00 words/s - XE-de-de: 3.1384 || XE-en-en: 3.4098 || XE-fr-fr: 2.8260 || XE-fr-en-fr: 4.3513 || XE-en-fr-en: 4.8802 || XE-de-en-de: 4.9761 || XE-en-de-en: 4.8844 || XE-fr-de-fr: 4.5242 || XE-de-fr-de: 5.0332 || ENC-L2-de: 4.4817 || ENC-L2-en: 4.5673 || ENC-L2-fr: 4.4980 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 19.84s (11.67%) | |
INFO - 09/13/18 11:10:25 - 1:08:36 - 1300 - 83.42 sent/s - 2196.00 words/s - XE-de-de: 3.1586 || XE-en-en: 3.2438 || XE-fr-fr: 2.8686 || XE-fr-en-fr: 4.3099 || XE-en-fr-en: 4.8719 || XE-de-en-de: 4.9780 || XE-en-de-en: 4.8575 || XE-fr-de-fr: 4.4215 || XE-de-fr-de: 4.9626 || ENC-L2-de: 4.4625 || ENC-L2-en: 4.5663 || ENC-L2-fr: 4.5007 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 15.88s (9.20%) | |
INFO - 09/13/18 11:13:19 - 1:11:30 - 1350 - 82.75 sent/s - 2147.00 words/s - XE-de-de: 3.0650 || XE-en-en: 3.1073 || XE-fr-fr: 2.7172 || XE-fr-en-fr: 4.3426 || XE-en-fr-en: 4.8547 || XE-de-en-de: 4.9650 || XE-en-de-en: 4.7980 || XE-fr-de-fr: 4.4431 || XE-de-fr-de: 4.9641 || ENC-L2-de: 4.4503 || ENC-L2-en: 4.5002 || ENC-L2-fr: 4.4855 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 11.18s (6.43%) | |
INFO - 09/13/18 11:16:19 - 1:14:30 - 1400 - 80.06 sent/s - 2109.00 words/s - XE-de-de: 2.8554 || XE-en-en: 3.1016 || XE-fr-fr: 2.6694 || XE-fr-en-fr: 4.3223 || XE-en-fr-en: 4.8540 || XE-de-en-de: 4.8630 || XE-en-de-en: 4.8418 || XE-fr-de-fr: 4.4160 || XE-de-fr-de: 4.9373 || ENC-L2-de: 4.4114 || ENC-L2-en: 4.5411 || ENC-L2-fr: 4.4895 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 19.49s (10.84%) | |
INFO - 09/13/18 11:19:20 - 1:17:30 - 1450 - 79.73 sent/s - 2062.00 words/s - XE-de-de: 2.8683 || XE-en-en: 3.0118 || XE-fr-fr: 2.5706 || XE-fr-en-fr: 4.3639 || XE-en-fr-en: 4.8125 || XE-de-en-de: 4.8789 || XE-en-de-en: 4.8695 || XE-fr-de-fr: 4.3726 || XE-de-fr-de: 4.9200 || ENC-L2-de: 4.3972 || ENC-L2-en: 4.5240 || ENC-L2-fr: 4.4721 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 22.11s (12.24%) | |
INFO - 09/13/18 11:22:35 - 1:20:45 - 1500 - 73.81 sent/s - 1930.00 words/s - XE-de-de: 2.8579 || XE-en-en: 2.9988 || XE-fr-fr: 2.6582 || XE-fr-en-fr: 4.3029 || XE-en-fr-en: 4.8013 || XE-de-en-de: 4.8592 || XE-en-de-en: 4.8074 || XE-fr-de-fr: 4.3366 || XE-de-fr-de: 4.9395 || ENC-L2-de: 4.4646 || ENC-L2-en: 4.5254 || ENC-L2-fr: 4.4673 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 34.97s (17.92%) | |
INFO - 09/13/18 11:25:53 - 1:24:04 - 1550 - 72.70 sent/s - 2000.00 words/s - XE-de-de: 2.7525 || XE-en-en: 3.0021 || XE-fr-fr: 2.5203 || XE-fr-en-fr: 4.2504 || XE-en-fr-en: 4.8721 || XE-de-en-de: 4.8798 || XE-en-de-en: 4.8150 || XE-fr-de-fr: 4.3972 || XE-de-fr-de: 4.9819 || ENC-L2-de: 4.4338 || ENC-L2-en: 4.5090 || ENC-L2-fr: 4.4204 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 37.74s (19.05%) | |
INFO - 09/13/18 11:28:50 - 1:27:00 - 1600 - 81.40 sent/s - 2133.00 words/s - XE-de-de: 2.8299 || XE-en-en: 2.9391 || XE-fr-fr: 2.4817 || XE-fr-en-fr: 4.2631 || XE-en-fr-en: 4.8282 || XE-de-en-de: 4.8516 || XE-en-de-en: 4.7445 || XE-fr-de-fr: 4.3456 || XE-de-fr-de: 4.9193 || ENC-L2-de: 4.4355 || ENC-L2-en: 4.4883 || ENC-L2-fr: 4.4194 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 19.26s (10.89%) | |
INFO - 09/13/18 11:31:57 - 1:30:08 - 1650 - 76.88 sent/s - 2036.00 words/s - XE-de-de: 2.7998 || XE-en-en: 2.8384 || XE-fr-fr: 2.4101 || XE-fr-en-fr: 4.2701 || XE-en-fr-en: 4.7609 || XE-de-en-de: 4.8786 || XE-en-de-en: 4.7284 || XE-fr-de-fr: 4.4097 || XE-de-fr-de: 4.8921 || ENC-L2-de: 4.4456 || ENC-L2-en: 4.4935 || ENC-L2-fr: 4.3764 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 28.25s (15.08%) | |
INFO - 09/13/18 11:34:54 - 1:33:05 - 1700 - 81.31 sent/s - 2193.00 words/s - XE-de-de: 2.7688 || XE-en-en: 3.0390 || XE-fr-fr: 2.4022 || XE-fr-en-fr: 4.2467 || XE-en-fr-en: 4.6909 || XE-de-en-de: 4.8881 || XE-en-de-en: 4.8022 || XE-fr-de-fr: 4.3272 || XE-de-fr-de: 4.9089 || ENC-L2-de: 4.4221 || ENC-L2-en: 4.5316 || ENC-L2-fr: 4.4339 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 16.93s (9.56%) | |
INFO - 09/13/18 11:37:13 - 1:35:24 - ====================== End of epoch 0 ====================== | |
INFO - 09/13/18 11:37:13 - 1:35:24 - Evaluating en -> fr (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
/home/XXXX/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py:54: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead. | |
warnings.warn(warning.format(ret)) | |
INFO - 09/13/18 11:41:41 - 1:39:52 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-fr.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.valid.txt : 0.390000 | |
INFO - 09/13/18 11:41:41 - 1:39:52 - Evaluating fr -> en (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 11:45:40 - 1:43:51 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-en.valid.txt ./dumped/endefr/4qq6ck8npo/ref.fr-en.valid.txt : 0.430000 | |
INFO - 09/13/18 11:45:40 - 1:43:51 - Evaluating en -> fr (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 11:49:21 - 1:47:32 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-fr.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.test.txt : 0.400000 | |
INFO - 09/13/18 11:49:21 - 1:47:32 - Evaluating fr -> en (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 11:53:28 - 1:51:39 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-en.test.txt ./dumped/endefr/4qq6ck8npo/ref.fr-en.test.txt : 0.520000 | |
INFO - 09/13/18 11:53:28 - 1:51:39 - Evaluating de -> en (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 11:57:20 - 1:55:31 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-en.valid.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.valid.txt : 0.240000 | |
INFO - 09/13/18 11:57:20 - 1:55:31 - Evaluating en -> de (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 12:01:09 - 1:59:19 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-de.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.valid.txt : 0.260000 | |
INFO - 09/13/18 12:01:09 - 1:59:19 - Evaluating de -> en (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 12:04:25 - 2:02:36 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-en.test.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.test.txt : 0.250000 | |
INFO - 09/13/18 12:04:25 - 2:02:36 - Evaluating en -> de (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 12:07:48 - 2:05:59 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-de.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.test.txt : 0.250000 | |
INFO - 09/13/18 12:07:48 - 2:05:59 - Evaluating fr -> en -> fr (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 12:23:58 - 2:22:09 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-en-fr.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.valid.txt : 0.200000 | |
INFO - 09/13/18 12:23:58 - 2:22:09 - Evaluating fr -> en -> fr (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 12:48:49 - 2:47:00 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-en-fr.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.test.txt : 0.190000 | |
INFO - 09/13/18 12:48:49 - 2:47:00 - Evaluating en -> fr -> en (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 13:10:36 - 3:08:46 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-fr-en.valid.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.valid.txt : 0.310000 | |
INFO - 09/13/18 13:10:36 - 3:08:46 - Evaluating en -> fr -> en (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 13:27:38 - 3:25:49 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-fr-en.test.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.test.txt : 0.250000 | |
INFO - 09/13/18 13:27:38 - 3:25:49 - Evaluating de -> en -> de (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 13:54:33 - 3:52:43 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-en-de.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.valid.txt : 0.080000 | |
INFO - 09/13/18 13:54:33 - 3:52:43 - Evaluating de -> en -> de (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:17:52 - 4:16:03 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-en-de.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.test.txt : 0.090000 | |
INFO - 09/13/18 14:17:52 - 4:16:03 - Evaluating en -> de -> en (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:24:14 - 4:22:24 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-de-en.valid.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.valid.txt : 0.180000 | |
INFO - 09/13/18 14:24:14 - 4:22:24 - Evaluating en -> de -> en (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:30:42 - 4:28:53 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.en-de-en.test.txt ./dumped/endefr/4qq6ck8npo/ref.de-en.test.txt : 0.120000 | |
INFO - 09/13/18 14:30:42 - 4:28:53 - Evaluating fr -> de -> fr (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:37:48 - 4:35:59 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-de-fr.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.valid.txt : 0.140000 | |
INFO - 09/13/18 14:37:48 - 4:35:59 - Evaluating fr -> de -> fr (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:44:56 - 4:43:06 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.fr-de-fr.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-fr.test.txt : 0.130000 | |
INFO - 09/13/18 14:44:56 - 4:43:06 - Evaluating de -> fr -> de (valid) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:51:36 - 4:49:47 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-fr-de.valid.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.valid.txt : 0.060000 | |
INFO - 09/13/18 14:51:36 - 4:49:47 - Evaluating de -> fr -> de (test) ... | |
It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer. | |
INFO - 09/13/18 14:58:35 - 4:56:45 - BLEU ./dumped/endefr/4qq6ck8npo/hyp0.de-fr-de.test.txt ./dumped/endefr/4qq6ck8npo/ref.en-de.test.txt : 0.060000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - epoch -> 0.000000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_fr_valid -> 586.660772 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_fr_valid -> 0.390000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_en_valid -> 191.347273 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_en_valid -> 0.430000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_fr_test -> 495.993368 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_fr_test -> 0.400000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_en_test -> 183.781728 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_en_test -> 0.520000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_en_valid -> 229.648550 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_en_valid -> 0.240000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_de_valid -> 202.325666 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_de_valid -> 0.260000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_en_test -> 258.004274 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_en_test -> 0.250000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_de_test -> 191.679544 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_de_test -> 0.250000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_en_fr_valid -> 740.776791 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_en_fr_valid -> 0.200000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_en_fr_test -> 669.614632 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_en_fr_test -> 0.190000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_fr_en_valid -> 242.840260 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_fr_en_valid -> 0.310000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_fr_en_test -> 290.027764 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_fr_en_test -> 0.250000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_en_de_valid -> 240.212048 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_en_de_valid -> 0.080000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_en_de_test -> 228.371681 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_en_de_test -> 0.090000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_de_en_valid -> 260.373977 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_de_en_valid -> 0.180000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_en_de_en_test -> 302.970384 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_en_de_en_test -> 0.120000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_de_fr_valid -> 807.375513 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_de_fr_valid -> 0.140000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_fr_de_fr_test -> 732.443883 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_fr_de_fr_test -> 0.130000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_fr_de_valid -> 251.991812 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_fr_de_valid -> 0.060000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - ppl_de_fr_de_test -> 242.677831 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - bleu_de_fr_de_test -> 0.060000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - __log__:{"epoch": 0, "ppl_en_fr_valid": 586.6607716348946, "bleu_en_fr_valid": 0.39, "ppl_fr_en_valid": 191.34727291369722, "bleu_fr_en_valid": 0.43, "ppl_en_fr_test": 495.99336788798763, "bleu_en_fr_test": 0.4, "ppl_fr_en_test": 183.7817278337195, "bleu_fr_en_test": 0.52, "ppl_de_en_valid": 229.64854960730497, "bleu_de_en_valid": 0.24, "ppl_en_de_valid": 202.3256660041768, "bleu_en_de_valid": 0.26, "ppl_de_en_test": 258.00427385867755, "bleu_de_en_test": 0.25, "ppl_en_de_test": 191.6795442447361, "bleu_en_de_test": 0.25, "ppl_fr_en_fr_valid": 740.7767910550868, "bleu_fr_en_fr_valid": 0.2, "ppl_fr_en_fr_test": 669.6146316300457, "bleu_fr_en_fr_test": 0.19, "ppl_en_fr_en_valid": 242.84026026424664, "bleu_en_fr_en_valid": 0.31, "ppl_en_fr_en_test": 290.02776430040296, "bleu_en_fr_en_test": 0.25, "ppl_de_en_de_valid": 240.21204772218277, "bleu_de_en_de_valid": 0.08, "ppl_de_en_de_test": 228.3716813467196, "bleu_de_en_de_test": 0.09, "ppl_en_de_en_valid": 260.3739766064779, "bleu_en_de_en_valid": 0.18, "ppl_en_de_en_test": 302.97038371770924, "bleu_en_de_en_test": 0.12, "ppl_fr_de_fr_valid": 807.3755125689298, "bleu_fr_de_fr_valid": 0.14, "ppl_fr_de_fr_test": 732.4438825844823, "bleu_fr_de_fr_test": 0.13, "ppl_de_fr_de_valid": 251.99181203777914, "bleu_de_fr_de_valid": 0.06, "ppl_de_fr_de_test": 242.67783114343322, "bleu_de_fr_de_test": 0.06} | |
INFO - 09/13/18 14:58:35 - 4:56:45 - New best score for bleu_en_fr_valid: 0.390000 | |
INFO - 09/13/18 14:58:35 - 4:56:45 - Saving model to ./dumped/endefr/4qq6ck8npo/best-bleu_en_fr_valid.pth ... | |
INFO - 09/13/18 14:58:35 - 4:56:46 - New best validation score: 0.390000 | |
INFO - 09/13/18 14:58:35 - 4:56:46 - Saving checkpoint to ./dumped/endefr/4qq6ck8npo/checkpoint.pth ... | |
INFO - 09/13/18 14:58:37 - 4:56:48 - Test: Parameters are shared correctly. | |
INFO - 09/13/18 14:58:37 - 4:56:48 - ====================== Starting epoch 1 ... ====================== | |
INFO - 09/13/18 14:59:20 - 4:57:30 - 1750 - 1.17 sent/s - 30.00 words/s - XE-de-de: 2.7984 || XE-en-en: 2.8205 || XE-fr-fr: 2.3793 || XE-fr-en-fr: 4.3073 || XE-en-fr-en: 4.8027 || XE-de-en-de: 4.7979 || XE-en-de-en: 4.7709 || XE-fr-de-fr: 4.2797 || XE-de-fr-de: 4.7897 || ENC-L2-de: 4.3802 || ENC-L2-en: 4.5135 || ENC-L2-fr: 4.4047 - LR enc=1.0000e-04,dec=1.0000e-04 - Sentences generation time: 17.91s (0.15%) | |
Traceback (most recent call last): | |
File "../main.py", line 317, in <module> | |
batches = next(otf_iterator) | |
File "/data/XXXX/nmt/unsupervised/UnsupervisedMT/NMT/src/trainer.py", line 561, in otf_bt_gen_async | |
results = cache[0].gen() | |
File "/data/XXXX/nmt/unsupervised/UnsupervisedMT/NMT/src/multiprocessing_event_loop.py", line 203, in gen | |
return next(self.generator) | |
File "/data/XXXX/nmt/unsupervised/UnsupervisedMT/NMT/src/multiprocessing_event_loop.py", line 73, in fetch_all_result_generator | |
result_type, result = self.return_pipes[rank].recv() | |
File "/home/XXXX/anaconda3/lib/python3.6/multiprocessing/connection.py", line 251, in recv | |
return _ForkingPickler.loads(buf.getbuffer()) | |
File "/home/XXXX/anaconda3/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 201, in rebuild_storage_fd | |
fd = df.detach() | |
File "/home/XXXX/anaconda3/lib/python3.6/multiprocessing/resource_sharer.py", line 58, in detach | |
return reduction.recv_handle(conn) | |
File "/home/XXXX/anaconda3/lib/python3.6/multiprocessing/reduction.py", line 182, in recv_handle | |
return recvfds(s, 1)[0] | |
File "/home/XXXX/anaconda3/lib/python3.6/multiprocessing/reduction.py", line 161, in recvfds | |
len(ancdata)) | |
RuntimeError: received 0 items of ancdata |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment