Skip to content

Instantly share code, notes, and snippets.

@taylanbil
Created August 19, 2019 16:54
Show Gist options
  • Save taylanbil/1487f6555159c47548cf1fd1c1960ae6 to your computer and use it in GitHub Desktop.
Save taylanbil/1487f6555159c47548cf1fd1c1960ae6 to your computer and use it in GitHub Desktop.
Fairseq Transformer, 1 GPU, full dataset - measure 100 steps.
Fri Aug 16 18:52:31 UTC 2019
#!/bin/bash
# Launch fairseq training of transformer_vaswani_wmt_en_de_big (en -> de)
# with fp16, a distributed world size of 1, and checkpoint saving disabled.
#
# The interpreter, the extra flags and the full command line are kept in bash
# arrays so that every argument reaches train.py as exactly one word, without
# depending on word-splitting of unquoted strings.

set -u  # a misspelled variable name should fail loudly, not expand to nothing

taskname=fairseq_e2e_gpu  # run label; not referenced anywhere else in this script
vol_fairseq=/home/taylanbil/fairseq/
vol_data=/home/taylanbil/data/wmt18_en_de_bpej32k
#vol_data=/home/taylanbil/data/dummy

# Interpreter used to run train.py. "ipython -i" used to be assigned first and
# was immediately overwritten by "python"; it is kept as a commented-out
# alternative instead of a dead assignment.
#python_cli=(ipython -i)
python_cli=(python)

# Extra flags. These used to be collected in a string that was never expanded
# on the command line, so train.py never received them. They are now appended
# to the invocation below.
other_flags=(
  --clip-norm 0.0
  --num-workers=2
)

train_cmd=(
  "${python_cli[@]}"
  # vol_fairseq ends in "/"; strip it so the path has no doubled slash.
  "${vol_fairseq%/}/train.py"
  "$vol_data"
  --arch=transformer_vaswani_wmt_en_de_big
  --max-source-positions=64
  --max-target-positions=64
  --required-batch-size-multiple=8
  --max-tokens=4096
  --no-save
  --attention-dropout=0.1
  --no-progress-bar
  --criterion=label_smoothed_cross_entropy
  --log-interval=10
  --source-lang=en
  --lr-scheduler=inverse_sqrt
  --min-lr 1e-09
  --skip-invalid-size-inputs-valid-test
  --target-lang=de
  --label-smoothing=0.1
  --curriculum=4
  --max-epoch=50
  --update-freq=1
  --optimizer adam
  --warmup-init-lr 1e-07
  --lr 0.0005
  --warmup-updates 4000
  # Quoted so the shell does not interpret the parentheses.
  --adam-betas='(0.9,0.98)'
  --share-all-embeddings
  --dropout 0.3
  --weight-decay 0.0
  --fp16
  --distributed-world-size=1
  --valid-subset=valid
  "${other_flags[@]}"
)

# The exit status of the training process becomes the script's exit status.
"${train_cmd[@]}"
--------------
nohup: ignoring input
Namespace(activation_dropout=0.0, activation_fn='relu', adam_betas='(0.9,0.98)', adam_eps=1e-08, adaptive_input=False, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer_vaswani_wmt_en_de_big', attention_dropout=0.1, best_checkpoint_metric='loss', bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', curriculum=4, data='/home/taylanbil/data/wmt18_en_de_bpej32k', dataset_impl='cached', ddp_backend='c10d', decoder_attention_heads=16, decoder_embed_dim=1024, decoder_embed_path=None, decoder_ffn_embed_dim=4096, decoder_input_dim=1024, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=1024, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_port=-1, distributed_rank=0, distributed_world_size=1, dropout=0.3, encoder_attention_heads=16, encoder_embed_dim=1024, encoder_embed_path=None, encoder_ffn_embed_dim=4096, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, find_unused_parameters=False, fix_batches_to_gpus=False, fp16=True, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=-1, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format=None, log_interval=10, lr=[0.0005], lr_scheduler='inverse_sqrt', max_epoch=50, max_sentences=None, max_sentences_valid=None, max_source_positions=64, max_target_positions=64, max_tokens=4096, max_tokens_valid=4096, max_update=0, maximize_best_checkpoint_metric=False, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, no_epoch_checkpoints=False, no_last_checkpoints=False, no_progress_bar=True, no_save=True, no_save_optimizer_state=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, required_batch_size_multiple=8, reset_dataloader=False, 
reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=True, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=True, source_lang='en', target_lang='de', task='translation', tbmf_wrapper=False, tensorboard_logdir='', threshold_loss_scale=None, train_subset='train', update_freq=[1], upsample_primary=1, use_bmuf=False, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=4000, weight_decay=0.0)
| [en] dictionary: 35662 types
| [de] dictionary: 35662 types
| /home/taylanbil/data/wmt18_en_de_bpej32k valid en-de 52385 examples
TransformerModel(
(encoder): TransformerEncoder(
(embed_tokens): Embedding(35662, 1024, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(1): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(2): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(3): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(4): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(5): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
)
)
(decoder): TransformerDecoder(
(embed_tokens): Embedding(35662, 1024, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(1): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(2): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(3): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(4): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
(5): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
)
)
)
)
| model transformer_vaswani_wmt_en_de_big, criterion LabelSmoothedCrossEntropyCriterion
| num. model params: 212875264 (num. trained: 212875264)
| training on 1 GPUs
| max tokens per GPU = 4096 and max sentences per GPU = None
| no existing checkpoint found checkpoints/checkpoint_last.pt
| loading train data for epoch 0
| /home/taylanbil/data/wmt18_en_de_bpej32k train en-de 5186259 examples
| WARNING: 240829 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[1422704, 2718830, 2897878, 3673048, 2016896, 2200333, 3886976, 2097242, 3124502, 2871279]
| WARNING: overflow detected, setting loss scale to: 64.0
| WARNING: overflow detected, setting loss scale to: 32.0
| WARNING: overflow detected, setting loss scale to: 16.0
| epoch 001: 10 / 35447 loss=16.071, nll_loss=16.096, ppl=70054.88, wps=338, ups=0, wpb=4096.000, bsz=2048.000, num_updates=8, lr=1.0998e-06, gnorm=23.891, clip=0.000, oom=0.000, loss_scale=16.000, wall=97, train_wall=4
| epoch 001: 20 / 35447 loss=15.214, nll_loss=15.144, ppl=36217.42, wps=728, ups=0, wpb=4076.611, bsz=1704.000, num_updates=18, lr=2.34955e-06, gnorm=22.061, clip=0.000, oom=0.000, loss_scale=16.000, wall=101, train_wall=6
| epoch 001: 30 / 35447 loss=13.856, nll_loss=13.629, ppl=12670.91, wps=1091, ups=0, wpb=4077.821, bsz=1581.143, num_updates=28, lr=3.5993e-06, gnorm=19.498, clip=0.000, oom=0.000, loss_scale=16.000, wall=105, train_wall=9
| epoch 001: 40 / 35447 loss=12.751, nll_loss=12.373, ppl=5303.14, wps=1428, ups=0, wpb=4078.395, bsz=1522.947, num_updates=38, lr=4.84905e-06, gnorm=17.021, clip=0.000, oom=0.000, loss_scale=16.000, wall=109, train_wall=12
| epoch 001: 50 / 35447 loss=12.527, nll_loss=12.103, ppl=4400.25, wps=1740, ups=0, wpb=4059.979, bsz=1440.000, num_updates=48, lr=6.0988e-06, gnorm=15.772, clip=0.000, oom=0.000, loss_scale=16.000, wall=112, train_wall=14
| epoch 001: 60 / 35447 loss=12.346, nll_loss=11.903, ppl=3830.89, wps=1979, ups=1, wpb=3926.345, bsz=1368.276, num_updates=58, lr=7.34855e-06, gnorm=14.178, clip=0.000, oom=0.000, loss_scale=16.000, wall=115, train_wall=16
| epoch 001: 70 / 35447 loss=12.275, nll_loss=11.826, ppl=3630.54, wps=2268, ups=1, wpb=3951.294, bsz=1317.647, num_updates=68, lr=8.5983e-06, gnorm=12.838, clip=0.000, oom=0.000, loss_scale=16.000, wall=118, train_wall=19
| epoch 001: 80 / 35447 loss=12.179, nll_loss=11.721, ppl=3375.73, wps=2542, ups=1, wpb=3965.026, bsz=1272.000, num_updates=78, lr=9.84805e-06, gnorm=11.687, clip=0.000, oom=0.000, loss_scale=16.000, wall=122, train_wall=21
| epoch 001: 90 / 35447 loss=12.162, nll_loss=11.702, ppl=3332.45, wps=2768, ups=1, wpb=3918.852, bsz=1220.182, num_updates=88, lr=1.10978e-05, gnorm=10.780, clip=0.000, oom=0.000, loss_scale=16.000, wall=125, train_wall=23
| epoch 001: 100 / 35447 loss=12.128, nll_loss=11.665, ppl=3247.81, wps=2998, ups=1, wpb=3899.990, bsz=1178.939, num_updates=98, lr=1.23476e-05, gnorm=9.974, clip=0.000, oom=0.000, loss_scale=16.000, wall=127, train_wall=25
| epoch 001: 110 / 35447 loss=12.125, nll_loss=11.663, ppl=3241.89, wps=3240, ups=1, wpb=3915.824, bsz=1142.815, num_updates=108, lr=1.35973e-05, gnorm=9.255, clip=0.000, oom=0.000, loss_scale=16.000, wall=131, train_wall=28
| epoch 001: 120 / 35447 loss=12.136, nll_loss=11.676, ppl=3272.83, wps=3476, ups=1, wpb=3925.254, bsz=1101.966, num_updates=118, lr=1.48471e-05, gnorm=8.663, clip=0.000, oom=0.000, loss_scale=16.000, wall=133, train_wall=30
| epoch 001: 130 / 35447 loss=12.114, nll_loss=11.653, ppl=3221.23, wps=3684, ups=1, wpb=3909.312, bsz=1064.500, num_updates=128, lr=1.60968e-05, gnorm=8.284, clip=0.000, oom=0.000, loss_scale=16.000, wall=136, train_wall=32
| epoch 001: 140 / 35447 loss=11.989, nll_loss=11.514, ppl=2924.37, wps=3859, ups=1, wpb=3872.406, bsz=1036.638, num_updates=138, lr=1.73466e-05, gnorm=7.980, clip=0.000, oom=0.000, loss_scale=16.000, wall=138, train_wall=34
| epoch 001: 150 / 35447 loss=11.935, nll_loss=11.452, ppl=2802.40, wps=4070, ups=1, wpb=3885.297, bsz=1012.541, num_updates=148, lr=1.85963e-05, gnorm=7.771, clip=0.000, oom=0.000, loss_scale=16.000, wall=141, train_wall=36
| epoch 001: 160 / 35447 loss=11.812, nll_loss=11.315, ppl=2547.54, wps=4274, ups=1, wpb=3897.620, bsz=991.494, num_updates=158, lr=1.98461e-05, gnorm=7.481, clip=0.000, oom=0.000, loss_scale=16.000, wall=144, train_wall=38
| epoch 001: 170 / 35447 loss=11.697, nll_loss=11.185, ppl=2328.18, wps=4468, ups=1, wpb=3905.405, bsz=970.667, num_updates=168, lr=2.10958e-05, gnorm=7.233, clip=0.000, oom=0.000, loss_scale=16.000, wall=147, train_wall=40
| epoch 001: 180 / 35447 loss=11.650, nll_loss=11.133, ppl=2245.22, wps=4664, ups=1, wpb=3915.663, bsz=948.944, num_updates=178, lr=2.23456e-05, gnorm=7.037, clip=0.000, oom=0.000, loss_scale=16.000, wall=149, train_wall=42
| epoch 001: 190 / 35447 loss=11.620, nll_loss=11.098, ppl=2191.69, wps=4857, ups=1, wpb=3924.931, bsz=926.851, num_updates=188, lr=2.35953e-05, gnorm=6.892, clip=0.000, oom=0.000, loss_scale=16.000, wall=152, train_wall=44
| epoch 001: 200 / 35447 loss=11.592, nll_loss=11.064, ppl=2141.12, wps=4990, ups=1, wpb=3892.530, bsz=908.444, num_updates=198, lr=2.48451e-05, gnorm=6.953, clip=0.000, oom=0.000, loss_scale=16.000, wall=154, train_wall=46
| epoch 001: 210 / 35447 loss=11.544, nll_loss=11.010, ppl=2061.99, wps=5133, ups=1, wpb=3873.851, bsz=892.846, num_updates=208, lr=2.60948e-05, gnorm=6.843, clip=0.000, oom=0.000, loss_scale=16.000, wall=157, train_wall=48
| epoch 001: 220 / 35447 loss=11.483, nll_loss=10.940, ppl=1964.61, wps=5301, ups=1, wpb=3881.151, bsz=878.679, num_updates=218, lr=2.73446e-05, gnorm=6.616, clip=0.000, oom=0.000, loss_scale=16.000, wall=160, train_wall=50
| epoch 001: 230 / 35447 loss=11.408, nll_loss=10.855, ppl=1851.85, wps=5467, ups=1, wpb=3890.224, bsz=865.754, num_updates=228, lr=2.85943e-05, gnorm=6.389, clip=0.000, oom=0.000, loss_scale=16.000, wall=162, train_wall=52
| epoch 001: 240 / 35447 loss=11.344, nll_loss=10.782, ppl=1760.33, wps=5630, ups=1, wpb=3898.319, bsz=852.706, num_updates=238, lr=2.98441e-05, gnorm=6.232, clip=0.000, oom=0.000, loss_scale=16.000, wall=165, train_wall=55
| epoch 001: 250 / 35447 loss=11.294, nll_loss=10.724, ppl=1691.20, wps=5794, ups=1, wpb=3906.290, bsz=838.968, num_updates=248, lr=3.10938e-05, gnorm=6.089, clip=0.000, oom=0.000, loss_scale=16.000, wall=167, train_wall=57
| epoch 001: 260 / 35447 loss=11.247, nll_loss=10.669, ppl=1628.57, wps=5950, ups=2, wpb=3912.229, bsz=825.302, num_updates=258, lr=3.23436e-05, gnorm=5.959, clip=0.000, oom=0.000, loss_scale=16.000, wall=170, train_wall=58
| epoch 001: 270 / 35447 loss=11.216, nll_loss=10.633, ppl=1587.84, wps=6102, ups=2, wpb=3916.858, bsz=810.925, num_updates=268, lr=3.35933e-05, gnorm=5.839, clip=0.000, oom=0.000, loss_scale=16.000, wall=172, train_wall=60
| epoch 001: 280 / 35447 loss=11.181, nll_loss=10.593, ppl=1544.23, wps=6238, ups=2, wpb=3911.273, bsz=797.554, num_updates=278, lr=3.48431e-05, gnorm=5.751, clip=0.000, oom=0.000, loss_scale=16.000, wall=174, train_wall=62
| epoch 001: 290 / 35447 loss=11.144, nll_loss=10.549, ppl=1498.33, wps=6349, ups=2, wpb=3891.934, bsz=787.639, num_updates=288, lr=3.60928e-05, gnorm=5.712, clip=0.000, oom=0.000, loss_scale=16.000, wall=177, train_wall=64
| epoch 001: 300 / 35447 loss=11.097, nll_loss=10.494, ppl=1442.45, wps=6469, ups=2, wpb=3881.601, bsz=778.389, num_updates=298, lr=3.73426e-05, gnorm=5.603, clip=0.000, oom=0.000, loss_scale=16.000, wall=179, train_wall=66
| epoch 001: 310 / 35447 loss=11.055, nll_loss=10.446, ppl=1394.82, wps=6600, ups=2, wpb=3881.062, bsz=769.740, num_updates=308, lr=3.85923e-05, gnorm=5.510, clip=0.000, oom=0.000, loss_scale=16.000, wall=181, train_wall=68
| epoch 001: 320 / 35447 loss=11.014, nll_loss=10.397, ppl=1348.15, wps=6741, ups=2, wpb=3887.821, bsz=761.635, num_updates=318, lr=3.98421e-05, gnorm=5.402, clip=0.000, oom=0.000, loss_scale=16.000, wall=183, train_wall=70
| epoch 001: 330 / 35447 loss=10.965, nll_loss=10.340, ppl=1296.36, wps=6877, ups=2, wpb=3894.168, bsz=754.024, num_updates=328, lr=4.10918e-05, gnorm=5.280, clip=0.000, oom=0.000, loss_scale=16.000, wall=186, train_wall=71
| epoch 001: 340 / 35447 loss=10.922, nll_loss=10.290, ppl=1251.63, wps=7005, ups=2, wpb=3898.450, bsz=746.107, num_updates=338, lr=4.23416e-05, gnorm=5.179, clip=0.000, oom=0.000, loss_scale=16.000, wall=188, train_wall=73
| epoch 001: 350 / 35447 loss=10.888, nll_loss=10.250, ppl=1217.74, wps=7133, ups=2, wpb=3902.287, bsz=737.540, num_updates=348, lr=4.35913e-05, gnorm=5.093, clip=0.000, oom=0.000, loss_scale=16.000, wall=190, train_wall=75
| epoch 001: 360 / 35447 loss=10.850, nll_loss=10.205, ppl=1180.74, wps=7257, ups=2, wpb=3905.911, bsz=729.453, num_updates=358, lr=4.48411e-05, gnorm=4.991, clip=0.000, oom=0.000, loss_scale=16.000, wall=193, train_wall=77
| epoch 001: 370 / 35447 loss=10.816, nll_loss=10.166, ppl=1148.58, wps=7379, ups=2, wpb=3909.022, bsz=721.152, num_updates=368, lr=4.60908e-05, gnorm=4.914, clip=0.000, oom=0.000, loss_scale=16.000, wall=195, train_wall=79
| epoch 001: 380 / 35447 loss=10.781, nll_loss=10.125, ppl=1116.31, wps=7503, ups=2, wpb=3913.545, bsz=712.868, num_updates=378, lr=4.73406e-05, gnorm=4.836, clip=0.000, oom=0.000, loss_scale=16.000, wall=197, train_wall=81
| epoch 001: 390 / 35447 loss=10.750, nll_loss=10.088, ppl=1088.67, wps=7624, ups=2, wpb=3917.149, bsz=704.598, num_updates=388, lr=4.85903e-05, gnorm=4.758, clip=0.000, oom=0.000, loss_scale=16.000, wall=199, train_wall=83
| epoch 001: 400 / 35447 loss=10.729, nll_loss=10.062, ppl=1069.12, wps=7744, ups=2, wpb=3920.437, bsz=696.141, num_updates=398, lr=4.98401e-05, gnorm=4.680, clip=0.000, oom=0.000, loss_scale=16.000, wall=202, train_wall=84
| epoch 001: 410 / 35447 loss=10.705, nll_loss=10.035, ppl=1048.83, wps=7816, ups=2, wpb=3902.809, bsz=689.863, num_updates=408, lr=5.10898e-05, gnorm=4.677, clip=0.000, oom=0.000, loss_scale=16.000, wall=204, train_wall=86
| epoch 001: 420 / 35447 loss=10.677, nll_loss=10.002, ppl=1025.33, wps=7897, ups=2, wpb=3891.821, bsz=684.077, num_updates=418, lr=5.23396e-05, gnorm=4.631, clip=0.000, oom=0.000, loss_scale=16.000, wall=206, train_wall=88
| epoch 001: 430 / 35447 loss=10.647, nll_loss=9.965, ppl=999.79, wps=7982, ups=2, wpb=3884.629, bsz=678.561, num_updates=428, lr=5.35893e-05, gnorm=4.556, clip=0.000, oom=0.000, loss_scale=16.000, wall=208, train_wall=90
| epoch 001: 440 / 35447 loss=10.613, nll_loss=9.926, ppl=972.91, wps=8066, ups=2, wpb=3877.765, bsz=673.297, num_updates=438, lr=5.48391e-05, gnorm=4.478, clip=0.000, oom=0.000, loss_scale=16.000, wall=211, train_wall=92
| epoch 001: 450 / 35447 loss=10.588, nll_loss=9.897, ppl=953.38, wps=8160, ups=2, wpb=3880.536, bsz=668.268, num_updates=448, lr=5.60888e-05, gnorm=4.429, clip=0.000, oom=0.000, loss_scale=16.000, wall=213, train_wall=94
| epoch 001: 460 / 35447 loss=10.559, nll_loss=9.863, ppl=931.50, wps=8253, ups=2, wpb=3883.843, bsz=663.459, num_updates=458, lr=5.73386e-05, gnorm=4.360, clip=0.000, oom=0.000, loss_scale=16.000, wall=216, train_wall=96
| epoch 001: 470 / 35447 loss=10.529, nll_loss=9.828, ppl=909.21, wps=8344, ups=2, wpb=3887.009, bsz=658.855, num_updates=468, lr=5.85883e-05, gnorm=4.289, clip=0.000, oom=0.000, loss_scale=16.000, wall=218, train_wall=98
| epoch 001: 480 / 35447 loss=10.498, nll_loss=9.791, ppl=886.12, wps=8433, ups=2, wpb=3890.042, bsz=654.444, num_updates=478, lr=5.98381e-05, gnorm=4.220, clip=0.000, oom=0.000, loss_scale=16.000, wall=220, train_wall=100
| epoch 001: 490 / 35447 loss=10.482, nll_loss=9.772, ppl=874.34, wps=8525, ups=2, wpb=3893.803, bsz=649.393, num_updates=488, lr=6.10878e-05, gnorm=4.183, clip=0.000, oom=0.000, loss_scale=16.000, wall=223, train_wall=102
| epoch 001: 500 / 35447 loss=10.463, nll_loss=9.750, ppl=861.15, wps=8616, ups=2, wpb=3897.542, bsz=644.546, num_updates=498, lr=6.23376e-05, gnorm=4.125, clip=0.000, oom=0.000, loss_scale=16.000, wall=225, train_wall=104
| epoch 001: 510 / 35447 loss=10.443, nll_loss=9.727, ppl=847.22, wps=8705, ups=2, wpb=3901.134, bsz=639.890, num_updates=508, lr=6.35873e-05, gnorm=4.065, clip=0.000, oom=0.000, loss_scale=16.000, wall=228, train_wall=106
| epoch 001: 520 / 35447 loss=10.428, nll_loss=9.709, ppl=836.84, wps=8791, ups=2, wpb=3903.911, bsz=634.950, num_updates=518, lr=6.48371e-05, gnorm=4.025, clip=0.000, oom=0.000, loss_scale=16.000, wall=230, train_wall=108
| epoch 001: 530 / 35447 loss=10.410, nll_loss=9.687, ppl=824.50, wps=8878, ups=2, wpb=3906.640, bsz=629.894, num_updates=528, lr=6.60868e-05, gnorm=3.977, clip=0.000, oom=0.000, loss_scale=16.000, wall=232, train_wall=110
| epoch 001: 540 / 35447 loss=10.390, nll_loss=9.664, ppl=811.01, wps=8963, ups=2, wpb=3909.268, bsz=625.026, num_updates=538, lr=6.73366e-05, gnorm=3.926, clip=0.000, oom=0.000, loss_scale=16.000, wall=235, train_wall=112
| epoch 001: 550 / 35447 loss=10.376, nll_loss=9.647, ppl=801.58, wps=9047, ups=2, wpb=3911.029, bsz=619.927, num_updates=548, lr=6.85863e-05, gnorm=3.887, clip=0.000, oom=0.000, loss_scale=16.000, wall=237, train_wall=114
| epoch 001: 560 / 35447 loss=10.363, nll_loss=9.632, ppl=793.18, wps=9132, ups=2, wpb=3913.197, bsz=614.839, num_updates=558, lr=6.98361e-05, gnorm=3.842, clip=0.000, oom=0.000, loss_scale=16.000, wall=239, train_wall=116
| epoch 001: 570 / 35447 loss=10.356, nll_loss=9.623, ppl=788.64, wps=9215, ups=2, wpb=3915.092, bsz=609.676, num_updates=568, lr=7.10858e-05, gnorm=3.802, clip=0.000, oom=0.000, loss_scale=16.000, wall=241, train_wall=118
| epoch 001: 580 / 35447 loss=10.356, nll_loss=9.622, ppl=787.97, wps=9285, ups=2, wpb=3911.405, bsz=604.858, num_updates=578, lr=7.23356e-05, gnorm=3.779, clip=0.000, oom=0.000, loss_scale=16.000, wall=243, train_wall=120
| epoch 001: 590 / 35447 loss=10.342, nll_loss=9.606, ppl=779.11, wps=9329, ups=2, wpb=3897.262, bsz=601.510, num_updates=588, lr=7.35853e-05, gnorm=3.762, clip=0.000, oom=0.000, loss_scale=16.000, wall=246, train_wall=122
| epoch 001: 600 / 35447 loss=10.322, nll_loss=9.583, ppl=766.91, wps=9378, ups=2, wpb=3886.672, bsz=598.274, num_updates=598, lr=7.48351e-05, gnorm=3.732, clip=0.000, oom=0.000, loss_scale=16.000, wall=248, train_wall=123
| epoch 001: 610 / 35447 loss=10.306, nll_loss=9.564, ppl=756.95, wps=9434, ups=2, wpb=3881.391, bsz=595.145, num_updates=608, lr=7.60848e-05, gnorm=3.705, clip=0.000, oom=0.000, loss_scale=16.000, wall=250, train_wall=125
| epoch 001: 620 / 35447 loss=10.288, nll_loss=9.543, ppl=745.91, wps=9491, ups=2, wpb=3878.003, bsz=592.117, num_updates=618, lr=7.73346e-05, gnorm=3.665, clip=0.000, oom=0.000, loss_scale=16.000, wall=253, train_wall=127
| epoch 001: 630 / 35447 loss=10.267, nll_loss=9.519, ppl=733.64, wps=9547, ups=2, wpb=3874.723, bsz=589.185, num_updates=628, lr=7.85843e-05, gnorm=3.623, clip=0.000, oom=0.000, loss_scale=16.000, wall=255, train_wall=129
| epoch 001: 640 / 35447 loss=10.247, nll_loss=9.495, ppl=721.68, wps=9603, ups=2, wpb=3872.498, bsz=586.345, num_updates=638, lr=7.98341e-05, gnorm=3.587, clip=0.000, oom=0.000, loss_scale=16.000, wall=257, train_wall=131
| epoch 001: 650 / 35447 loss=10.231, nll_loss=9.477, ppl=712.60, wps=9670, ups=2, wpb=3875.701, bsz=583.593, num_updates=648, lr=8.10838e-05, gnorm=3.559, clip=0.000, oom=0.000, loss_scale=16.000, wall=260, train_wall=133
| epoch 001: 660 / 35447 loss=10.213, nll_loss=9.455, ppl=702.03, wps=9736, ups=3, wpb=3878.805, bsz=580.924, num_updates=658, lr=8.23336e-05, gnorm=3.522, clip=0.000, oom=0.000, loss_scale=16.000, wall=262, train_wall=135
| epoch 001: 670 / 35447 loss=10.192, nll_loss=9.431, ppl=690.24, wps=9800, ups=3, wpb=3881.817, bsz=578.335, num_updates=668, lr=8.35833e-05, gnorm=3.484, clip=0.000, oom=0.000, loss_scale=16.000, wall=265, train_wall=138
| epoch 001: 680 / 35447 loss=10.170, nll_loss=9.406, ppl=678.26, wps=9859, ups=3, wpb=3884.740, bsz=575.823, num_updates=678, lr=8.48331e-05, gnorm=3.447, clip=0.000, oom=0.000, loss_scale=16.000, wall=267, train_wall=140
| epoch 001: 690 / 35447 loss=10.150, nll_loss=9.382, ppl=667.37, wps=9921, ups=3, wpb=3887.472, bsz=573.326, num_updates=688, lr=8.60828e-05, gnorm=3.415, clip=0.000, oom=0.000, loss_scale=16.000, wall=270, train_wall=142
| epoch 001: 700 / 35447 loss=10.138, nll_loss=9.368, ppl=660.86, wps=9985, ups=3, wpb=3889.772, bsz=570.384, num_updates=698, lr=8.73326e-05, gnorm=3.392, clip=0.000, oom=0.000, loss_scale=16.000, wall=272, train_wall=144
| epoch 001: 710 / 35447 loss=10.125, nll_loss=9.353, ppl=654.06, wps=10047, ups=3, wpb=3892.007, bsz=567.525, num_updates=708, lr=8.85823e-05, gnorm=3.359, clip=0.000, oom=0.000, loss_scale=16.000, wall=274, train_wall=146
| epoch 001: 720 / 35447 loss=10.111, nll_loss=9.337, ppl=646.73, wps=10109, ups=3, wpb=3894.180, bsz=564.747, num_updates=718, lr=8.98321e-05, gnorm=3.327, clip=0.000, oom=0.000, loss_scale=16.000, wall=277, train_wall=148
| epoch 001: 730 / 35447 loss=10.097, nll_loss=9.320, ppl=639.16, wps=10166, ups=3, wpb=3896.293, bsz=562.044, num_updates=728, lr=9.10818e-05, gnorm=3.295, clip=0.000, oom=0.000, loss_scale=16.000, wall=279, train_wall=150
| epoch 001: 740 / 35447 loss=10.087, nll_loss=9.308, ppl=633.92, wps=10227, ups=3, wpb=3898.103, bsz=559.198, num_updates=738, lr=9.23316e-05, gnorm=3.272, clip=0.000, oom=0.000, loss_scale=16.000, wall=281, train_wall=152
| epoch 001: 750 / 35447 loss=10.081, nll_loss=9.301, ppl=630.88, wps=10288, ups=3, wpb=3899.893, bsz=556.214, num_updates=748, lr=9.35813e-05, gnorm=3.246, clip=0.000, oom=0.000, loss_scale=16.000, wall=284, train_wall=154
| epoch 001: 760 / 35447 loss=10.073, nll_loss=9.292, ppl=626.82, wps=10348, ups=3, wpb=3901.636, bsz=553.309, num_updates=758, lr=9.48311e-05, gnorm=3.217, clip=0.000, oom=0.000, loss_scale=16.000, wall=286, train_wall=156
| epoch 001: 770 / 35447 loss=10.066, nll_loss=9.283, ppl=622.82, wps=10406, ups=3, wpb=3903.333, bsz=550.479, num_updates=768, lr=9.60808e-05, gnorm=3.189, clip=0.000, oom=0.000, loss_scale=16.000, wall=288, train_wall=157
| epoch 001: 780 / 35447 loss=10.065, nll_loss=9.281, ppl=622.13, wps=10466, ups=3, wpb=3904.895, bsz=547.414, num_updates=778, lr=9.73306e-05, gnorm=3.170, clip=0.000, oom=0.000, loss_scale=16.000, wall=290, train_wall=159
| epoch 001: 790 / 35447 loss=10.062, nll_loss=9.278, ppl=620.63, wps=10525, ups=3, wpb=3906.812, bsz=544.426, num_updates=788, lr=9.85803e-05, gnorm=3.143, clip=0.000, oom=0.000, loss_scale=16.000, wall=292, train_wall=161
| epoch 001: 800 / 35447 loss=10.061, nll_loss=9.276, ppl=619.80, wps=10584, ups=3, wpb=3908.307, bsz=541.393, num_updates=798, lr=9.98301e-05, gnorm=3.119, clip=0.000, oom=0.000, loss_scale=16.000, wall=295, train_wall=163
| epoch 001: 810 / 35447 loss=10.061, nll_loss=9.276, ppl=619.85, wps=10643, ups=3, wpb=3909.838, bsz=538.257, num_updates=808, lr=0.00010108, gnorm=3.096, clip=0.000, oom=0.000, loss_scale=16.000, wall=297, train_wall=165
| epoch 001: 820 / 35447 loss=10.059, nll_loss=9.273, ppl=618.81, wps=10684, ups=3, wpb=3903.859, bsz=535.687, num_updates=818, lr=0.00010233, gnorm=3.083, clip=0.000, oom=0.000, loss_scale=16.000, wall=299, train_wall=167
| epoch 001: 830 / 35447 loss=10.051, nll_loss=9.263, ppl=614.49, wps=10711, ups=3, wpb=3894.052, bsz=533.662, num_updates=828, lr=0.000103579, gnorm=3.074, clip=0.000, oom=0.000, loss_scale=16.000, wall=301, train_wall=169
| epoch 001: 840 / 35447 loss=10.040, nll_loss=9.251, ppl=609.14, wps=10743, ups=3, wpb=3887.106, bsz=531.685, num_updates=838, lr=0.000104829, gnorm=3.063, clip=0.000, oom=0.000, loss_scale=16.000, wall=303, train_wall=170
| epoch 001: 850 / 35447 loss=10.027, nll_loss=9.235, ppl=602.71, wps=10772, ups=3, wpb=3880.324, bsz=529.755, num_updates=848, lr=0.000106079, gnorm=3.042, clip=0.000, oom=0.000, loss_scale=16.000, wall=305, train_wall=172
| epoch 001: 860 / 35447 loss=10.014, nll_loss=9.220, ppl=596.49, wps=10811, ups=3, wpb=3877.836, bsz=527.869, num_updates=858, lr=0.000107329, gnorm=3.028, clip=0.000, oom=0.000, loss_scale=16.000, wall=308, train_wall=174
| epoch 001: 870 / 35447 loss=10.000, nll_loss=9.204, ppl=589.85, wps=10849, ups=3, wpb=3875.556, bsz=526.028, num_updates=868, lr=0.000108578, gnorm=3.006, clip=0.000, oom=0.000, loss_scale=16.000, wall=310, train_wall=176
| epoch 001: 880 / 35447 loss=9.984, nll_loss=9.186, ppl=582.60, wps=10887, ups=3, wpb=3873.329, bsz=524.228, num_updates=878, lr=0.000109828, gnorm=2.984, clip=0.000, oom=0.000, loss_scale=16.000, wall=312, train_wall=178
| epoch 001: 890 / 35447 loss=9.969, nll_loss=9.168, ppl=575.37, wps=10925, ups=3, wpb=3871.152, bsz=522.468, num_updates=888, lr=0.000111078, gnorm=2.963, clip=0.000, oom=0.000, loss_scale=16.000, wall=315, train_wall=180
| epoch 001: 900 / 35447 loss=9.956, nll_loss=9.154, ppl=569.72, wps=10965, ups=3, wpb=3871.078, bsz=520.748, num_updates=898, lr=0.000112328, gnorm=2.947, clip=0.000, oom=0.000, loss_scale=16.000, wall=317, train_wall=182
| epoch 001: 910 / 35447 loss=9.945, nll_loss=9.141, ppl=564.62, wps=11011, ups=3, wpb=3873.026, bsz=519.066, num_updates=908, lr=0.000113577, gnorm=2.928, clip=0.000, oom=0.000, loss_scale=16.000, wall=319, train_wall=184
| epoch 001: 920 / 35447 loss=9.933, nll_loss=9.126, ppl=558.84, wps=11054, ups=3, wpb=3874.932, bsz=517.420, num_updates=918, lr=0.000114827, gnorm=2.907, clip=0.000, oom=0.000, loss_scale=16.000, wall=322, train_wall=186
| epoch 001: 930 / 35447 loss=9.919, nll_loss=9.111, ppl=552.84, wps=11097, ups=3, wpb=3876.797, bsz=515.810, num_updates=928, lr=0.000116077, gnorm=2.887, clip=0.000, oom=0.000, loss_scale=16.000, wall=324, train_wall=188
| epoch 001: 940 / 35447 loss=9.905, nll_loss=9.094, ppl=546.62, wps=11139, ups=3, wpb=3878.623, bsz=514.235, num_updates=938, lr=0.000117327, gnorm=2.868, clip=0.000, oom=0.000, loss_scale=16.000, wall=327, train_wall=190
| epoch 001: 950 / 35447 loss=9.890, nll_loss=9.078, ppl=540.34, wps=11182, ups=3, wpb=3880.409, bsz=512.692, num_updates=948, lr=0.000118576, gnorm=2.849, clip=0.000, oom=0.000, loss_scale=16.000, wall=329, train_wall=192
| epoch 001: 960 / 35447 loss=9.881, nll_loss=9.066, ppl=536.05, wps=11224, ups=3, wpb=3881.767, bsz=510.981, num_updates=958, lr=0.000119826, gnorm=2.836, clip=0.000, oom=0.000, loss_scale=16.000, wall=331, train_wall=194
| epoch 001: 970 / 35447 loss=9.872, nll_loss=9.057, ppl=532.51, wps=11268, ups=3, wpb=3883.319, bsz=509.174, num_updates=968, lr=0.000121076, gnorm=2.820, clip=0.000, oom=0.000, loss_scale=16.000, wall=334, train_wall=196
| epoch 001: 980 / 35447 loss=9.863, nll_loss=9.046, ppl=528.53, wps=11310, ups=3, wpb=3884.839, bsz=507.403, num_updates=978, lr=0.000122326, gnorm=2.803, clip=0.000, oom=0.000, loss_scale=16.000, wall=336, train_wall=198
| epoch 001: 990 / 35447 loss=9.853, nll_loss=9.034, ppl=524.23, wps=11353, ups=3, wpb=3886.329, bsz=505.668, num_updates=988, lr=0.000123575, gnorm=2.786, clip=0.000, oom=0.000, loss_scale=16.000, wall=338, train_wall=200
| epoch 001: 1000 / 35447 loss=9.842, nll_loss=9.022, ppl=519.70, wps=11395, ups=3, wpb=3887.789, bsz=503.968, num_updates=998, lr=0.000124825, gnorm=2.769, clip=0.000, oom=0.000, loss_scale=16.000, wall=341, train_wall=202
| epoch 001: 1010 / 35447 loss=9.832, nll_loss=9.010, ppl=515.45, wps=11436, ups=3, wpb=3889.088, bsz=502.278, num_updates=1008, lr=0.000126075, gnorm=2.753, clip=0.000, oom=0.000, loss_scale=16.000, wall=343, train_wall=204
| epoch 001: 1020 / 35447 loss=9.828, nll_loss=9.005, ppl=513.74, wps=11480, ups=3, wpb=3890.728, bsz=500.409, num_updates=1018, lr=0.000127325, gnorm=2.744, clip=0.000, oom=0.000, loss_scale=16.000, wall=345, train_wall=206
| epoch 001: 1030 / 35447 loss=9.823, nll_loss=8.999, ppl=511.61, wps=11519, ups=3, wpb=3892.336, bsz=498.576, num_updates=1028, lr=0.000128574, gnorm=2.729, clip=0.000, oom=0.000, loss_scale=16.000, wall=347, train_wall=208
| epoch 001: 1040 / 35447 loss=9.817, nll_loss=8.992, ppl=509.06, wps=11561, ups=3, wpb=3893.912, bsz=496.778, num_updates=1038, lr=0.000129824, gnorm=2.714, clip=0.000, oom=0.000, loss_scale=16.000, wall=350, train_wall=210
| epoch 001: 1050 / 35447 loss=9.810, nll_loss=8.984, ppl=506.21, wps=11602, ups=3, wpb=3895.459, bsz=495.015, num_updates=1048, lr=0.000131074, gnorm=2.698, clip=0.000, oom=0.000, loss_scale=16.000, wall=352, train_wall=212
| epoch 001: 1060 / 35447 loss=9.807, nll_loss=8.980, ppl=504.85, wps=11644, ups=3, wpb=3896.719, bsz=493.119, num_updates=1058, lr=0.000132324, gnorm=2.687, clip=0.000, oom=0.000, loss_scale=16.000, wall=354, train_wall=214
| epoch 001: 1070 / 35447 loss=9.804, nll_loss=8.977, ppl=503.78, wps=11687, ups=3, wpb=3897.986, bsz=491.199, num_updates=1068, lr=0.000133573, gnorm=2.673, clip=0.000, oom=0.000, loss_scale=16.000, wall=356, train_wall=216
| epoch 001: 1080 / 35447 loss=9.801, nll_loss=8.972, ppl=502.32, wps=11728, ups=3, wpb=3899.229, bsz=489.314, num_updates=1078, lr=0.000134823, gnorm=2.659, clip=0.000, oom=0.000, loss_scale=16.000, wall=358, train_wall=218
| epoch 001: 1090 / 35447 loss=9.799, nll_loss=8.970, ppl=501.40, wps=11771, ups=3, wpb=3900.637, bsz=487.390, num_updates=1088, lr=0.000136073, gnorm=2.647, clip=0.000, oom=0.000, loss_scale=16.000, wall=361, train_wall=220
| epoch 001: 1100 / 35447 loss=9.797, nll_loss=8.967, ppl=500.56, wps=11814, ups=3, wpb=3902.270, bsz=485.428, num_updates=1098, lr=0.000137323, gnorm=2.634, clip=0.000, oom=0.000, loss_scale=16.000, wall=363, train_wall=222
| epoch 001: 1110 / 35447 loss=9.795, nll_loss=8.965, ppl=499.69, wps=11856, ups=3, wpb=3903.813, bsz=483.473, num_updates=1108, lr=0.000138572, gnorm=2.621, clip=0.000, oom=0.000, loss_scale=16.000, wall=365, train_wall=223
| epoch 001: 1120 / 35447 loss=9.795, nll_loss=8.964, ppl=499.53, wps=11902, ups=3, wpb=3905.532, bsz=481.438, num_updates=1118, lr=0.000139822, gnorm=2.610, clip=0.000, oom=0.000, loss_scale=16.000, wall=367, train_wall=225
| epoch 001: 1130 / 35447 loss=9.792, nll_loss=8.961, ppl=498.49, wps=11928, ups=3, wpb=3901.383, bsz=479.794, num_updates=1128, lr=0.000141072, gnorm=2.615, clip=0.000, oom=0.000, loss_scale=16.000, wall=369, train_wall=227
| epoch 001: 1140 / 35447 loss=9.784, nll_loss=8.951, ppl=495.05, wps=11943, ups=3, wpb=3893.673, bsz=478.531, num_updates=1138, lr=0.000142322, gnorm=2.607, clip=0.000, oom=0.000, loss_scale=16.000, wall=371, train_wall=229
| epoch 001: 1150 / 35447 loss=9.776, nll_loss=8.943, ppl=492.09, wps=11962, ups=3, wpb=3888.700, bsz=477.289, num_updates=1148, lr=0.000143571, gnorm=2.599, clip=0.000, oom=0.000, loss_scale=16.000, wall=373, train_wall=231
| epoch 001: 1160 / 35447 loss=9.767, nll_loss=8.932, ppl=488.41, wps=11983, ups=3, wpb=3884.135, bsz=476.069, num_updates=1158, lr=0.000144821, gnorm=2.587, clip=0.000, oom=0.000, loss_scale=16.000, wall=375, train_wall=232
| epoch 001: 1170 / 35447 loss=9.756, nll_loss=8.920, ppl=484.22, wps=12004, ups=3, wpb=3879.647, bsz=474.870, num_updates=1168, lr=0.000146071, gnorm=2.575, clip=0.000, oom=0.000, loss_scale=16.000, wall=377, train_wall=234
| epoch 001: 1180 / 35447 loss=9.747, nll_loss=8.909, ppl=480.83, wps=12028, ups=3, wpb=3877.444, bsz=473.691, num_updates=1178, lr=0.000147321, gnorm=2.567, clip=0.000, oom=0.000, loss_scale=16.000, wall=380, train_wall=236
| epoch 001: 1190 / 35447 loss=9.737, nll_loss=8.898, ppl=476.90, wps=12054, ups=3, wpb=3875.917, bsz=472.532, num_updates=1188, lr=0.00014857, gnorm=2.555, clip=0.000, oom=0.000, loss_scale=16.000, wall=382, train_wall=238
| epoch 001: 1200 / 35447 loss=9.725, nll_loss=8.884, ppl=472.42, wps=12080, ups=3, wpb=3874.415, bsz=471.392, num_updates=1198, lr=0.00014982, gnorm=2.542, clip=0.000, oom=0.000, loss_scale=16.000, wall=384, train_wall=240
| epoch 001: 1210 / 35447 loss=9.713, nll_loss=8.870, ppl=467.89, wps=12105, ups=3, wpb=3872.938, bsz=470.272, num_updates=1208, lr=0.00015107, gnorm=2.530, clip=0.000, oom=0.000, loss_scale=16.000, wall=387, train_wall=242
| epoch 001: 1220 / 35447 loss=9.701, nll_loss=8.856, ppl=463.38, wps=12130, ups=3, wpb=3871.485, bsz=469.169, num_updates=1218, lr=0.00015232, gnorm=2.519, clip=0.000, oom=0.000, loss_scale=16.000, wall=389, train_wall=244
| epoch 001: 1230 / 35447 loss=9.691, nll_loss=8.844, ppl=459.56, wps=12156, ups=3, wpb=3871.173, bsz=468.085, num_updates=1228, lr=0.000153569, gnorm=2.511, clip=0.000, oom=0.000, loss_scale=16.000, wall=391, train_wall=246
| epoch 001: 1240 / 35447 loss=9.682, nll_loss=8.835, ppl=456.51, wps=12186, ups=3, wpb=3872.472, bsz=467.018, num_updates=1238, lr=0.000154819, gnorm=2.501, clip=0.000, oom=0.000, loss_scale=16.000, wall=393, train_wall=248
| epoch 001: 1250 / 35447 loss=9.673, nll_loss=8.823, ppl=453.03, wps=12216, ups=3, wpb=3873.750, bsz=465.968, num_updates=1248, lr=0.000156069, gnorm=2.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=396, train_wall=250
| epoch 001: 1260 / 35447 loss=9.663, nll_loss=8.812, ppl=449.36, wps=12246, ups=3, wpb=3875.008, bsz=464.935, num_updates=1258, lr=0.000157319, gnorm=2.479, clip=0.000, oom=0.000, loss_scale=16.000, wall=398, train_wall=252
| epoch 001: 1270 / 35447 loss=9.652, nll_loss=8.799, ppl=445.51, wps=12275, ups=3, wpb=3876.246, bsz=463.918, num_updates=1268, lr=0.000158568, gnorm=2.468, clip=0.000, oom=0.000, loss_scale=16.000, wall=400, train_wall=254
| epoch 001: 1280 / 35447 loss=9.641, nll_loss=8.787, ppl=441.73, wps=12304, ups=3, wpb=3877.465, bsz=462.917, num_updates=1278, lr=0.000159818, gnorm=2.457, clip=0.000, oom=0.000, loss_scale=16.000, wall=403, train_wall=256
| epoch 001: 1290 / 35447 loss=9.630, nll_loss=8.774, ppl=437.87, wps=12333, ups=3, wpb=3878.665, bsz=461.932, num_updates=1288, lr=0.000161068, gnorm=2.446, clip=0.000, oom=0.000, loss_scale=16.000, wall=405, train_wall=258
| epoch 001: 1300 / 35447 loss=9.622, nll_loss=8.765, ppl=435.04, wps=12364, ups=3, wpb=3879.994, bsz=460.795, num_updates=1298, lr=0.000162318, gnorm=2.441, clip=0.000, oom=0.000, loss_scale=16.000, wall=407, train_wall=260
| epoch 001: 1310 / 35447 loss=9.612, nll_loss=8.754, ppl=431.64, wps=12395, ups=3, wpb=3881.339, bsz=459.657, num_updates=1308, lr=0.000163567, gnorm=2.431, clip=0.000, oom=0.000, loss_scale=16.000, wall=410, train_wall=262
| epoch 001: 1320 / 35447 loss=9.603, nll_loss=8.742, ppl=428.20, wps=12425, ups=3, wpb=3882.665, bsz=458.537, num_updates=1318, lr=0.000164817, gnorm=2.421, clip=0.000, oom=0.000, loss_scale=16.000, wall=412, train_wall=264
| epoch 001: 1330 / 35447 loss=9.592, nll_loss=8.730, ppl=424.70, wps=12456, ups=3, wpb=3883.970, bsz=457.434, num_updates=1328, lr=0.000166067, gnorm=2.411, clip=0.000, oom=0.000, loss_scale=16.000, wall=414, train_wall=266
| epoch 001: 1340 / 35447 loss=9.582, nll_loss=8.718, ppl=421.14, wps=12485, ups=3, wpb=3885.256, bsz=456.347, num_updates=1338, lr=0.000167317, gnorm=2.402, clip=0.000, oom=0.000, loss_scale=16.000, wall=416, train_wall=268
| epoch 001: 1350 / 35447 loss=9.571, nll_loss=8.706, ppl=417.54, wps=12514, ups=3, wpb=3886.522, bsz=455.276, num_updates=1348, lr=0.000168566, gnorm=2.392, clip=0.000, oom=0.000, loss_scale=16.000, wall=419, train_wall=270
| epoch 001: 1360 / 35447 loss=9.563, nll_loss=8.697, ppl=414.95, wps=12543, ups=3, wpb=3887.663, bsz=454.150, num_updates=1358, lr=0.000169816, gnorm=2.386, clip=0.000, oom=0.000, loss_scale=16.000, wall=421, train_wall=272
| epoch 001: 1370 / 35447 loss=9.558, nll_loss=8.691, ppl=413.18, wps=12573, ups=3, wpb=3888.719, bsz=452.936, num_updates=1368, lr=0.000171066, gnorm=2.378, clip=0.000, oom=0.000, loss_scale=16.000, wall=423, train_wall=274
| epoch 001: 1380 / 35447 loss=9.553, nll_loss=8.684, ppl=411.30, wps=12599, ups=3, wpb=3889.758, bsz=451.739, num_updates=1378, lr=0.000172316, gnorm=2.369, clip=0.000, oom=0.000, loss_scale=16.000, wall=425, train_wall=276
| epoch 001: 1390 / 35447 loss=9.546, nll_loss=8.677, ppl=409.24, wps=12628, ups=3, wpb=3890.783, bsz=450.559, num_updates=1388, lr=0.000173565, gnorm=2.360, clip=0.000, oom=0.000, loss_scale=16.000, wall=428, train_wall=278
| epoch 001: 1400 / 35447 loss=9.540, nll_loss=8.670, ppl=407.18, wps=12657, ups=3, wpb=3891.793, bsz=449.396, num_updates=1398, lr=0.000174815, gnorm=2.351, clip=0.000, oom=0.000, loss_scale=16.000, wall=430, train_wall=280
| epoch 001: 1410 / 35447 loss=9.534, nll_loss=8.662, ppl=405.19, wps=12686, ups=3, wpb=3892.717, bsz=448.227, num_updates=1408, lr=0.000176065, gnorm=2.343, clip=0.000, oom=0.000, loss_scale=16.000, wall=432, train_wall=281
| epoch 001: 1420 / 35447 loss=9.530, nll_loss=8.658, ppl=403.81, wps=12717, ups=3, wpb=3894.037, bsz=446.984, num_updates=1418, lr=0.000177315, gnorm=2.336, clip=0.000, oom=0.000, loss_scale=16.000, wall=434, train_wall=283
| epoch 001: 1430 / 35447 loss=9.526, nll_loss=8.653, ppl=402.40, wps=12747, ups=3, wpb=3895.340, bsz=445.759, num_updates=1428, lr=0.000178564, gnorm=2.327, clip=0.000, oom=0.000, loss_scale=16.000, wall=436, train_wall=285
| epoch 001: 1440 / 35447 loss=9.521, nll_loss=8.647, ppl=400.78, wps=12777, ups=3, wpb=3896.624, bsz=444.551, num_updates=1438, lr=0.000179814, gnorm=2.319, clip=0.000, oom=0.000, loss_scale=16.000, wall=439, train_wall=287
| epoch 001: 1450 / 35447 loss=9.516, nll_loss=8.641, ppl=399.25, wps=12807, ups=3, wpb=3897.874, bsz=443.337, num_updates=1448, lr=0.000181064, gnorm=2.311, clip=0.000, oom=0.000, loss_scale=16.000, wall=441, train_wall=289
| epoch 001: 1460 / 35447 loss=9.514, nll_loss=8.638, ppl=398.37, wps=12840, ups=3, wpb=3899.233, bsz=442.052, num_updates=1458, lr=0.000182314, gnorm=2.305, clip=0.000, oom=0.000, loss_scale=16.000, wall=443, train_wall=291
| epoch 001: 1470 / 35447 loss=9.510, nll_loss=8.634, ppl=397.22, wps=12873, ups=3, wpb=3900.574, bsz=440.785, num_updates=1468, lr=0.000183563, gnorm=2.297, clip=0.000, oom=0.000, loss_scale=16.000, wall=445, train_wall=293
| epoch 001: 1480 / 35447 loss=9.507, nll_loss=8.630, ppl=396.17, wps=12904, ups=3, wpb=3901.726, bsz=439.502, num_updates=1478, lr=0.000184813, gnorm=2.289, clip=0.000, oom=0.000, loss_scale=16.000, wall=447, train_wall=295
| epoch 001: 1490 / 35447 loss=9.503, nll_loss=8.626, ppl=395.01, wps=12934, ups=3, wpb=3902.924, bsz=438.161, num_updates=1488, lr=0.000186063, gnorm=2.283, clip=0.000, oom=0.000, loss_scale=16.000, wall=449, train_wall=296
| epoch 001: 1500 / 35447 loss=9.499, nll_loss=8.621, ppl=393.65, wps=12964, ups=3, wpb=3904.042, bsz=436.838, num_updates=1498, lr=0.000187313, gnorm=2.275, clip=0.000, oom=0.000, loss_scale=16.000, wall=451, train_wall=298
| epoch 001: 1510 / 35447 loss=9.494, nll_loss=8.615, ppl=392.05, wps=12969, ups=3, wpb=3896.774, bsz=436.011, num_updates=1508, lr=0.000188562, gnorm=2.275, clip=0.000, oom=0.000, loss_scale=16.000, wall=453, train_wall=300
| epoch 001: 1520 / 35447 loss=9.487, nll_loss=8.606, ppl=389.75, wps=12979, ups=3, wpb=3891.646, bsz=435.194, num_updates=1518, lr=0.000189812, gnorm=2.270, clip=0.000, oom=0.000, loss_scale=16.000, wall=455, train_wall=302
| epoch 001: 1530 / 35447 loss=9.477, nll_loss=8.595, ppl=386.81, wps=12989, ups=3, wpb=3886.596, bsz=434.387, num_updates=1528, lr=0.000191062, gnorm=2.263, clip=0.000, oom=0.000, loss_scale=16.000, wall=457, train_wall=304
| epoch 001: 1540 / 35447 loss=9.469, nll_loss=8.586, ppl=384.40, wps=13001, ups=3, wpb=3882.956, bsz=433.592, num_updates=1538, lr=0.000192312, gnorm=2.258, clip=0.000, oom=0.000, loss_scale=16.000, wall=459, train_wall=305
| epoch 001: 1550 / 35447 loss=9.462, nll_loss=8.578, ppl=382.07, wps=13014, ups=3, wpb=3880.043, bsz=432.806, num_updates=1548, lr=0.000193561, gnorm=2.251, clip=0.000, oom=0.000, loss_scale=16.000, wall=462, train_wall=307
| epoch 001: 1560 / 35447 loss=9.453, nll_loss=8.568, ppl=379.49, wps=13028, ups=3, wpb=3877.167, bsz=432.031, num_updates=1558, lr=0.000194811, gnorm=2.244, clip=0.000, oom=0.000, loss_scale=16.000, wall=464, train_wall=309
| epoch 001: 1570 / 35447 loss=9.445, nll_loss=8.558, ppl=376.82, wps=13041, ups=3, wpb=3874.328, bsz=431.265, num_updates=1568, lr=0.000196061, gnorm=2.237, clip=0.000, oom=0.000, loss_scale=16.000, wall=466, train_wall=311
| epoch 001: 1580 / 35447 loss=9.437, nll_loss=8.549, ppl=374.64, wps=13058, ups=3, wpb=3873.250, bsz=430.510, num_updates=1578, lr=0.000197311, gnorm=2.232, clip=0.000, oom=0.000, loss_scale=16.000, wall=468, train_wall=313
| epoch 001: 1590 / 35447 loss=9.429, nll_loss=8.540, ppl=372.14, wps=13076, ups=3, wpb=3872.436, bsz=429.763, num_updates=1588, lr=0.00019856, gnorm=2.225, clip=0.000, oom=0.000, loss_scale=16.000, wall=470, train_wall=315
| epoch 001: 1600 / 35447 loss=9.420, nll_loss=8.530, ppl=369.56, wps=13094, ups=3, wpb=3871.632, bsz=429.026, num_updates=1598, lr=0.00019981, gnorm=2.218, clip=0.000, oom=0.000, loss_scale=16.000, wall=472, train_wall=317
| epoch 001: 1610 / 35447 loss=9.411, nll_loss=8.519, ppl=366.92, wps=13112, ups=3, wpb=3870.838, bsz=428.299, num_updates=1608, lr=0.00020106, gnorm=2.211, clip=0.000, oom=0.000, loss_scale=16.000, wall=475, train_wall=319
| epoch 001: 1620 / 35447 loss=9.401, nll_loss=8.508, ppl=364.02, wps=13130, ups=3, wpb=3870.054, bsz=427.580, num_updates=1618, lr=0.00020231, gnorm=2.204, clip=0.000, oom=0.000, loss_scale=16.000, wall=477, train_wall=321
| epoch 001: 1630 / 35447 loss=9.391, nll_loss=8.496, ppl=361.14, wps=13146, ups=3, wpb=3869.280, bsz=426.870, num_updates=1628, lr=0.000203559, gnorm=2.197, clip=0.000, oom=0.000, loss_scale=16.000, wall=479, train_wall=322
| epoch 001: 1640 / 35447 loss=9.385, nll_loss=8.489, ppl=359.26, wps=13167, ups=3, wpb=3870.374, bsz=426.168, num_updates=1638, lr=0.000204809, gnorm=2.193, clip=0.000, oom=0.000, loss_scale=16.000, wall=481, train_wall=324
| epoch 001: 1650 / 35447 loss=9.377, nll_loss=8.480, ppl=357.13, wps=13187, ups=3, wpb=3871.500, bsz=425.476, num_updates=1648, lr=0.000206059, gnorm=2.186, clip=0.000, oom=0.000, loss_scale=16.000, wall=484, train_wall=326
| epoch 001: 1660 / 35447 loss=9.370, nll_loss=8.472, ppl=354.98, wps=13208, ups=3, wpb=3872.613, bsz=424.791, num_updates=1658, lr=0.000207309, gnorm=2.180, clip=0.000, oom=0.000, loss_scale=16.000, wall=486, train_wall=328
| epoch 001: 1670 / 35447 loss=9.362, nll_loss=8.463, ppl=352.75, wps=13229, ups=3, wpb=3873.712, bsz=424.115, num_updates=1668, lr=0.000208558, gnorm=2.173, clip=0.000, oom=0.000, loss_scale=16.000, wall=488, train_wall=331
| epoch 001: 1680 / 35447 loss=9.354, nll_loss=8.453, ppl=350.51, wps=13249, ups=3, wpb=3874.799, bsz=423.447, num_updates=1678, lr=0.000209808, gnorm=2.167, clip=0.000, oom=0.000, loss_scale=16.000, wall=491, train_wall=333
| epoch 001: 1690 / 35447 loss=9.346, nll_loss=8.444, ppl=348.23, wps=13269, ups=3, wpb=3875.872, bsz=422.787, num_updates=1688, lr=0.000211058, gnorm=2.160, clip=0.000, oom=0.000, loss_scale=16.000, wall=493, train_wall=335
| epoch 001: 1700 / 35447 loss=9.338, nll_loss=8.434, ppl=345.96, wps=13290, ups=3, wpb=3876.933, bsz=422.134, num_updates=1698, lr=0.000212308, gnorm=2.154, clip=0.000, oom=0.000, loss_scale=16.000, wall=495, train_wall=337
| epoch 001: 1710 / 35447 loss=9.332, nll_loss=8.428, ppl=344.35, wps=13311, ups=3, wpb=3877.687, bsz=421.349, num_updates=1708, lr=0.000213557, gnorm=2.150, clip=0.000, oom=0.000, loss_scale=16.000, wall=498, train_wall=338
| epoch 001: 1720 / 35447 loss=9.325, nll_loss=8.420, ppl=342.54, wps=13333, ups=3, wpb=3878.585, bsz=420.573, num_updates=1718, lr=0.000214807, gnorm=2.145, clip=0.000, oom=0.000, loss_scale=16.000, wall=500, train_wall=340
| epoch 001: 1730 / 35447 loss=9.318, nll_loss=8.411, ppl=340.45, wps=13355, ups=3, wpb=3879.473, bsz=419.806, num_updates=1728, lr=0.000216057, gnorm=2.138, clip=0.000, oom=0.000, loss_scale=16.000, wall=502, train_wall=342
| epoch 001: 1740 / 35447 loss=9.310, nll_loss=8.402, ppl=338.37, wps=13376, ups=3, wpb=3880.350, bsz=419.047, num_updates=1738, lr=0.000217307, gnorm=2.132, clip=0.000, oom=0.000, loss_scale=16.000, wall=504, train_wall=344
| epoch 001: 1750 / 35447 loss=9.302, nll_loss=8.393, ppl=336.23, wps=13393, ups=3, wpb=3881.218, bsz=418.297, num_updates=1748, lr=0.000218556, gnorm=2.126, clip=0.000, oom=0.000, loss_scale=16.000, wall=507, train_wall=346
| epoch 001: 1760 / 35447 loss=9.294, nll_loss=8.384, ppl=334.09, wps=13414, ups=3, wpb=3882.076, bsz=417.556, num_updates=1758, lr=0.000219806, gnorm=2.120, clip=0.000, oom=0.000, loss_scale=16.000, wall=509, train_wall=348
| epoch 001: 1770 / 35447 loss=9.286, nll_loss=8.375, ppl=331.94, wps=13434, ups=3, wpb=3882.924, bsz=416.824, num_updates=1768, lr=0.000221056, gnorm=2.114, clip=0.000, oom=0.000, loss_scale=16.000, wall=511, train_wall=350
| epoch 001: 1780 / 35447 loss=9.281, nll_loss=8.369, ppl=330.66, wps=13455, ups=3, wpb=3883.972, bsz=416.027, num_updates=1778, lr=0.000222306, gnorm=2.111, clip=0.000, oom=0.000, loss_scale=16.000, wall=513, train_wall=352
| epoch 001: 1790 / 35447 loss=9.277, nll_loss=8.363, ppl=329.35, wps=13477, ups=3, wpb=3885.068, bsz=415.221, num_updates=1788, lr=0.000223555, gnorm=2.106, clip=0.000, oom=0.000, loss_scale=16.000, wall=515, train_wall=354
| epoch 001: 1800 / 35447 loss=9.272, nll_loss=8.358, ppl=328.00, wps=13498, ups=3, wpb=3886.152, bsz=414.425, num_updates=1798, lr=0.000224805, gnorm=2.100, clip=0.000, oom=0.000, loss_scale=16.000, wall=518, train_wall=356
| epoch 001: 1810 / 35447 loss=9.266, nll_loss=8.351, ppl=326.53, wps=13520, ups=3, wpb=3887.225, bsz=413.637, num_updates=1808, lr=0.000226055, gnorm=2.095, clip=0.000, oom=0.000, loss_scale=16.000, wall=520, train_wall=358
| epoch 001: 1820 / 35447 loss=9.261, nll_loss=8.345, ppl=325.14, wps=13541, ups=3, wpb=3888.285, bsz=412.858, num_updates=1818, lr=0.000227305, gnorm=2.090, clip=0.000, oom=0.000, loss_scale=16.000, wall=522, train_wall=360
| epoch 001: 1830 / 35447 loss=9.255, nll_loss=8.339, ppl=323.70, wps=13562, ups=3, wpb=3889.334, bsz=412.088, num_updates=1828, lr=0.000228554, gnorm=2.084, clip=0.000, oom=0.000, loss_scale=16.000, wall=524, train_wall=362
| epoch 001: 1840 / 35447 loss=9.252, nll_loss=8.334, ppl=322.74, wps=13585, ups=3, wpb=3890.368, bsz=411.238, num_updates=1838, lr=0.000229804, gnorm=2.080, clip=0.000, oom=0.000, loss_scale=16.000, wall=526, train_wall=364
| epoch 001: 1850 / 35447 loss=9.248, nll_loss=8.329, ppl=321.66, wps=13609, ups=3, wpb=3891.481, bsz=410.398, num_updates=1848, lr=0.000231054, gnorm=2.075, clip=0.000, oom=0.000, loss_scale=16.000, wall=528, train_wall=365
| epoch 001: 1860 / 35447 loss=9.243, nll_loss=8.324, ppl=320.51, wps=13633, ups=4, wpb=3892.581, bsz=409.567, num_updates=1858, lr=0.000232304, gnorm=2.070, clip=0.000, oom=0.000, loss_scale=16.000, wall=531, train_wall=367
| epoch 001: 1870 / 35447 loss=9.239, nll_loss=8.319, ppl=319.33, wps=13656, ups=4, wpb=3893.670, bsz=408.745, num_updates=1868, lr=0.000233553, gnorm=2.064, clip=0.000, oom=0.000, loss_scale=16.000, wall=533, train_wall=369
| epoch 001: 1880 / 35447 loss=9.234, nll_loss=8.314, ppl=318.19, wps=13679, ups=4, wpb=3894.629, bsz=407.906, num_updates=1878, lr=0.000234803, gnorm=2.060, clip=0.000, oom=0.000, loss_scale=16.000, wall=535, train_wall=371
| epoch 001: 1890 / 35447 loss=9.232, nll_loss=8.311, ppl=317.51, wps=13701, ups=4, wpb=3895.611, bsz=407.017, num_updates=1888, lr=0.000236053, gnorm=2.055, clip=0.000, oom=0.000, loss_scale=16.000, wall=537, train_wall=373
| epoch 001: 1900 / 35447 loss=9.228, nll_loss=8.307, ppl=316.62, wps=13723, ups=4, wpb=3896.583, bsz=406.137, num_updates=1898, lr=0.000237303, gnorm=2.050, clip=0.000, oom=0.000, loss_scale=16.000, wall=539, train_wall=375
| epoch 001: 1910 / 35447 loss=9.224, nll_loss=8.302, ppl=315.59, wps=13744, ups=4, wpb=3897.544, bsz=405.266, num_updates=1908, lr=0.000238552, gnorm=2.045, clip=0.000, oom=0.000, loss_scale=16.000, wall=541, train_wall=377
| epoch 001: 1920 / 35447 loss=9.222, nll_loss=8.299, ppl=314.97, wps=13766, ups=4, wpb=3898.238, bsz=404.338, num_updates=1918, lr=0.000239802, gnorm=2.041, clip=0.000, oom=0.000, loss_scale=16.000, wall=543, train_wall=378
| epoch 001: 1930 / 35447 loss=9.220, nll_loss=8.296, ppl=314.39, wps=13788, ups=4, wpb=3898.932, bsz=403.402, num_updates=1928, lr=0.000241052, gnorm=2.037, clip=0.000, oom=0.000, loss_scale=16.000, wall=545, train_wall=380
| epoch 001: 1940 / 35447 loss=9.218, nll_loss=8.294, ppl=313.85, wps=13809, ups=4, wpb=3899.414, bsz=402.452, num_updates=1938, lr=0.000242302, gnorm=2.032, clip=0.000, oom=0.000, loss_scale=16.000, wall=547, train_wall=382
| epoch 001: 1950 / 35447 loss=9.217, nll_loss=8.293, ppl=313.61, wps=13831, ups=4, wpb=3899.684, bsz=401.454, num_updates=1948, lr=0.000243551, gnorm=2.028, clip=0.000, oom=0.000, loss_scale=16.000, wall=549, train_wall=384
| epoch 001: 1960 / 35447 loss=9.215, nll_loss=8.291, ppl=313.14, wps=13845, ups=4, wpb=3897.849, bsz=400.588, num_updates=1958, lr=0.000244801, gnorm=2.027, clip=0.000, oom=0.000, loss_scale=16.000, wall=551, train_wall=386
| epoch 001: 1970 / 35447 loss=9.209, nll_loss=8.284, ppl=311.66, wps=13847, ups=4, wpb=3892.677, bsz=400.016, num_updates=1968, lr=0.000246051, gnorm=2.024, clip=0.000, oom=0.000, loss_scale=16.000, wall=553, train_wall=387
| epoch 001: 1980 / 35447 loss=9.203, nll_loss=8.277, ppl=310.16, wps=13853, ups=4, wpb=3888.893, bsz=399.450, num_updates=1978, lr=0.000247301, gnorm=2.022, clip=0.000, oom=0.000, loss_scale=16.000, wall=555, train_wall=389
| epoch 001: 1990 / 35447 loss=9.196, nll_loss=8.268, ppl=308.36, wps=13859, ups=4, wpb=3885.267, bsz=398.889, num_updates=1988, lr=0.00024855, gnorm=2.017, clip=0.000, oom=0.000, loss_scale=16.000, wall=557, train_wall=391
| epoch 001: 2000 / 35447 loss=9.188, nll_loss=8.259, ppl=306.40, wps=13865, ups=4, wpb=3881.677, bsz=398.334, num_updates=1998, lr=0.0002498, gnorm=2.012, clip=0.000, oom=0.000, loss_scale=16.000, wall=559, train_wall=393
| epoch 001: 2010 / 35447 loss=9.182, nll_loss=8.253, ppl=305.00, wps=13874, ups=4, wpb=3879.543, bsz=397.785, num_updates=2008, lr=0.00025105, gnorm=2.009, clip=0.000, oom=0.000, loss_scale=16.000, wall=561, train_wall=395
| epoch 001: 2020 / 35447 loss=9.176, nll_loss=8.245, ppl=303.40, wps=13884, ups=4, wpb=3877.444, bsz=397.241, num_updates=2018, lr=0.0002523, gnorm=2.005, clip=0.000, oom=0.000, loss_scale=16.000, wall=564, train_wall=396
| epoch 001: 2030 / 35447 loss=9.169, nll_loss=8.237, ppl=301.76, wps=13893, ups=4, wpb=3875.366, bsz=396.702, num_updates=2028, lr=0.000253549, gnorm=2.001, clip=0.000, oom=0.000, loss_scale=16.000, wall=566, train_wall=398
| epoch 001: 2040 / 35447 loss=9.162, nll_loss=8.229, ppl=300.02, wps=13903, ups=4, wpb=3873.309, bsz=396.169, num_updates=2038, lr=0.000254799, gnorm=1.996, clip=0.000, oom=0.000, loss_scale=16.000, wall=568, train_wall=400
| epoch 001: 2050 / 35447 loss=9.156, nll_loss=8.222, ppl=298.54, wps=13914, ups=4, wpb=3872.238, bsz=395.641, num_updates=2048, lr=0.000256049, gnorm=1.993, clip=0.000, oom=0.000, loss_scale=16.000, wall=570, train_wall=402
| epoch 001: 2060 / 35447 loss=9.149, nll_loss=8.214, ppl=296.97, wps=13925, ups=4, wpb=3871.615, bsz=395.118, num_updates=2058, lr=0.000257299, gnorm=1.988, clip=0.000, oom=0.000, loss_scale=16.000, wall=572, train_wall=404
| epoch 001: 2070 / 35447 loss=9.142, nll_loss=8.206, ppl=295.32, wps=13937, ups=4, wpb=3870.998, bsz=394.600, num_updates=2068, lr=0.000258548, gnorm=1.984, clip=0.000, oom=0.000, loss_scale=16.000, wall=574, train_wall=406
| epoch 001: 2080 / 35447 loss=9.135, nll_loss=8.198, ppl=293.67, wps=13949, ups=4, wpb=3870.386, bsz=394.087, num_updates=2078, lr=0.000259798, gnorm=1.979, clip=0.000, oom=0.000, loss_scale=16.000, wall=577, train_wall=408
| epoch 001: 2090 / 35447 loss=9.127, nll_loss=8.189, ppl=291.87, wps=13961, ups=4, wpb=3869.781, bsz=393.579, num_updates=2088, lr=0.000261048, gnorm=1.975, clip=0.000, oom=0.000, loss_scale=16.000, wall=579, train_wall=410
| epoch 001: 2100 / 35447 loss=9.120, nll_loss=8.181, ppl=290.13, wps=13973, ups=4, wpb=3869.182, bsz=393.075, num_updates=2098, lr=0.000262298, gnorm=1.971, clip=0.000, oom=0.000, loss_scale=16.000, wall=581, train_wall=412
| epoch 001: 2110 / 35447 loss=9.113, nll_loss=8.173, ppl=288.55, wps=13986, ups=4, wpb=3869.077, bsz=392.577, num_updates=2108, lr=0.000263547, gnorm=1.968, clip=0.000, oom=0.000, loss_scale=16.000, wall=583, train_wall=413
| epoch 001: 2120 / 35447 loss=9.107, nll_loss=8.165, ppl=287.07, wps=14002, ups=4, wpb=3869.847, bsz=392.083, num_updates=2118, lr=0.000264797, gnorm=1.964, clip=0.000, oom=0.000, loss_scale=16.000, wall=585, train_wall=415
| epoch 001: 2130 / 35447 loss=9.100, nll_loss=8.158, ppl=285.54, wps=14017, ups=4, wpb=3870.609, bsz=391.594, num_updates=2128, lr=0.000266047, gnorm=1.959, clip=0.000, oom=0.000, loss_scale=16.000, wall=588, train_wall=417
| epoch 001: 2140 / 35447 loss=9.092, nll_loss=8.149, ppl=283.85, wps=14032, ups=4, wpb=3871.363, bsz=391.109, num_updates=2138, lr=0.000267297, gnorm=1.955, clip=0.000, oom=0.000, loss_scale=16.000, wall=590, train_wall=419
| epoch 001: 2150 / 35447 loss=9.085, nll_loss=8.141, ppl=282.20, wps=14046, ups=4, wpb=3872.111, bsz=390.629, num_updates=2148, lr=0.000268546, gnorm=1.951, clip=0.000, oom=0.000, loss_scale=16.000, wall=592, train_wall=421
| epoch 001: 2160 / 35447 loss=9.078, nll_loss=8.132, ppl=280.52, wps=14061, ups=4, wpb=3872.852, bsz=390.154, num_updates=2158, lr=0.000269796, gnorm=1.947, clip=0.000, oom=0.000, loss_scale=16.000, wall=594, train_wall=423
| epoch 001: 2170 / 35447 loss=9.071, nll_loss=8.124, ppl=278.93, wps=14072, ups=4, wpb=3873.586, bsz=389.683, num_updates=2168, lr=0.000271046, gnorm=1.943, clip=0.000, oom=0.000, loss_scale=16.000, wall=597, train_wall=425
| epoch 001: 2180 / 35447 loss=9.063, nll_loss=8.115, ppl=277.25, wps=14087, ups=4, wpb=3874.314, bsz=389.216, num_updates=2178, lr=0.000272296, gnorm=1.939, clip=0.000, oom=0.000, loss_scale=16.000, wall=599, train_wall=427
| epoch 001: 2190 / 35447 loss=9.057, nll_loss=8.108, ppl=275.81, wps=14101, ups=4, wpb=3875.027, bsz=388.724, num_updates=2188, lr=0.000273545, gnorm=1.935, clip=0.000, oom=0.000, loss_scale=16.000, wall=601, train_wall=429
| epoch 001: 2200 / 35447 loss=9.051, nll_loss=8.101, ppl=274.59, wps=14117, ups=4, wpb=3875.959, bsz=388.193, num_updates=2198, lr=0.000274795, gnorm=1.932, clip=0.000, oom=0.000, loss_scale=16.000, wall=603, train_wall=431
| epoch 001: 2210 / 35447 loss=9.045, nll_loss=8.094, ppl=273.27, wps=14132, ups=4, wpb=3876.883, bsz=387.667, num_updates=2208, lr=0.000276045, gnorm=1.928, clip=0.000, oom=0.000, loss_scale=16.000, wall=606, train_wall=433
| epoch 001: 2220 / 35447 loss=9.039, nll_loss=8.087, ppl=271.92, wps=14148, ups=4, wpb=3877.799, bsz=387.145, num_updates=2218, lr=0.000277295, gnorm=1.924, clip=0.000, oom=0.000, loss_scale=16.000, wall=608, train_wall=435
| epoch 001: 2230 / 35447 loss=9.033, nll_loss=8.080, ppl=270.55, wps=14164, ups=4, wpb=3878.706, bsz=386.628, num_updates=2228, lr=0.000278544, gnorm=1.920, clip=0.000, oom=0.000, loss_scale=16.000, wall=610, train_wall=437
| epoch 001: 2240 / 35447 loss=9.026, nll_loss=8.072, ppl=269.15, wps=14179, ups=4, wpb=3879.606, bsz=386.116, num_updates=2238, lr=0.000279794, gnorm=1.916, clip=0.000, oom=0.000, loss_scale=16.000, wall=612, train_wall=439
| epoch 001: 2250 / 35447 loss=9.020, nll_loss=8.065, ppl=267.77, wps=14195, ups=4, wpb=3880.497, bsz=385.609, num_updates=2248, lr=0.000281044, gnorm=1.912, clip=0.000, oom=0.000, loss_scale=16.000, wall=615, train_wall=441
| epoch 001: 2260 / 35447 loss=9.013, nll_loss=8.058, ppl=266.42, wps=14210, ups=4, wpb=3881.381, bsz=385.105, num_updates=2258, lr=0.000282294, gnorm=1.909, clip=0.000, oom=0.000, loss_scale=16.000, wall=617, train_wall=443
| epoch 001: 2270 / 35447 loss=9.009, nll_loss=8.052, ppl=265.43, wps=14227, ups=4, wpb=3882.253, bsz=384.550, num_updates=2268, lr=0.000283543, gnorm=1.906, clip=0.000, oom=0.000, loss_scale=16.000, wall=619, train_wall=445
| epoch 001: 2280 / 35447 loss=9.005, nll_loss=8.047, ppl=264.50, wps=14244, ups=4, wpb=3883.191, bsz=383.986, num_updates=2278, lr=0.000284793, gnorm=1.902, clip=0.000, oom=0.000, loss_scale=16.000, wall=621, train_wall=447
| epoch 001: 2290 / 35447 loss=9.000, nll_loss=8.042, ppl=263.52, wps=14261, ups=4, wpb=3884.121, bsz=383.427, num_updates=2288, lr=0.000286043, gnorm=1.898, clip=0.000, oom=0.000, loss_scale=16.000, wall=623, train_wall=449
| epoch 001: 2300 / 35447 loss=8.995, nll_loss=8.036, ppl=262.51, wps=14278, ups=4, wpb=3885.043, bsz=382.872, num_updates=2298, lr=0.000287293, gnorm=1.895, clip=0.000, oom=0.000, loss_scale=16.000, wall=625, train_wall=450
| epoch 001: 2310 / 35447 loss=8.990, nll_loss=8.031, ppl=261.48, wps=14295, ups=4, wpb=3885.957, bsz=382.322, num_updates=2308, lr=0.000288542, gnorm=1.891, clip=0.000, oom=0.000, loss_scale=16.000, wall=627, train_wall=452
| epoch 001: 2320 / 35447 loss=8.986, nll_loss=8.025, ppl=260.49, wps=14312, ups=4, wpb=3886.863, bsz=381.777, num_updates=2318, lr=0.000289792, gnorm=1.888, clip=0.000, oom=0.000, loss_scale=16.000, wall=630, train_wall=454
| epoch 001: 2330 / 35447 loss=8.981, nll_loss=8.020, ppl=259.59, wps=14328, ups=4, wpb=3887.736, bsz=381.223, num_updates=2328, lr=0.000291042, gnorm=1.884, clip=0.000, oom=0.000, loss_scale=16.000, wall=632, train_wall=456
| epoch 001: 2340 / 35447 loss=8.978, nll_loss=8.016, ppl=258.85, wps=14344, ups=4, wpb=3888.558, bsz=380.619, num_updates=2338, lr=0.000292292, gnorm=1.882, clip=0.000, oom=0.000, loss_scale=16.000, wall=634, train_wall=458
| epoch 001: 2350 / 35447 loss=8.974, nll_loss=8.011, ppl=258.04, wps=14361, ups=4, wpb=3889.374, bsz=380.020, num_updates=2348, lr=0.000293541, gnorm=1.878, clip=0.000, oom=0.000, loss_scale=16.000, wall=636, train_wall=460
| epoch 001: 2360 / 35447 loss=8.970, nll_loss=8.007, ppl=257.22, wps=14376, ups=4, wpb=3890.182, bsz=379.427, num_updates=2358, lr=0.000294791, gnorm=1.875, clip=0.000, oom=0.000, loss_scale=16.000, wall=638, train_wall=462
| epoch 001: 2370 / 35447 loss=8.966, nll_loss=8.002, ppl=256.35, wps=14392, ups=4, wpb=3890.984, bsz=378.838, num_updates=2368, lr=0.000296041, gnorm=1.871, clip=0.000, oom=0.000, loss_scale=16.000, wall=640, train_wall=464
| epoch 001: 2380 / 35447 loss=8.962, nll_loss=7.997, ppl=255.52, wps=14407, ups=4, wpb=3891.778, bsz=378.254, num_updates=2378, lr=0.000297291, gnorm=1.868, clip=0.000, oom=0.000, loss_scale=16.000, wall=642, train_wall=466
| epoch 001: 2390 / 35447 loss=8.959, nll_loss=7.994, ppl=254.89, wps=14423, ups=4, wpb=3892.294, bsz=377.615, num_updates=2388, lr=0.00029854, gnorm=1.866, clip=0.000, oom=0.000, loss_scale=16.000, wall=644, train_wall=467
| epoch 001: 2400 / 35447 loss=8.956, nll_loss=7.990, ppl=254.20, wps=14439, ups=4, wpb=3892.877, bsz=376.974, num_updates=2398, lr=0.00029979, gnorm=1.863, clip=0.000, oom=0.000, loss_scale=16.000, wall=647, train_wall=469
| epoch 001: 2410 / 35447 loss=8.952, nll_loss=7.985, ppl=253.43, wps=14454, ups=4, wpb=3893.455, bsz=376.339, num_updates=2408, lr=0.00030104, gnorm=1.859, clip=0.000, oom=0.000, loss_scale=16.000, wall=649, train_wall=471
| epoch 001: 2420 / 35447 loss=8.949, nll_loss=7.981, ppl=252.72, wps=14470, ups=4, wpb=3894.028, bsz=375.709, num_updates=2418, lr=0.00030229, gnorm=1.856, clip=0.000, oom=0.000, loss_scale=16.000, wall=651, train_wall=473
| epoch 001: 2430 / 35447 loss=8.946, nll_loss=7.978, ppl=252.20, wps=14485, ups=4, wpb=3894.331, bsz=375.044, num_updates=2428, lr=0.000303539, gnorm=1.853, clip=0.000, oom=0.000, loss_scale=16.000, wall=653, train_wall=475
| epoch 001: 2440 / 35447 loss=8.944, nll_loss=7.976, ppl=251.83, wps=14501, ups=4, wpb=3894.568, bsz=374.359, num_updates=2438, lr=0.000304789, gnorm=1.850, clip=0.000, oom=0.000, loss_scale=16.000, wall=655, train_wall=477
| epoch 001: 2450 / 35447 loss=8.942, nll_loss=7.974, ppl=251.41, wps=14517, ups=4, wpb=3894.802, bsz=373.680, num_updates=2448, lr=0.000306039, gnorm=1.847, clip=0.000, oom=0.000, loss_scale=16.000, wall=657, train_wall=478
| epoch 001: 2460 / 35447 loss=8.941, nll_loss=7.972, ppl=251.10, wps=14532, ups=4, wpb=3895.112, bsz=372.989, num_updates=2458, lr=0.000307289, gnorm=1.845, clip=0.000, oom=0.000, loss_scale=16.000, wall=659, train_wall=480
| epoch 001: 2470 / 35447 loss=8.940, nll_loss=7.970, ppl=250.80, wps=14548, ups=4, wpb=3895.537, bsz=372.288, num_updates=2468, lr=0.000308538, gnorm=1.842, clip=0.000, oom=0.000, loss_scale=16.000, wall=661, train_wall=482
| epoch 001: 2480 / 35447 loss=8.938, nll_loss=7.969, ppl=250.54, wps=14559, ups=4, wpb=3894.695, bsz=371.680, num_updates=2478, lr=0.000309788, gnorm=1.842, clip=0.000, oom=0.000, loss_scale=16.000, wall=663, train_wall=484
| epoch 001: 2490 / 35447 loss=8.935, nll_loss=7.965, ppl=249.88, wps=14561, ups=4, wpb=3891.067, bsz=371.280, num_updates=2488, lr=0.000311038, gnorm=1.840, clip=0.000, oom=0.000, loss_scale=16.000, wall=665, train_wall=486
| epoch 001: 2500 / 35447 loss=8.930, nll_loss=7.960, ppl=248.93, wps=14563, ups=4, wpb=3888.161, bsz=370.882, num_updates=2498, lr=0.000312288, gnorm=1.839, clip=0.000, oom=0.000, loss_scale=16.000, wall=667, train_wall=487
| epoch 001: 2510 / 35447 loss=8.925, nll_loss=7.953, ppl=247.77, wps=14567, ups=4, wpb=3885.672, bsz=370.488, num_updates=2508, lr=0.000313537, gnorm=1.836, clip=0.000, oom=0.000, loss_scale=16.000, wall=669, train_wall=489
| epoch 001: 2520 / 35447 loss=8.919, nll_loss=7.946, ppl=246.58, wps=14571, ups=4, wpb=3883.203, bsz=370.097, num_updates=2518, lr=0.000314787, gnorm=1.833, clip=0.000, oom=0.000, loss_scale=16.000, wall=671, train_wall=491
| epoch 001: 2530 / 35447 loss=8.913, nll_loss=7.939, ppl=245.45, wps=14575, ups=4, wpb=3880.966, bsz=369.709, num_updates=2528, lr=0.000316037, gnorm=1.831, clip=0.000, oom=0.000, loss_scale=16.000, wall=673, train_wall=493
| epoch 001: 2540 / 35447 loss=8.908, nll_loss=7.934, ppl=244.50, wps=14582, ups=4, wpb=3879.607, bsz=369.324, num_updates=2538, lr=0.000317287, gnorm=1.829, clip=0.000, oom=0.000, loss_scale=16.000, wall=675, train_wall=495
| epoch 001: 2550 / 35447 loss=8.902, nll_loss=7.927, ppl=243.43, wps=14589, ups=4, wpb=3878.258, bsz=368.942, num_updates=2548, lr=0.000318536, gnorm=1.826, clip=0.000, oom=0.000, loss_scale=16.000, wall=677, train_wall=496
| epoch 001: 2560 / 35447 loss=8.896, nll_loss=7.920, ppl=242.24, wps=14595, ups=4, wpb=3876.920, bsz=368.563, num_updates=2558, lr=0.000319786, gnorm=1.823, clip=0.000, oom=0.000, loss_scale=16.000, wall=679, train_wall=498
| epoch 001: 2570 / 35447 loss=8.890, nll_loss=7.913, ppl=241.08, wps=14602, ups=4, wpb=3875.593, bsz=368.187, num_updates=2568, lr=0.000321036, gnorm=1.820, clip=0.000, oom=0.000, loss_scale=16.000, wall=682, train_wall=500
| epoch 001: 2580 / 35447 loss=8.885, nll_loss=7.907, ppl=239.98, wps=14610, ups=4, wpb=3874.604, bsz=367.814, num_updates=2578, lr=0.000322286, gnorm=1.817, clip=0.000, oom=0.000, loss_scale=16.000, wall=684, train_wall=502
| epoch 001: 2590 / 35447 loss=8.880, nll_loss=7.901, ppl=239.02, wps=14619, ups=4, wpb=3874.347, bsz=367.444, num_updates=2588, lr=0.000323535, gnorm=1.815, clip=0.000, oom=0.000, loss_scale=16.000, wall=686, train_wall=504
| epoch 001: 2600 / 35447 loss=8.874, nll_loss=7.895, ppl=237.97, wps=14628, ups=4, wpb=3874.092, bsz=367.076, num_updates=2598, lr=0.000324785, gnorm=1.812, clip=0.000, oom=0.000, loss_scale=16.000, wall=688, train_wall=506
| epoch 001: 2610 / 35447 loss=8.868, nll_loss=7.888, ppl=236.89, wps=14637, ups=4, wpb=3873.838, bsz=366.712, num_updates=2608, lr=0.000326035, gnorm=1.809, clip=0.000, oom=0.000, loss_scale=16.000, wall=690, train_wall=508
| epoch 001: 2620 / 35447 loss=8.863, nll_loss=7.881, ppl=235.80, wps=14646, ups=4, wpb=3873.587, bsz=366.350, num_updates=2618, lr=0.000327285, gnorm=1.806, clip=0.000, oom=0.000, loss_scale=16.000, wall=692, train_wall=510
| epoch 001: 2630 / 35447 loss=8.857, nll_loss=7.875, ppl=234.69, wps=14655, ups=4, wpb=3873.337, bsz=365.991, num_updates=2628, lr=0.000328534, gnorm=1.804, clip=0.000, oom=0.000, loss_scale=16.000, wall=695, train_wall=512
| epoch 001: 2640 / 35447 loss=8.851, nll_loss=7.868, ppl=233.54, wps=14664, ups=4, wpb=3873.089, bsz=365.635, num_updates=2638, lr=0.000329784, gnorm=1.801, clip=0.000, oom=0.000, loss_scale=16.000, wall=697, train_wall=514
| epoch 001: 2650 / 35447 loss=8.846, nll_loss=7.862, ppl=232.64, wps=14672, ups=4, wpb=3873.460, bsz=365.281, num_updates=2648, lr=0.000331034, gnorm=1.799, clip=0.000, oom=0.000, loss_scale=16.000, wall=699, train_wall=515
| epoch 001: 2660 / 35447 loss=8.841, nll_loss=7.857, ppl=231.80, wps=14683, ups=4, wpb=3874.237, bsz=364.930, num_updates=2658, lr=0.000332284, gnorm=1.796, clip=0.000, oom=0.000, loss_scale=16.000, wall=701, train_wall=517
| epoch 001: 2670 / 35447 loss=8.836, nll_loss=7.851, ppl=230.85, wps=14694, ups=4, wpb=3875.009, bsz=364.582, num_updates=2668, lr=0.000333533, gnorm=1.794, clip=0.000, oom=0.000, loss_scale=16.000, wall=704, train_wall=519
| epoch 001: 2680 / 35447 loss=8.831, nll_loss=7.844, ppl=229.83, wps=14705, ups=4, wpb=3875.774, bsz=364.236, num_updates=2678, lr=0.000334783, gnorm=1.791, clip=0.000, oom=0.000, loss_scale=16.000, wall=706, train_wall=521
| epoch 001: 2690 / 35447 loss=8.825, nll_loss=7.838, ppl=228.83, wps=14716, ups=4, wpb=3876.534, bsz=363.893, num_updates=2688, lr=0.000336033, gnorm=1.788, clip=0.000, oom=0.000, loss_scale=16.000, wall=708, train_wall=523
| epoch 001: 2700 / 35447 loss=8.820, nll_loss=7.832, ppl=227.80, wps=14727, ups=4, wpb=3877.288, bsz=363.552, num_updates=2698, lr=0.000337283, gnorm=1.785, clip=0.000, oom=0.000, loss_scale=16.000, wall=710, train_wall=525
| epoch 001: 2710 / 35447 loss=8.814, nll_loss=7.825, ppl=226.80, wps=14738, ups=4, wpb=3878.037, bsz=363.214, num_updates=2708, lr=0.000338532, gnorm=1.783, clip=0.000, oom=0.000, loss_scale=16.000, wall=713, train_wall=527
| epoch 001: 2720 / 35447 loss=8.809, nll_loss=7.819, ppl=225.80, wps=14748, ups=4, wpb=3878.780, bsz=362.879, num_updates=2718, lr=0.000339782, gnorm=1.780, clip=0.000, oom=0.000, loss_scale=16.000, wall=715, train_wall=529
| epoch 001: 2730 / 35447 loss=8.804, nll_loss=7.813, ppl=224.92, wps=14760, ups=4, wpb=3879.477, bsz=362.516, num_updates=2728, lr=0.000341032, gnorm=1.778, clip=0.000, oom=0.000, loss_scale=16.000, wall=717, train_wall=531
| epoch 001: 2740 / 35447 loss=8.799, nll_loss=7.808, ppl=224.09, wps=14773, ups=4, wpb=3880.268, bsz=362.127, num_updates=2738, lr=0.000342282, gnorm=1.776, clip=0.000, oom=0.000, loss_scale=16.000, wall=719, train_wall=533
| epoch 001: 2750 / 35447 loss=8.794, nll_loss=7.802, ppl=223.14, wps=14786, ups=4, wpb=3881.053, bsz=361.741, num_updates=2748, lr=0.000343531, gnorm=1.773, clip=0.000, oom=0.000, loss_scale=16.000, wall=721, train_wall=535
| epoch 001: 2760 / 35447 loss=8.789, nll_loss=7.796, ppl=222.17, wps=14799, ups=4, wpb=3881.832, bsz=361.358, num_updates=2758, lr=0.000344781, gnorm=1.771, clip=0.000, oom=0.000, loss_scale=16.000, wall=723, train_wall=537
| epoch 001: 2770 / 35447 loss=8.783, nll_loss=7.789, ppl=221.20, wps=14812, ups=4, wpb=3882.606, bsz=360.977, num_updates=2768, lr=0.000346031, gnorm=1.768, clip=0.000, oom=0.000, loss_scale=16.000, wall=726, train_wall=539
| epoch 001: 2780 / 35447 loss=8.778, nll_loss=7.783, ppl=220.23, wps=14825, ups=4, wpb=3883.374, bsz=360.599, num_updates=2778, lr=0.000347281, gnorm=1.765, clip=0.000, oom=0.000, loss_scale=16.000, wall=728, train_wall=541
| epoch 001: 2790 / 35447 loss=8.772, nll_loss=7.776, ppl=219.25, wps=14838, ups=4, wpb=3884.137, bsz=360.224, num_updates=2788, lr=0.00034853, gnorm=1.763, clip=0.000, oom=0.000, loss_scale=16.000, wall=730, train_wall=543
| epoch 001: 2800 / 35447 loss=8.766, nll_loss=7.770, ppl=218.25, wps=14850, ups=4, wpb=3884.894, bsz=359.851, num_updates=2798, lr=0.00034978, gnorm=1.760, clip=0.000, oom=0.000, loss_scale=16.000, wall=732, train_wall=545
| epoch 001: 2810 / 35447 loss=8.762, nll_loss=7.764, ppl=217.43, wps=14863, ups=4, wpb=3885.545, bsz=359.459, num_updates=2808, lr=0.00035103, gnorm=1.758, clip=0.000, oom=0.000, loss_scale=16.000, wall=734, train_wall=546
| epoch 001: 2820 / 35447 loss=8.758, nll_loss=7.760, ppl=216.84, wps=14875, ups=4, wpb=3886.235, bsz=359.035, num_updates=2818, lr=0.00035228, gnorm=1.756, clip=0.000, oom=0.000, loss_scale=16.000, wall=736, train_wall=548
| epoch 001: 2830 / 35447 loss=8.755, nll_loss=7.756, ppl=216.19, wps=14887, ups=4, wpb=3886.920, bsz=358.614, num_updates=2828, lr=0.000353529, gnorm=1.754, clip=0.000, oom=0.000, loss_scale=16.000, wall=738, train_wall=550
| epoch 001: 2840 / 35447 loss=8.751, nll_loss=7.752, ppl=215.59, wps=14898, ups=4, wpb=3887.601, bsz=358.196, num_updates=2838, lr=0.000354779, gnorm=1.752, clip=0.000, oom=0.000, loss_scale=16.000, wall=741, train_wall=552
| epoch 001: 2850 / 35447 loss=8.748, nll_loss=7.748, ppl=214.96, wps=14910, ups=4, wpb=3888.276, bsz=357.781, num_updates=2848, lr=0.000356029, gnorm=1.749, clip=0.000, oom=0.000, loss_scale=16.000, wall=743, train_wall=554
| epoch 001: 2860 / 35447 loss=8.744, nll_loss=7.744, ppl=214.32, wps=14921, ups=4, wpb=3888.947, bsz=357.369, num_updates=2858, lr=0.000357279, gnorm=1.747, clip=0.000, oom=0.000, loss_scale=16.000, wall=745, train_wall=556
| epoch 001: 2870 / 35447 loss=8.740, nll_loss=7.739, ppl=213.67, wps=14932, ups=4, wpb=3889.613, bsz=356.960, num_updates=2868, lr=0.000358528, gnorm=1.745, clip=0.000, oom=0.000, loss_scale=16.000, wall=747, train_wall=558
| epoch 001: 2880 / 35447 loss=8.737, nll_loss=7.735, ppl=213.10, wps=14943, ups=4, wpb=3890.214, bsz=356.536, num_updates=2878, lr=0.000359778, gnorm=1.743, clip=0.000, oom=0.000, loss_scale=16.000, wall=749, train_wall=560
| epoch 001: 2890 / 35447 loss=8.735, nll_loss=7.733, ppl=212.68, wps=14955, ups=4, wpb=3890.705, bsz=356.078, num_updates=2888, lr=0.000361028, gnorm=1.741, clip=0.000, oom=0.000, loss_scale=16.000, wall=751, train_wall=562
| epoch 001: 2900 / 35447 loss=8.732, nll_loss=7.729, ppl=212.18, wps=14967, ups=4, wpb=3891.193, bsz=355.622, num_updates=2898, lr=0.000362278, gnorm=1.739, clip=0.000, oom=0.000, loss_scale=16.000, wall=753, train_wall=564
| epoch 001: 2910 / 35447 loss=8.729, nll_loss=7.726, ppl=211.65, wps=14979, ups=4, wpb=3891.677, bsz=355.169, num_updates=2908, lr=0.000363527, gnorm=1.737, clip=0.000, oom=0.000, loss_scale=16.000, wall=756, train_wall=566
| epoch 001: 2920 / 35447 loss=8.726, nll_loss=7.722, ppl=211.13, wps=14991, ups=4, wpb=3892.158, bsz=354.720, num_updates=2918, lr=0.000364777, gnorm=1.735, clip=0.000, oom=0.000, loss_scale=16.000, wall=758, train_wall=567
| epoch 001: 2930 / 35447 loss=8.723, nll_loss=7.718, ppl=210.62, wps=15002, ups=4, wpb=3892.636, bsz=354.273, num_updates=2928, lr=0.000366027, gnorm=1.733, clip=0.000, oom=0.000, loss_scale=16.000, wall=760, train_wall=569
| epoch 001: 2940 / 35447 loss=8.720, nll_loss=7.715, ppl=210.13, wps=15013, ups=4, wpb=3892.954, bsz=353.808, num_updates=2938, lr=0.000367277, gnorm=1.731, clip=0.000, oom=0.000, loss_scale=16.000, wall=762, train_wall=571
| epoch 001: 2950 / 35447 loss=8.717, nll_loss=7.712, ppl=209.68, wps=15025, ups=4, wpb=3893.154, bsz=353.313, num_updates=2948, lr=0.000368526, gnorm=1.729, clip=0.000, oom=0.000, loss_scale=16.000, wall=764, train_wall=573
| epoch 001: 2960 / 35447 loss=8.714, nll_loss=7.709, ppl=209.19, wps=15036, ups=4, wpb=3893.353, bsz=352.822, num_updates=2958, lr=0.000369776, gnorm=1.727, clip=0.000, oom=0.000, loss_scale=16.000, wall=766, train_wall=575
| epoch 001: 2970 / 35447 loss=8.711, nll_loss=7.705, ppl=208.67, wps=15048, ups=4, wpb=3893.551, bsz=352.334, num_updates=2968, lr=0.000371026, gnorm=1.725, clip=0.000, oom=0.000, loss_scale=16.000, wall=768, train_wall=577
| epoch 001: 2980 / 35447 loss=8.708, nll_loss=7.701, ppl=208.12, wps=15059, ups=4, wpb=3893.747, bsz=351.850, num_updates=2978, lr=0.000372276, gnorm=1.723, clip=0.000, oom=0.000, loss_scale=16.000, wall=770, train_wall=578
| epoch 001: 2990 / 35447 loss=8.706, nll_loss=7.699, ppl=207.72, wps=15071, ups=4, wpb=3893.982, bsz=351.352, num_updates=2988, lr=0.000373525, gnorm=1.721, clip=0.000, oom=0.000, loss_scale=16.000, wall=772, train_wall=580
| epoch 001: 3000 / 35447 loss=8.704, nll_loss=7.696, ppl=207.38, wps=15083, ups=4, wpb=3894.336, bsz=350.847, num_updates=2998, lr=0.000374775, gnorm=1.719, clip=0.000, oom=0.000, loss_scale=16.000, wall=774, train_wall=582
| epoch 001: 3010 / 35447 loss=8.702, nll_loss=7.694, ppl=207.03, wps=15095, ups=4, wpb=3894.687, bsz=350.346, num_updates=3008, lr=0.000376025, gnorm=1.717, clip=0.000, oom=0.000, loss_scale=16.000, wall=776, train_wall=584
| epoch 001: 3020 / 35447 loss=8.700, nll_loss=7.691, ppl=206.69, wps=15107, ups=4, wpb=3895.036, bsz=349.848, num_updates=3018, lr=0.000377275, gnorm=1.715, clip=0.000, oom=0.000, loss_scale=16.000, wall=778, train_wall=586
| epoch 001: 3030 / 35447 loss=8.699, nll_loss=7.690, ppl=206.50, wps=15119, ups=4, wpb=3895.455, bsz=349.326, num_updates=3028, lr=0.000378524, gnorm=1.714, clip=0.000, oom=0.000, loss_scale=16.000, wall=780, train_wall=588
| epoch 001: 3040 / 35447 loss=8.697, nll_loss=7.688, ppl=206.24, wps=15132, ups=4, wpb=3895.905, bsz=348.808, num_updates=3038, lr=0.000379774, gnorm=1.712, clip=0.000, oom=0.000, loss_scale=16.000, wall=782, train_wall=589
| epoch 001: 3050 / 35447 loss=8.696, nll_loss=7.687, ppl=206.05, wps=15145, ups=4, wpb=3896.340, bsz=348.281, num_updates=3048, lr=0.000381024, gnorm=1.710, clip=0.000, oom=0.000, loss_scale=16.000, wall=784, train_wall=591
| epoch 001: 3060 / 35447 loss=8.695, nll_loss=7.686, ppl=205.91, wps=15157, ups=4, wpb=3896.836, bsz=347.744, num_updates=3058, lr=0.000382274, gnorm=1.708, clip=0.000, oom=0.000, loss_scale=16.000, wall=786, train_wall=593
| epoch 001: 3070 / 35447 loss=8.694, nll_loss=7.684, ppl=205.69, wps=15162, ups=4, wpb=3894.679, bsz=347.327, num_updates=3068, lr=0.000383523, gnorm=1.708, clip=0.000, oom=0.000, loss_scale=16.000, wall=788, train_wall=595
| epoch 001: 3080 / 35447 loss=8.692, nll_loss=7.681, ppl=205.27, wps=15163, ups=4, wpb=3891.529, bsz=347.031, num_updates=3078, lr=0.000384773, gnorm=1.707, clip=0.000, oom=0.000, loss_scale=16.000, wall=790, train_wall=596
| epoch 001: 3090 / 35447 loss=8.689, nll_loss=7.678, ppl=204.79, wps=15167, ups=4, wpb=3888.875, bsz=346.736, num_updates=3088, lr=0.000386023, gnorm=1.705, clip=0.000, oom=0.000, loss_scale=16.000, wall=792, train_wall=598
| epoch 001: 3100 / 35447 loss=8.685, nll_loss=7.674, ppl=204.25, wps=15171, ups=4, wpb=3886.628, bsz=346.443, num_updates=3098, lr=0.000387273, gnorm=1.704, clip=0.000, oom=0.000, loss_scale=16.000, wall=794, train_wall=600
| epoch 001: 3110 / 35447 loss=8.682, nll_loss=7.670, ppl=203.63, wps=15176, ups=4, wpb=3884.830, bsz=346.152, num_updates=3108, lr=0.000388522, gnorm=1.703, clip=0.000, oom=0.000, loss_scale=16.000, wall=796, train_wall=601
| epoch 001: 3120 / 35447 loss=8.677, nll_loss=7.665, ppl=202.93, wps=15180, ups=4, wpb=3883.045, bsz=345.863, num_updates=3118, lr=0.000389772, gnorm=1.701, clip=0.000, oom=0.000, loss_scale=16.000, wall=798, train_wall=603
| epoch 001: 3130 / 35447 loss=8.673, nll_loss=7.660, ppl=202.22, wps=15185, ups=4, wpb=3881.270, bsz=345.575, num_updates=3128, lr=0.000391022, gnorm=1.699, clip=0.000, oom=0.000, loss_scale=16.000, wall=800, train_wall=605
| epoch 001: 3140 / 35447 loss=8.668, nll_loss=7.654, ppl=201.47, wps=15191, ups=4, wpb=3880.158, bsz=345.290, num_updates=3138, lr=0.000392272, gnorm=1.698, clip=0.000, oom=0.000, loss_scale=16.000, wall=802, train_wall=606
| epoch 001: 3150 / 35447 loss=8.663, nll_loss=7.649, ppl=200.66, wps=15198, ups=4, wpb=3879.217, bsz=345.006, num_updates=3148, lr=0.000393521, gnorm=1.696, clip=0.000, oom=0.000, loss_scale=16.000, wall=804, train_wall=608
| epoch 001: 3160 / 35447 loss=8.658, nll_loss=7.642, ppl=199.80, wps=15205, ups=4, wpb=3878.282, bsz=344.725, num_updates=3158, lr=0.000394771, gnorm=1.694, clip=0.000, oom=0.000, loss_scale=16.000, wall=806, train_wall=610
| epoch 001: 3170 / 35447 loss=8.652, nll_loss=7.636, ppl=198.90, wps=15212, ups=4, wpb=3877.353, bsz=344.444, num_updates=3168, lr=0.000396021, gnorm=1.692, clip=0.000, oom=0.000, loss_scale=16.000, wall=808, train_wall=612
| epoch 001: 3180 / 35447 loss=8.646, nll_loss=7.629, ppl=197.97, wps=15219, ups=4, wpb=3876.430, bsz=344.166, num_updates=3178, lr=0.000397271, gnorm=1.690, clip=0.000, oom=0.000, loss_scale=16.000, wall=809, train_wall=613
| epoch 001: 3190 / 35447 loss=8.641, nll_loss=7.623, ppl=197.12, wps=15225, ups=4, wpb=3875.596, bsz=343.890, num_updates=3188, lr=0.00039852, gnorm=1.689, clip=0.000, oom=0.000, loss_scale=16.000, wall=812, train_wall=615
| epoch 001: 3200 / 35447 loss=8.637, nll_loss=7.619, ppl=196.52, wps=15233, ups=4, wpb=3875.485, bsz=343.615, num_updates=3198, lr=0.00039977, gnorm=1.687, clip=0.000, oom=0.000, loss_scale=16.000, wall=814, train_wall=617
| epoch 001: 3210 / 35447 loss=8.633, nll_loss=7.614, ppl=195.87, wps=15241, ups=4, wpb=3875.374, bsz=343.342, num_updates=3208, lr=0.00040102, gnorm=1.686, clip=0.000, oom=0.000, loss_scale=16.000, wall=816, train_wall=619
| epoch 001: 3220 / 35447 loss=8.629, nll_loss=7.609, ppl=195.19, wps=15250, ups=4, wpb=3875.264, bsz=343.070, num_updates=3218, lr=0.00040227, gnorm=1.684, clip=0.000, oom=0.000, loss_scale=16.000, wall=818, train_wall=621
| epoch 001: 3230 / 35447 loss=8.624, nll_loss=7.604, ppl=194.49, wps=15256, ups=4, wpb=3875.155, bsz=342.800, num_updates=3228, lr=0.000403519, gnorm=1.682, clip=0.000, oom=0.000, loss_scale=16.000, wall=820, train_wall=622
| epoch 001: 3240 / 35447 loss=8.620, nll_loss=7.598, ppl=193.77, wps=15264, ups=4, wpb=3875.046, bsz=342.532, num_updates=3238, lr=0.000404769, gnorm=1.680, clip=0.000, oom=0.000, loss_scale=16.000, wall=822, train_wall=624
| epoch 001: 3250 / 35447 loss=8.615, nll_loss=7.593, ppl=193.04, wps=15272, ups=4, wpb=3874.938, bsz=342.266, num_updates=3248, lr=0.000406019, gnorm=1.679, clip=0.000, oom=0.000, loss_scale=16.000, wall=824, train_wall=626
| epoch 001: 3260 / 35447 loss=8.610, nll_loss=7.587, ppl=192.31, wps=15280, ups=4, wpb=3874.831, bsz=342.001, num_updates=3258, lr=0.000407269, gnorm=1.677, clip=0.000, oom=0.000, loss_scale=16.000, wall=826, train_wall=628
| epoch 001: 3270 / 35447 loss=8.606, nll_loss=7.583, ppl=191.71, wps=15292, ups=4, wpb=3875.446, bsz=341.738, num_updates=3268, lr=0.000408518, gnorm=1.676, clip=0.000, oom=0.000, loss_scale=16.000, wall=828, train_wall=630
| epoch 001: 3280 / 35447 loss=8.602, nll_loss=7.578, ppl=191.04, wps=15303, ups=4, wpb=3876.119, bsz=341.477, num_updates=3278, lr=0.000409768, gnorm=1.674, clip=0.000, oom=0.000, loss_scale=16.000, wall=830, train_wall=631
| epoch 001: 3290 / 35447 loss=8.597, nll_loss=7.572, ppl=190.33, wps=15314, ups=4, wpb=3876.788, bsz=341.217, num_updates=3288, lr=0.000411018, gnorm=1.672, clip=0.000, oom=0.000, loss_scale=16.000, wall=832, train_wall=633
| epoch 001: 3300 / 35447 loss=8.592, nll_loss=7.567, ppl=189.59, wps=15326, ups=4, wpb=3877.452, bsz=340.958, num_updates=3298, lr=0.000412268, gnorm=1.670, clip=0.000, oom=0.000, loss_scale=16.000, wall=834, train_wall=635
| epoch 001: 3310 / 35447 loss=8.588, nll_loss=7.561, ppl=188.87, wps=15337, ups=4, wpb=3878.113, bsz=340.701, num_updates=3308, lr=0.000413517, gnorm=1.669, clip=0.000, oom=0.000, loss_scale=16.000, wall=836, train_wall=637
| epoch 001: 3320 / 35447 loss=8.583, nll_loss=7.556, ppl=188.14, wps=15348, ups=4, wpb=3878.770, bsz=340.446, num_updates=3318, lr=0.000414767, gnorm=1.667, clip=0.000, oom=0.000, loss_scale=16.000, wall=839, train_wall=639
| epoch 001: 3330 / 35447 loss=8.578, nll_loss=7.550, ppl=187.42, wps=15359, ups=4, wpb=3879.422, bsz=340.192, num_updates=3328, lr=0.000416017, gnorm=1.665, clip=0.000, oom=0.000, loss_scale=16.000, wall=841, train_wall=641
| epoch 001: 3340 / 35447 loss=8.573, nll_loss=7.545, ppl=186.70, wps=15370, ups=4, wpb=3880.071, bsz=339.940, num_updates=3338, lr=0.000417267, gnorm=1.663, clip=0.000, oom=0.000, loss_scale=16.000, wall=843, train_wall=642
| epoch 001: 3350 / 35447 loss=8.569, nll_loss=7.540, ppl=186.05, wps=15380, ups=4, wpb=3880.688, bsz=339.675, num_updates=3348, lr=0.000418516, gnorm=1.662, clip=0.000, oom=0.000, loss_scale=16.000, wall=845, train_wall=644
./myle-e2e.sh: line 45: 3926 Killed $python_cli $vol_fairseq/train.py $vol_data --arch=transformer_vaswani_wmt_en_de_big --max-source-positions=64 --max-target-positions=64 --required-batch-size-multiple=8 --max-tokens=4096 --no-save --attention-dropout=0.1 --no-progress-bar --criterion=label_smoothed_cross_entropy --log-interval=10 --source-lang=en --lr-scheduler=inverse_sqrt --min-lr 1e-09 --skip-invalid-size-inputs-valid-test --target-lang=de --label-smoothing=0.1 --curriculum=4 --max-epoch=50 --update-freq=1 --optimizer adam --warmup-init-lr 1e-07 --lr 0.0005 --warmup-updates 4000 --adam-betas='(0.9,0.98)' --share-all-embeddings --dropout 0.3 --weight-decay 0.0 --fp16 --distributed-world-size=1 --valid-subset=valid
Fri Aug 16 19:06:49 UTC 2019
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment