Created
August 19, 2019 16:56
-
-
Save taylanbil/92abf01709ce26d7fb8f701dd273abd5 to your computer and use it in GitHub Desktop.
Fairseq Transformer on 1 TPU, full dataset - measure 100 steps.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Fri Aug 16 19:11:06 UTC 2019 | |
#!/bin/bash
#
# Launch fairseq Transformer (transformer_vaswani_wmt_en_de_big) training on
# a single TPU core via pytorch/xla, logging throughput every 10 steps.
# Requires: a reachable TPU worker at $TPU_IP_ADDRESS:8470 and preprocessed
# WMT18 en-de BPE data under $data_path.

# -u: error on unset variables. Deliberately no -e: pkill legitimately
# returns non-zero when nothing matches, and we don't want that to abort.
set -u

batch_size=128
n_words=64    # fixed pad length for source and target sequences
data_path=/home/taylanbil/data/wmt18_en_de_bpej32k
#data_path=/home/taylanbil/data/dummy

#conda activate pytorch

# Kill any training process left over from a previous run.
# NOTE(review): SIGKILL-first with a bare 'python' match is blunt — it kills
# every python process on the box; consider plain 'pkill python' (SIGTERM)
# or a more specific -f pattern. '|| true' keeps the script going when no
# process matches.
pkill -9 python || true

# TPU worker address (nightly). The original script assigned 10.1.2.2 and
# immediately overwrote it with 10.1.4.2; only the live value is kept.
TPU_IP_ADDRESS=10.1.4.2

# Assorted XLA debug knobs, disabled by default.
#export XLA_USE_32BIT_LONG=1
#export XLA_IR_DEBUG=1
#export XLA_HLO_DEBUG=1
#export GET_TENSORS_OPBYOP=1
#export SYNC_TENSORS_OPBYOP=1
#export XLA_SAVE_TENSORS_FILE=$tensors_dir/${taskname}_tensors.txt
#export TRIM_GRAPH_SIZE=50000
#export XLA_SYNC_WAIT=1
export XRT_TPU_CONFIG="tpu_worker;0;$TPU_IP_ADDRESS:8470"

# Extra flags, currently NOT passed to the command below. Kept as a bash
# array so they can be spliced in safely with "${other_flags[@]}" — the
# original string form embedded a '#' comment mid-value, which would have
# corrupted the argument list had the variable ever been expanded.
# (--max-tokens has no effect with TPUs.)
other_flags=(
  --disable-validation
  --max-tokens=4096
  --num-workers=8
)

#LD_PRELOAD=/usr/lib/libtcmalloc_minimal.so.4 \
python tpu-examples/fairseq_train_tpu.py \
  "$data_path" \
  --arch=transformer_vaswani_wmt_en_de_big \
  --max-sentences="$batch_size" \
  --max-sentences-valid="$batch_size" \
  --max-source-positions="$n_words" \
  --max-target-positions="$n_words" \
  --required-batch-size-multiple="$batch_size" \
  --no-save \
  --attention-dropout=0.1 \
  --no-progress-bar \
  --criterion=label_smoothed_cross_entropy \
  --log-interval=100 \
  --source-lang=en \
  --lr-scheduler=inverse_sqrt \
  --min-lr 1e-09 \
  --skip-invalid-size-inputs-valid-test \
  --target-lang=de \
  --label-smoothing=0.1 \
  --update-freq=1 \
  --optimizer adam \
  --adam-betas '(0.9, 0.98)' \
  --warmup-init-lr 1e-07 \
  --lr 0.0005 \
  --warmup-updates 4000 \
  --share-all-embeddings \
  --dropout 0.3 \
  --weight-decay 0.0 \
  --valid-subset=valid \
  --curriculum=4 \
  --max-epoch=50 \
  --num_cores=1 \
  --metrics_debug \
  --pad_to_length="$n_words" \
  --log_steps=10
-------------- | |
nohup: ignoring input | |
2019-08-16 19:11:07.384481: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) CPU:0 -> /job:tpu_worker/replica:0/task:0/device:XLA_CPU:0 | |
2019-08-16 19:11:07.384547: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:0 -> /job:tpu_worker/replica:0/task:0/device:TPU:0 | |
2019-08-16 19:11:07.384555: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:1 -> /job:tpu_worker/replica:0/task:0/device:TPU:1 | |
2019-08-16 19:11:07.384561: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:2 -> /job:tpu_worker/replica:0/task:0/device:TPU:2 | |
2019-08-16 19:11:07.384567: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:3 -> /job:tpu_worker/replica:0/task:0/device:TPU:3 | |
2019-08-16 19:11:07.384572: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:4 -> /job:tpu_worker/replica:0/task:0/device:TPU:4 | |
2019-08-16 19:11:07.384578: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:5 -> /job:tpu_worker/replica:0/task:0/device:TPU:5 | |
2019-08-16 19:11:07.384584: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:6 -> /job:tpu_worker/replica:0/task:0/device:TPU:6 | |
2019-08-16 19:11:07.384589: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:7 -> /job:tpu_worker/replica:0/task:0/device:TPU:7 | |
2019-08-16 19:11:07.384617: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:200] Worker grpc://10.1.4.2:8470 for /job:tpu_worker/replica:0/task:0 | |
2019-08-16 19:11:07.384623: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:204] XRT default device: TPU:0 | |
2019-08-16 19:11:07.386730: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:1086] Configuring TPU for worker tpu_worker:0 at grpc://10.1.4.2:8470 | |
2019-08-16 19:11:10.203782: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:1102] TPU topology: mesh_shape: 2 | |
mesh_shape: 2 | |
mesh_shape: 2 | |
num_tasks: 1 | |
num_tpu_devices_per_task: 8 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
device_coordinates: 0 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
device_coordinates: 0 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
device_coordinates: 1 | |
| [en] dictionary: 35662 types | |
| [de] dictionary: 35662 types | |
| /home/taylanbil/data/wmt18_en_de_bpej32k valid en-de 52385 examples | |
TransformerModel( | |
(encoder): TransformerEncoder( | |
(embed_tokens): Embedding(35662, 1024, padding_idx=1) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(3): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(4): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(5): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
) | |
) | |
(decoder): TransformerDecoder( | |
(embed_tokens): Embedding(35662, 1024, padding_idx=1) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(3): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(4): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
(5): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) | |
) | |
) | |
) | |
) | |
| model transformer_vaswani_wmt_en_de_big, criterion LabelSmoothedCrossEntropyCriterion | |
| num. model params: 212875264 (num. trained: 212875264) | |
| no existing checkpoint found checkpoints/checkpoint_last.pt | |
| loading train data for epoch 0 | |
| /home/taylanbil/data/wmt18_en_de_bpej32k train en-de 5186259 examples | |
| WARNING: 240829 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[1422704, 2718830, 2897878, 3673048, 2016896, 2200333, 3886976, 2097242, 3124502, 2871279] | |
Epoch 1 begin 19:12:18 | |
training/ 19:13:50, device xla:1, step 10, Rate=147.54, Global Rate=54.28 | |
training/ 19:13:57, device xla:1, step 20, Rate=181.96, Global Rate=83.93 | |
training/ 19:14:04, device xla:1, step 30, Rate=186.06, Global Rate=102.69 | |
training/ 19:14:11, device xla:1, step 40, Rate=188.02, Global Rate=115.84 | |
training/ 19:14:18, device xla:1, step 50, Rate=187.74, Global Rate=125.44 | |
training/ 19:14:25, device xla:1, step 60, Rate=185.40, Global Rate=132.60 | |
training/ 19:14:31, device xla:1, step 70, Rate=188.47, Global Rate=138.48 | |
training/ 19:14:38, device xla:1, step 80, Rate=188.61, Global Rate=143.22 | |
training/ 19:14:45, device xla:1, step 90, Rate=186.87, Global Rate=147.07 | |
training/ 19:14:52, device xla:1, step 100, Rate=186.01, Global Rate=150.22 | |
training/ 19:14:59, device xla:1, step 110, Rate=187.23, Global Rate=152.94 | |
training/ 19:15:05, device xla:1, step 120, Rate=187.16, Global Rate=155.34 | |
training/ 19:15:12, device xla:1, step 130, Rate=185.56, Global Rate=157.29 | |
training/ 19:15:19, device xla:1, step 140, Rate=185.94, Global Rate=159.04 | |
training/ 19:15:26, device xla:1, step 150, Rate=186.13, Global Rate=160.60 | |
training/ 19:15:33, device xla:1, step 160, Rate=186.80, Global Rate=162.02 | |
training/ 19:15:40, device xla:1, step 170, Rate=187.05, Global Rate=163.31 | |
training/ 19:15:47, device xla:1, step 180, Rate=187.78, Global Rate=164.50 | |
training/ 19:15:53, device xla:1, step 190, Rate=186.71, Global Rate=165.55 | |
training/ 19:16:00, device xla:1, step 200, Rate=187.25, Global Rate=166.49 | |
training/ 19:16:07, device xla:1, step 210, Rate=187.82, Global Rate=167.38 | |
training/ 19:16:14, device xla:1, step 220, Rate=187.15, Global Rate=168.20 | |
training/ 19:16:21, device xla:1, step 230, Rate=186.59, Global Rate=168.93 | |
training/ 19:16:28, device xla:1, step 240, Rate=186.66, Global Rate=169.58 | |
training/ 19:16:35, device xla:1, step 250, Rate=186.69, Global Rate=170.20 | |
training/ 19:16:41, device xla:1, step 260, Rate=187.25, Global Rate=170.79 | |
training/ 19:16:48, device xla:1, step 270, Rate=187.82, Global Rate=171.38 | |
training/ 19:16:55, device xla:1, step 280, Rate=187.10, Global Rate=171.90 | |
training/ 19:17:02, device xla:1, step 290, Rate=186.56, Global Rate=172.36 | |
training/ 19:17:09, device xla:1, step 300, Rate=187.18, Global Rate=172.81 | |
training/ 19:17:16, device xla:1, step 310, Rate=188.27, Global Rate=173.27 | |
training/ 19:17:22, device xla:1, step 320, Rate=187.49, Global Rate=173.69 | |
training/ 19:17:29, device xla:1, step 330, Rate=188.12, Global Rate=174.10 | |
training/ 19:17:36, device xla:1, step 340, Rate=186.50, Global Rate=174.44 | |
training/ 19:17:43, device xla:1, step 350, Rate=187.66, Global Rate=174.79 | |
training/ 19:17:50, device xla:1, step 360, Rate=187.33, Global Rate=175.12 | |
training/ 19:17:57, device xla:1, step 370, Rate=188.36, Global Rate=175.46 | |
training/ 19:18:03, device xla:1, step 380, Rate=186.55, Global Rate=175.73 | |
training/ 19:19:30, device xla:1, step 390, Rate=168.56, Global Rate=137.32 | |
training/ 19:19:37, device xla:1, step 400, Rate=184.89, Global Rate=138.24 | |
training/ 19:19:44, device xla:1, step 410, Rate=186.70, Global Rate=139.13 | |
training/ 19:19:51, device xla:1, step 420, Rate=186.37, Global Rate=139.97 | |
training/ 19:19:58, device xla:1, step 430, Rate=186.65, Global Rate=140.78 | |
training/ 19:20:04, device xla:1, step 440, Rate=187.37, Global Rate=141.58 | |
training/ 19:20:11, device xla:1, step 450, Rate=186.85, Global Rate=142.35 | |
training/ 19:20:18, device xla:1, step 460, Rate=185.47, Global Rate=143.07 | |
training/ 19:20:25, device xla:1, step 470, Rate=186.31, Global Rate=143.78 | |
training/ 19:20:32, device xla:1, step 480, Rate=186.04, Global Rate=144.46 | |
training/ 19:20:39, device xla:1, step 490, Rate=186.27, Global Rate=145.13 | |
training/ 19:20:46, device xla:1, step 500, Rate=186.74, Global Rate=145.78 | |
training/ 19:20:53, device xla:1, step 510, Rate=184.84, Global Rate=146.39 | |
training/ 19:20:59, device xla:1, step 520, Rate=185.34, Global Rate=146.99 | |
training/ 19:21:06, device xla:1, step 530, Rate=186.37, Global Rate=147.58 | |
training/ 19:21:13, device xla:1, step 540, Rate=185.27, Global Rate=148.13 | |
training/ 19:21:20, device xla:1, step 550, Rate=186.04, Global Rate=148.68 | |
training/ 19:21:27, device xla:1, step 560, Rate=186.03, Global Rate=149.21 | |
training/ 19:21:34, device xla:1, step 570, Rate=187.00, Global Rate=149.74 | |
training/ 19:21:41, device xla:1, step 580, Rate=185.08, Global Rate=150.24 | |
training/ 19:21:48, device xla:1, step 590, Rate=184.48, Global Rate=150.69 | |
training/ 19:21:55, device xla:1, step 600, Rate=186.42, Global Rate=151.17 | |
training/ 19:22:02, device xla:1, step 610, Rate=185.78, Global Rate=151.63 | |
training/ 19:22:08, device xla:1, step 620, Rate=186.52, Global Rate=152.09 | |
training/ 19:22:15, device xla:1, step 630, Rate=186.41, Global Rate=152.54 | |
training/ 19:22:22, device xla:1, step 640, Rate=186.55, Global Rate=152.97 | |
training/ 19:22:29, device xla:1, step 650, Rate=186.48, Global Rate=153.40 | |
training/ 19:22:36, device xla:1, step 660, Rate=184.41, Global Rate=153.76 | |
training/ 19:22:43, device xla:1, step 670, Rate=186.34, Global Rate=154.17 | |
training/ 19:22:50, device xla:1, step 680, Rate=186.05, Global Rate=154.55 | |
training/ 19:22:57, device xla:1, step 690, Rate=184.91, Global Rate=154.92 | |
training/ 19:23:04, device xla:1, step 700, Rate=184.10, Global Rate=155.27 | |
training/ 19:23:11, device xla:1, step 710, Rate=184.34, Global Rate=155.62 | |
training/ 19:23:17, device xla:1, step 720, Rate=185.56, Global Rate=155.98 | |
training/ 19:23:24, device xla:1, step 730, Rate=186.65, Global Rate=156.32 | |
training/ 19:23:31, device xla:1, step 740, Rate=186.87, Global Rate=156.67 | |
training/ 19:23:38, device xla:1, step 750, Rate=187.02, Global Rate=157.01 | |
training/ 19:23:45, device xla:1, step 760, Rate=186.58, Global Rate=157.34 | |
training/ 19:23:52, device xla:1, step 770, Rate=187.04, Global Rate=157.66 | |
training/ 19:23:59, device xla:1, step 780, Rate=185.92, Global Rate=157.96 | |
training/ 19:24:06, device xla:1, step 790, Rate=186.37, Global Rate=158.27 | |
training/ 19:24:12, device xla:1, step 800, Rate=185.71, Global Rate=158.56 | |
training/ 19:24:19, device xla:1, step 810, Rate=183.70, Global Rate=158.83 | |
training/ 19:24:26, device xla:1, step 820, Rate=185.45, Global Rate=159.11 | |
training/ 19:24:33, device xla:1, step 830, Rate=185.46, Global Rate=159.37 | |
training/ 19:24:40, device xla:1, step 840, Rate=186.16, Global Rate=159.65 | |
training/ 19:24:47, device xla:1, step 850, Rate=182.81, Global Rate=159.89 | |
training/ 19:24:54, device xla:1, step 860, Rate=186.54, Global Rate=160.15 | |
training/ 19:25:01, device xla:1, step 870, Rate=185.59, Global Rate=160.41 | |
training/ 19:25:08, device xla:1, step 880, Rate=186.54, Global Rate=160.66 | |
training/ 19:25:15, device xla:1, step 890, Rate=187.43, Global Rate=160.92 | |
training/ 19:25:21, device xla:1, step 900, Rate=187.47, Global Rate=161.17 | |
training/ 19:25:28, device xla:1, step 910, Rate=186.96, Global Rate=161.42 | |
training/ 19:25:35, device xla:1, step 920, Rate=184.70, Global Rate=161.63 | |
training/ 19:25:42, device xla:1, step 930, Rate=186.57, Global Rate=161.86 | |
training/ 19:25:49, device xla:1, step 940, Rate=185.84, Global Rate=162.08 | |
training/ 19:25:56, device xla:1, step 950, Rate=185.63, Global Rate=162.30 | |
training/ 19:26:03, device xla:1, step 960, Rate=186.33, Global Rate=162.52 | |
training/ 19:26:10, device xla:1, step 970, Rate=186.88, Global Rate=162.74 | |
training/ 19:26:16, device xla:1, step 980, Rate=187.71, Global Rate=162.96 | |
training/ 19:26:23, device xla:1, step 990, Rate=186.77, Global Rate=163.16 | |
training/ 19:26:30, device xla:1, step 1000, Rate=187.25, Global Rate=163.37 | |
training/ 19:26:37, device xla:1, step 1010, Rate=185.91, Global Rate=163.56 | |
training/ 19:26:44, device xla:1, step 1020, Rate=186.25, Global Rate=163.76 | |
training/ 19:26:51, device xla:1, step 1030, Rate=185.78, Global Rate=163.95 | |
training/ 19:26:58, device xla:1, step 1040, Rate=185.39, Global Rate=164.13 | |
training/ 19:27:05, device xla:1, step 1050, Rate=184.32, Global Rate=164.30 | |
training/ 19:27:12, device xla:1, step 1060, Rate=182.69, Global Rate=164.46 | |
training/ 19:27:18, device xla:1, step 1070, Rate=186.17, Global Rate=164.64 | |
training/ 19:27:25, device xla:1, step 1080, Rate=186.41, Global Rate=164.81 | |
training/ 19:27:32, device xla:1, step 1090, Rate=183.01, Global Rate=164.97 | |
training/ 19:27:39, device xla:1, step 1100, Rate=184.42, Global Rate=165.13 | |
training/ 19:27:46, device xla:1, step 1110, Rate=185.77, Global Rate=165.29 | |
training/ 19:27:53, device xla:1, step 1120, Rate=186.76, Global Rate=165.46 | |
training/ 19:28:00, device xla:1, step 1130, Rate=186.92, Global Rate=165.63 | |
training/ 19:28:07, device xla:1, step 1140, Rate=186.51, Global Rate=165.79 | |
training/ 19:28:14, device xla:1, step 1150, Rate=186.30, Global Rate=165.95 | |
training/ 19:28:21, device xla:1, step 1160, Rate=185.14, Global Rate=166.10 | |
training/ 19:28:27, device xla:1, step 1170, Rate=184.13, Global Rate=166.24 | |
training/ 19:28:34, device xla:1, step 1180, Rate=185.06, Global Rate=166.39 | |
training/ 19:28:41, device xla:1, step 1190, Rate=186.86, Global Rate=166.54 | |
training/ 19:28:48, device xla:1, step 1200, Rate=186.99, Global Rate=166.69 | |
training/ 19:28:55, device xla:1, step 1210, Rate=186.67, Global Rate=166.83 | |
training/ 19:29:02, device xla:1, step 1220, Rate=186.29, Global Rate=166.97 | |
training/ 19:29:09, device xla:1, step 1230, Rate=184.54, Global Rate=167.10 | |
training/ 19:29:16, device xla:1, step 1240, Rate=185.07, Global Rate=167.23 | |
training/ 19:29:23, device xla:1, step 1250, Rate=185.30, Global Rate=167.36 | |
training/ 19:29:30, device xla:1, step 1260, Rate=185.01, Global Rate=167.48 | |
training/ 19:29:36, device xla:1, step 1270, Rate=185.68, Global Rate=167.62 | |
training/ 19:29:43, device xla:1, step 1280, Rate=186.37, Global Rate=167.75 | |
training/ 19:29:50, device xla:1, step 1290, Rate=187.15, Global Rate=167.88 | |
training/ 19:29:57, device xla:1, step 1300, Rate=186.82, Global Rate=168.01 | |
training/ 19:30:04, device xla:1, step 1310, Rate=182.35, Global Rate=168.12 | |
training/ 19:30:11, device xla:1, step 1320, Rate=186.00, Global Rate=168.25 | |
training/ 19:30:18, device xla:1, step 1330, Rate=186.45, Global Rate=168.37 | |
training/ 19:30:25, device xla:1, step 1340, Rate=184.82, Global Rate=168.48 | |
training/ 19:30:32, device xla:1, step 1350, Rate=186.35, Global Rate=168.60 | |
training/ 19:30:38, device xla:1, step 1360, Rate=186.34, Global Rate=168.72 | |
training/ 19:30:45, device xla:1, step 1370, Rate=186.30, Global Rate=168.83 | |
training/ 19:30:52, device xla:1, step 1380, Rate=185.49, Global Rate=168.94 | |
training/ 19:30:59, device xla:1, step 1390, Rate=184.25, Global Rate=169.03 | |
training/ 19:31:06, device xla:1, step 1400, Rate=185.99, Global Rate=169.15 | |
training/ 19:31:13, device xla:1, step 1410, Rate=186.44, Global Rate=169.26 | |
training/ 19:31:20, device xla:1, step 1420, Rate=183.82, Global Rate=169.36 | |
training/ 19:31:27, device xla:1, step 1430, Rate=186.68, Global Rate=169.47 | |
training/ 19:31:34, device xla:1, step 1440, Rate=186.40, Global Rate=169.57 | |
training/ 19:31:41, device xla:1, step 1450, Rate=185.86, Global Rate=169.66 | |
training/ 19:31:47, device xla:1, step 1460, Rate=185.60, Global Rate=169.76 | |
training/ 19:31:54, device xla:1, step 1470, Rate=185.72, Global Rate=169.86 | |
training/ 19:32:01, device xla:1, step 1480, Rate=186.44, Global Rate=169.96 | |
training/ 19:32:08, device xla:1, step 1490, Rate=185.70, Global Rate=170.06 | |
training/ 19:32:15, device xla:1, step 1500, Rate=185.56, Global Rate=170.15 | |
training/ 19:32:22, device xla:1, step 1510, Rate=185.26, Global Rate=170.24 | |
training/ 19:32:29, device xla:1, step 1520, Rate=185.42, Global Rate=170.33 | |
training/ 19:32:36, device xla:1, step 1530, Rate=185.11, Global Rate=170.42 | |
training/ 19:32:43, device xla:1, step 1540, Rate=185.86, Global Rate=170.51 | |
training/ 19:32:50, device xla:1, step 1550, Rate=185.25, Global Rate=170.60 | |
training/ 19:32:57, device xla:1, step 1560, Rate=184.66, Global Rate=170.68 | |
training/ 19:33:03, device xla:1, step 1570, Rate=185.51, Global Rate=170.77 | |
training/ 19:33:10, device xla:1, step 1580, Rate=186.02, Global Rate=170.86 | |
training/ 19:33:17, device xla:1, step 1590, Rate=185.21, Global Rate=170.94 | |
training/ 19:33:24, device xla:1, step 1600, Rate=185.99, Global Rate=171.03 | |
training/ 19:33:31, device xla:1, step 1610, Rate=186.51, Global Rate=171.12 | |
training/ 19:33:38, device xla:1, step 1620, Rate=183.23, Global Rate=171.19 | |
training/ 19:33:45, device xla:1, step 1630, Rate=184.66, Global Rate=171.28 | |
training/ 19:33:52, device xla:1, step 1640, Rate=184.81, Global Rate=171.35 | |
training/ 19:33:59, device xla:1, step 1650, Rate=185.64, Global Rate=171.43 | |
training/ 19:34:05, device xla:1, step 1660, Rate=184.85, Global Rate=171.51 | |
training/ 19:34:12, device xla:1, step 1670, Rate=186.26, Global Rate=171.59 | |
training/ 19:34:19, device xla:1, step 1680, Rate=186.42, Global Rate=171.67 | |
training/ 19:34:26, device xla:1, step 1690, Rate=186.36, Global Rate=171.75 | |
training/ 19:34:33, device xla:1, step 1700, Rate=182.40, Global Rate=171.81 | |
training/ 19:34:40, device xla:1, step 1710, Rate=185.47, Global Rate=171.89 | |
training/ 19:34:47, device xla:1, step 1720, Rate=186.88, Global Rate=171.97 | |
training/ 19:34:54, device xla:1, step 1730, Rate=185.80, Global Rate=172.05 | |
training/ 19:35:01, device xla:1, step 1740, Rate=183.99, Global Rate=172.11 | |
training/ 19:35:08, device xla:1, step 1750, Rate=185.54, Global Rate=172.18 | |
training/ 19:35:14, device xla:1, step 1760, Rate=187.24, Global Rate=172.26 | |
training/ 19:35:21, device xla:1, step 1770, Rate=184.00, Global Rate=172.33 | |
training/ 19:35:28, device xla:1, step 1780, Rate=187.41, Global Rate=172.40 | |
training/ 19:35:35, device xla:1, step 1790, Rate=187.14, Global Rate=172.48 | |
training/ 19:35:42, device xla:1, step 1800, Rate=182.60, Global Rate=172.54 | |
training/ 19:35:49, device xla:1, step 1810, Rate=185.93, Global Rate=172.61 | |
training/ 19:35:56, device xla:1, step 1820, Rate=184.25, Global Rate=172.68 | |
training/ 19:36:03, device xla:1, step 1830, Rate=186.53, Global Rate=172.74 | |
training/ 19:36:09, device xla:1, step 1840, Rate=186.16, Global Rate=172.81 | |
training/ 19:36:16, device xla:1, step 1850, Rate=187.09, Global Rate=172.88 | |
training/ 19:36:23, device xla:1, step 1860, Rate=186.81, Global Rate=172.94 | |
training/ 19:36:30, device xla:1, step 1870, Rate=187.45, Global Rate=173.01 | |
training/ 19:36:37, device xla:1, step 1880, Rate=183.32, Global Rate=173.07 | |
training/ 19:36:44, device xla:1, step 1890, Rate=187.30, Global Rate=173.14 | |
training/ 19:36:51, device xla:1, step 1900, Rate=186.92, Global Rate=173.21 | |
training/ 19:36:58, device xla:1, step 1910, Rate=186.06, Global Rate=173.27 | |
training/ 19:37:04, device xla:1, step 1920, Rate=186.16, Global Rate=173.33 | |
training/ 19:37:11, device xla:1, step 1930, Rate=186.65, Global Rate=173.40 | |
training/ 19:37:18, device xla:1, step 1940, Rate=186.83, Global Rate=173.46 | |
training/ 19:37:25, device xla:1, step 1950, Rate=186.19, Global Rate=173.52 | |
training/ 19:37:32, device xla:1, step 1960, Rate=185.87, Global Rate=173.58 | |
training/ 19:37:39, device xla:1, step 1970, Rate=185.04, Global Rate=173.63 | |
training/ 19:37:46, device xla:1, step 1980, Rate=184.88, Global Rate=173.69 | |
training/ 19:37:53, device xla:1, step 1990, Rate=185.78, Global Rate=173.74 | |
training/ 19:38:00, device xla:1, step 2000, Rate=185.22, Global Rate=173.79 | |
training/ 19:38:07, device xla:1, step 2010, Rate=184.74, Global Rate=173.85 | |
training/ 19:38:13, device xla:1, step 2020, Rate=186.94, Global Rate=173.91 | |
training/ 19:38:20, device xla:1, step 2030, Rate=187.25, Global Rate=173.97 | |
training/ 19:38:27, device xla:1, step 2040, Rate=186.30, Global Rate=174.02 | |
training/ 19:38:34, device xla:1, step 2050, Rate=186.69, Global Rate=174.08 | |
training/ 19:38:41, device xla:1, step 2060, Rate=184.38, Global Rate=174.13 | |
training/ 19:38:48, device xla:1, step 2070, Rate=184.52, Global Rate=174.18 | |
training/ 19:38:55, device xla:1, step 2080, Rate=184.12, Global Rate=174.22 | |
training/ 19:39:02, device xla:1, step 2090, Rate=186.34, Global Rate=174.27 | |
training/ 19:39:09, device xla:1, step 2100, Rate=185.15, Global Rate=174.33 | |
training/ 19:39:16, device xla:1, step 2110, Rate=182.75, Global Rate=174.36 | |
training/ 19:39:22, device xla:1, step 2120, Rate=187.01, Global Rate=174.42 | |
training/ 19:39:29, device xla:1, step 2130, Rate=187.61, Global Rate=174.47 | |
training/ 19:39:36, device xla:1, step 2140, Rate=187.50, Global Rate=174.53 | |
training/ 19:39:43, device xla:1, step 2150, Rate=186.67, Global Rate=174.58 | |
training/ 19:39:50, device xla:1, step 2160, Rate=185.00, Global Rate=174.63 | |
training/ 19:39:57, device xla:1, step 2170, Rate=187.03, Global Rate=174.68 | |
training/ 19:40:04, device xla:1, step 2180, Rate=184.96, Global Rate=174.72 | |
training/ 19:40:11, device xla:1, step 2190, Rate=186.88, Global Rate=174.77 | |
training/ 19:40:17, device xla:1, step 2200, Rate=187.45, Global Rate=174.83 | |
training/ 19:40:24, device xla:1, step 2210, Rate=187.77, Global Rate=174.88 | |
training/ 19:40:31, device xla:1, step 2220, Rate=185.48, Global Rate=174.92 | |
training/ 19:40:38, device xla:1, step 2230, Rate=187.14, Global Rate=174.97 | |
training/ 19:40:45, device xla:1, step 2240, Rate=187.18, Global Rate=175.02 | |
training/ 19:40:52, device xla:1, step 2250, Rate=187.61, Global Rate=175.07 | |
training/ 19:40:59, device xla:1, step 2260, Rate=183.84, Global Rate=175.11 | |
training/ 19:41:05, device xla:1, step 2270, Rate=186.06, Global Rate=175.16 | |
training/ 19:41:12, device xla:1, step 2280, Rate=185.68, Global Rate=175.20 | |
training/ 19:41:19, device xla:1, step 2290, Rate=187.09, Global Rate=175.25 | |
training/ 19:41:26, device xla:1, step 2300, Rate=185.77, Global Rate=175.30 | |
training/ 19:41:33, device xla:1, step 2310, Rate=187.42, Global Rate=175.35 | |
training/ 19:41:40, device xla:1, step 2320, Rate=185.84, Global Rate=175.39 | |
training/ 19:41:47, device xla:1, step 2330, Rate=187.35, Global Rate=175.44 | |
training/ 19:41:53, device xla:1, step 2340, Rate=186.20, Global Rate=175.48 | |
training/ 19:42:00, device xla:1, step 2350, Rate=184.80, Global Rate=175.52 | |
training/ 19:42:07, device xla:1, step 2360, Rate=185.02, Global Rate=175.55 | |
training/ 19:42:14, device xla:1, step 2370, Rate=186.69, Global Rate=175.60 | |
training/ 19:42:21, device xla:1, step 2380, Rate=185.29, Global Rate=175.64 | |
training/ 19:42:28, device xla:1, step 2390, Rate=186.32, Global Rate=175.68 | |
training/ 19:42:35, device xla:1, step 2400, Rate=184.87, Global Rate=175.72 | |
training/ 19:42:42, device xla:1, step 2410, Rate=185.60, Global Rate=175.75 | |
training/ 19:42:49, device xla:1, step 2420, Rate=184.61, Global Rate=175.79 | |
training/ 19:42:56, device xla:1, step 2430, Rate=185.29, Global Rate=175.83 | |
training/ 19:43:02, device xla:1, step 2440, Rate=186.20, Global Rate=175.87 | |
training/ 19:43:09, device xla:1, step 2450, Rate=185.62, Global Rate=175.90 | |
training/ 19:43:16, device xla:1, step 2460, Rate=185.54, Global Rate=175.94 | |
training/ 19:43:23, device xla:1, step 2470, Rate=186.19, Global Rate=175.98 | |
training/ 19:43:30, device xla:1, step 2480, Rate=187.04, Global Rate=176.02 | |
training/ 19:43:37, device xla:1, step 2490, Rate=187.18, Global Rate=176.07 | |
training/ 19:43:44, device xla:1, step 2500, Rate=185.32, Global Rate=176.10 | |
training/ 19:43:51, device xla:1, step 2510, Rate=186.24, Global Rate=176.14 | |
training/ 19:43:58, device xla:1, step 2520, Rate=184.88, Global Rate=176.17 | |
training/ 19:44:05, device xla:1, step 2530, Rate=184.22, Global Rate=176.20 | |
training/ 19:44:11, device xla:1, step 2540, Rate=186.43, Global Rate=176.24 | |
training/ 19:44:18, device xla:1, step 2550, Rate=188.05, Global Rate=176.28 | |
training/ 19:44:25, device xla:1, step 2560, Rate=186.71, Global Rate=176.32 | |
training/ 19:44:32, device xla:1, step 2570, Rate=185.65, Global Rate=176.35 | |
training/ 19:44:39, device xla:1, step 2580, Rate=186.18, Global Rate=176.39 | |
training/ 19:44:46, device xla:1, step 2590, Rate=187.28, Global Rate=176.43 | |
training/ 19:44:53, device xla:1, step 2600, Rate=187.04, Global Rate=176.46 | |
training/ 19:45:00, device xla:1, step 2610, Rate=185.51, Global Rate=176.49 | |
training/ 19:45:06, device xla:1, step 2620, Rate=185.94, Global Rate=176.53 | |
training/ 19:45:13, device xla:1, step 2630, Rate=186.56, Global Rate=176.56 | |
training/ 19:45:20, device xla:1, step 2640, Rate=186.46, Global Rate=176.60 | |
training/ 19:45:27, device xla:1, step 2650, Rate=187.12, Global Rate=176.63 | |
training/ 19:45:34, device xla:1, step 2660, Rate=186.38, Global Rate=176.67 | |
training/ 19:45:41, device xla:1, step 2670, Rate=187.29, Global Rate=176.70 | |
training/ 19:45:48, device xla:1, step 2680, Rate=186.93, Global Rate=176.74 | |
training/ 19:45:54, device xla:1, step 2690, Rate=184.46, Global Rate=176.77 | |
training/ 19:46:01, device xla:1, step 2700, Rate=186.10, Global Rate=176.80 | |
training/ 19:46:08, device xla:1, step 2710, Rate=186.33, Global Rate=176.83 | |
training/ 19:46:15, device xla:1, step 2720, Rate=186.85, Global Rate=176.87 | |
training/ 19:46:22, device xla:1, step 2730, Rate=186.76, Global Rate=176.90 | |
training/ 19:46:29, device xla:1, step 2740, Rate=185.95, Global Rate=176.93 | |
training/ 19:46:36, device xla:1, step 2750, Rate=185.79, Global Rate=176.96 | |
training/ 19:46:43, device xla:1, step 2760, Rate=186.42, Global Rate=176.99 | |
training/ 19:46:49, device xla:1, step 2770, Rate=186.29, Global Rate=177.02 | |
training/ 19:46:56, device xla:1, step 2780, Rate=185.82, Global Rate=177.05 | |
training/ 19:47:03, device xla:1, step 2790, Rate=186.96, Global Rate=177.08 | |
training/ 19:47:10, device xla:1, step 2800, Rate=185.89, Global Rate=177.11 | |
training/ 19:47:17, device xla:1, step 2810, Rate=185.72, Global Rate=177.14 | |
training/ 19:47:24, device xla:1, step 2820, Rate=185.63, Global Rate=177.17 | |
training/ 19:47:31, device xla:1, step 2830, Rate=186.92, Global Rate=177.20 | |
training/ 19:47:38, device xla:1, step 2840, Rate=186.66, Global Rate=177.23 | |
training/ 19:47:45, device xla:1, step 2850, Rate=186.86, Global Rate=177.26 | |
training/ 19:47:52, device xla:1, step 2860, Rate=185.88, Global Rate=177.29 | |
training/ 19:47:58, device xla:1, step 2870, Rate=186.70, Global Rate=177.32 | |
training/ 19:48:05, device xla:1, step 2880, Rate=184.38, Global Rate=177.34 | |
training/ 19:48:12, device xla:1, step 2890, Rate=184.60, Global Rate=177.37 | |
training/ 19:48:19, device xla:1, step 2900, Rate=186.22, Global Rate=177.39 | |
training/ 19:48:26, device xla:1, step 2910, Rate=185.67, Global Rate=177.42 | |
training/ 19:48:33, device xla:1, step 2920, Rate=185.91, Global Rate=177.45 | |
training/ 19:48:40, device xla:1, step 2930, Rate=187.05, Global Rate=177.48 | |
training/ 19:48:47, device xla:1, step 2940, Rate=186.36, Global Rate=177.51 | |
training/ 19:48:53, device xla:1, step 2950, Rate=186.66, Global Rate=177.54 | |
training/ 19:49:00, device xla:1, step 2960, Rate=185.06, Global Rate=177.56 | |
training/ 19:49:07, device xla:1, step 2970, Rate=183.32, Global Rate=177.58 | |
training/ 19:49:14, device xla:1, step 2980, Rate=185.81, Global Rate=177.61 | |
training/ 19:49:21, device xla:1, step 2990, Rate=185.97, Global Rate=177.63 | |
training/ 19:49:28, device xla:1, step 3000, Rate=185.00, Global Rate=177.65 | |
training/ 19:49:35, device xla:1, step 3010, Rate=184.52, Global Rate=177.68 | |
training/ 19:49:42, device xla:1, step 3020, Rate=184.83, Global Rate=177.70 | |
training/ 19:49:49, device xla:1, step 3030, Rate=185.96, Global Rate=177.72 | |
training/ 19:49:56, device xla:1, step 3040, Rate=184.87, Global Rate=177.75 | |
training/ 19:50:03, device xla:1, step 3050, Rate=184.97, Global Rate=177.77 | |
training/ 19:50:10, device xla:1, step 3060, Rate=184.35, Global Rate=177.79 | |
training/ 19:50:17, device xla:1, step 3070, Rate=184.86, Global Rate=177.81 | |
training/ 19:50:24, device xla:1, step 3080, Rate=184.17, Global Rate=177.83 | |
training/ 19:50:30, device xla:1, step 3090, Rate=184.89, Global Rate=177.85 | |
training/ 19:50:37, device xla:1, step 3100, Rate=185.02, Global Rate=177.87 | |
training/ 19:50:44, device xla:1, step 3110, Rate=185.20, Global Rate=177.90 | |
training/ 19:50:51, device xla:1, step 3120, Rate=185.28, Global Rate=177.92 | |
training/ 19:50:58, device xla:1, step 3130, Rate=182.50, Global Rate=177.93 | |
training/ 19:51:05, device xla:1, step 3140, Rate=184.60, Global Rate=177.95 | |
training/ 19:51:12, device xla:1, step 3150, Rate=184.41, Global Rate=177.97 | |
training/ 19:51:19, device xla:1, step 3160, Rate=184.43, Global Rate=177.99 | |
training/ 19:51:26, device xla:1, step 3170, Rate=184.85, Global Rate=178.01 | |
training/ 19:51:33, device xla:1, step 3180, Rate=184.59, Global Rate=178.03 | |
training/ 19:51:40, device xla:1, step 3190, Rate=184.01, Global Rate=178.05 | |
training/ 19:51:47, device xla:1, step 3200, Rate=184.48, Global Rate=178.07 | |
training/ 19:51:54, device xla:1, step 3210, Rate=185.00, Global Rate=178.09 | |
training/ 19:52:01, device xla:1, step 3220, Rate=184.02, Global Rate=178.11 | |
training/ 19:52:08, device xla:1, step 3230, Rate=184.78, Global Rate=178.13 | |
training/ 19:52:15, device xla:1, step 3240, Rate=184.84, Global Rate=178.15 | |
training/ 19:52:21, device xla:1, step 3250, Rate=184.98, Global Rate=178.17 | |
training/ 19:52:28, device xla:1, step 3260, Rate=182.50, Global Rate=178.18 | |
training/ 19:52:35, device xla:1, step 3270, Rate=183.98, Global Rate=178.20 | |
training/ 19:52:42, device xla:1, step 3280, Rate=184.47, Global Rate=178.22 | |
training/ 19:52:49, device xla:1, step 3290, Rate=184.91, Global Rate=178.24 | |
training/ 19:52:56, device xla:1, step 3300, Rate=185.48, Global Rate=178.26 | |
training/ 19:53:03, device xla:1, step 3310, Rate=184.60, Global Rate=178.28 | |
training/ 19:53:10, device xla:1, step 3320, Rate=185.40, Global Rate=178.30 | |
training/ 19:53:17, device xla:1, step 3330, Rate=186.50, Global Rate=178.32 | |
training/ 19:53:24, device xla:1, step 3340, Rate=185.36, Global Rate=178.34 | |
training/ 19:53:31, device xla:1, step 3350, Rate=184.37, Global Rate=178.36 | |
training/ 19:53:38, device xla:1, step 3360, Rate=182.94, Global Rate=178.37 | |
training/ 19:53:45, device xla:1, step 3370, Rate=184.70, Global Rate=178.39 | |
training/ 19:53:52, device xla:1, step 3380, Rate=185.10, Global Rate=178.41 | |
training/ 19:53:59, device xla:1, step 3390, Rate=184.81, Global Rate=178.42 | |
training/ 19:54:06, device xla:1, step 3400, Rate=184.23, Global Rate=178.44 | |
training/ 19:54:12, device xla:1, step 3410, Rate=184.59, Global Rate=178.46 | |
training/ 19:54:19, device xla:1, step 3420, Rate=182.64, Global Rate=178.47 | |
training/ 19:54:26, device xla:1, step 3430, Rate=184.44, Global Rate=178.49 | |
training/ 19:54:33, device xla:1, step 3440, Rate=185.02, Global Rate=178.51 | |
training/ 19:54:40, device xla:1, step 3450, Rate=184.51, Global Rate=178.52 | |
training/ 19:54:47, device xla:1, step 3460, Rate=182.53, Global Rate=178.53 | |
training/ 19:54:54, device xla:1, step 3470, Rate=184.23, Global Rate=178.55 | |
training/ 19:55:01, device xla:1, step 3480, Rate=184.97, Global Rate=178.57 | |
training/ 19:55:08, device xla:1, step 3490, Rate=180.61, Global Rate=178.58 | |
training/ 19:55:15, device xla:1, step 3500, Rate=184.30, Global Rate=178.59 | |
training/ 19:55:22, device xla:1, step 3510, Rate=184.60, Global Rate=178.61 | |
training/ 19:55:29, device xla:1, step 3520, Rate=184.40, Global Rate=178.63 | |
training/ 19:55:36, device xla:1, step 3530, Rate=184.22, Global Rate=178.64 | |
training/ 19:55:43, device xla:1, step 3540, Rate=184.30, Global Rate=178.66 | |
training/ 19:55:50, device xla:1, step 3550, Rate=184.92, Global Rate=178.67 | |
training/ 19:55:57, device xla:1, step 3560, Rate=180.55, Global Rate=178.68 | |
training/ 19:56:04, device xla:1, step 3570, Rate=185.13, Global Rate=178.70 | |
training/ 19:56:11, device xla:1, step 3580, Rate=185.13, Global Rate=178.72 | |
training/ 19:56:18, device xla:1, step 3590, Rate=186.37, Global Rate=178.74 | |
training/ 19:56:24, device xla:1, step 3600, Rate=185.00, Global Rate=178.75 | |
training/ 19:56:31, device xla:1, step 3610, Rate=184.01, Global Rate=178.76 | |
training/ 19:56:38, device xla:1, step 3620, Rate=184.43, Global Rate=178.78 | |
training/ 19:56:45, device xla:1, step 3630, Rate=183.48, Global Rate=178.79 | |
training/ 19:56:52, device xla:1, step 3640, Rate=184.70, Global Rate=178.81 | |
training/ 19:56:59, device xla:1, step 3650, Rate=184.28, Global Rate=178.82 | |
training/ 19:57:06, device xla:1, step 3660, Rate=184.75, Global Rate=178.84 | |
training/ 19:57:13, device xla:1, step 3670, Rate=184.62, Global Rate=178.85 | |
training/ 19:57:20, device xla:1, step 3680, Rate=184.91, Global Rate=178.87 | |
training/ 19:57:27, device xla:1, step 3690, Rate=183.08, Global Rate=178.88 | |
training/ 19:57:34, device xla:1, step 3700, Rate=185.00, Global Rate=178.90 | |
training/ 19:57:41, device xla:1, step 3710, Rate=183.67, Global Rate=178.91 | |
training/ 19:57:48, device xla:1, step 3720, Rate=184.84, Global Rate=178.92 | |
training/ 19:57:55, device xla:1, step 3730, Rate=185.10, Global Rate=178.94 | |
training/ 19:58:02, device xla:1, step 3740, Rate=184.52, Global Rate=178.95 | |
training/ 19:58:09, device xla:1, step 3750, Rate=184.10, Global Rate=178.97 | |
training/ 19:58:16, device xla:1, step 3760, Rate=184.61, Global Rate=178.98 | |
training/ 19:58:23, device xla:1, step 3770, Rate=184.77, Global Rate=179.00 | |
training/ 19:58:29, device xla:1, step 3780, Rate=184.43, Global Rate=179.01 | |
training/ 19:58:36, device xla:1, step 3790, Rate=185.06, Global Rate=179.03 | |
training/ 19:58:43, device xla:1, step 3800, Rate=184.58, Global Rate=179.04 | |
training/ 19:58:50, device xla:1, step 3810, Rate=184.80, Global Rate=179.05 | |
training/ 19:58:57, device xla:1, step 3820, Rate=185.10, Global Rate=179.07 | |
training/ 19:59:04, device xla:1, step 3830, Rate=184.53, Global Rate=179.08 | |
training/ 19:59:11, device xla:1, step 3840, Rate=183.44, Global Rate=179.09 | |
training/ 19:59:18, device xla:1, step 3850, Rate=185.00, Global Rate=179.11 | |
training/ 19:59:25, device xla:1, step 3860, Rate=182.18, Global Rate=179.12 | |
training/ 19:59:32, device xla:1, step 3870, Rate=186.18, Global Rate=179.14 | |
training/ 19:59:39, device xla:1, step 3880, Rate=185.45, Global Rate=179.15 | |
training/ 19:59:46, device xla:1, step 3890, Rate=184.87, Global Rate=179.17 | |
training/ 19:59:53, device xla:1, step 3900, Rate=185.05, Global Rate=179.18 | |
training/ 20:00:00, device xla:1, step 3910, Rate=184.54, Global Rate=179.19 | |
training/ 20:00:07, device xla:1, step 3920, Rate=184.50, Global Rate=179.20 | |
training/ 20:00:13, device xla:1, step 3930, Rate=184.12, Global Rate=179.22 | |
training/ 20:00:20, device xla:1, step 3940, Rate=183.98, Global Rate=179.23 | |
training/ 20:00:27, device xla:1, step 3950, Rate=184.81, Global Rate=179.24 | |
training/ 20:00:34, device xla:1, step 3960, Rate=184.47, Global Rate=179.26 | |
training/ 20:00:41, device xla:1, step 3970, Rate=183.64, Global Rate=179.27 | |
training/ 20:00:48, device xla:1, step 3980, Rate=184.64, Global Rate=179.28 | |
training/ 20:00:55, device xla:1, step 3990, Rate=185.54, Global Rate=179.30 | |
training/ 20:01:02, device xla:1, step 4000, Rate=184.86, Global Rate=179.31 | |
training/ 20:01:09, device xla:1, step 4010, Rate=183.76, Global Rate=179.32 | |
training/ 20:01:16, device xla:1, step 4020, Rate=184.67, Global Rate=179.33 | |
training/ 20:01:23, device xla:1, step 4030, Rate=184.09, Global Rate=179.34 | |
training/ 20:01:30, device xla:1, step 4040, Rate=183.91, Global Rate=179.35 | |
training/ 20:01:37, device xla:1, step 4050, Rate=184.08, Global Rate=179.36 | |
training/ 20:01:44, device xla:1, step 4060, Rate=183.66, Global Rate=179.37 | |
training/ 20:01:51, device xla:1, step 4070, Rate=185.29, Global Rate=179.39 | |
training/ 20:01:58, device xla:1, step 4080, Rate=184.99, Global Rate=179.40 | |
training/ 20:02:05, device xla:1, step 4090, Rate=184.46, Global Rate=179.41 | |
training/ 20:02:11, device xla:1, step 4100, Rate=185.81, Global Rate=179.43 | |
training/ 20:02:18, device xla:1, step 4110, Rate=181.29, Global Rate=179.43 | |
training/ 20:02:25, device xla:1, step 4120, Rate=183.51, Global Rate=179.45 | |
training/ 20:02:32, device xla:1, step 4130, Rate=184.93, Global Rate=179.46 | |
training/ 20:02:39, device xla:1, step 4140, Rate=184.43, Global Rate=179.47 | |
training/ 20:02:46, device xla:1, step 4150, Rate=185.54, Global Rate=179.48 | |
training/ 20:02:53, device xla:1, step 4160, Rate=186.84, Global Rate=179.50 | |
training/ 20:03:00, device xla:1, step 4170, Rate=186.37, Global Rate=179.52 | |
training/ 20:03:07, device xla:1, step 4180, Rate=186.73, Global Rate=179.53 | |
training/ 20:03:14, device xla:1, step 4190, Rate=186.04, Global Rate=179.54 | |
training/ 20:03:21, device xla:1, step 4200, Rate=185.56, Global Rate=179.56 | |
training/ 20:03:28, device xla:1, step 4210, Rate=185.96, Global Rate=179.57 | |
training/ 20:03:34, device xla:1, step 4220, Rate=185.33, Global Rate=179.58 | |
training/ 20:03:41, device xla:1, step 4230, Rate=184.41, Global Rate=179.60 | |
training/ 20:03:48, device xla:1, step 4240, Rate=186.06, Global Rate=179.61 | |
training/ 20:03:55, device xla:1, step 4250, Rate=185.43, Global Rate=179.62 | |
training/ 20:04:02, device xla:1, step 4260, Rate=184.33, Global Rate=179.63 | |
training/ 20:04:09, device xla:1, step 4270, Rate=182.67, Global Rate=179.64 | |
training/ 20:04:16, device xla:1, step 4280, Rate=184.95, Global Rate=179.65 | |
training/ 20:04:23, device xla:1, step 4290, Rate=185.47, Global Rate=179.66 | |
training/ 20:04:30, device xla:1, step 4300, Rate=185.81, Global Rate=179.68 | |
training/ 20:04:37, device xla:1, step 4310, Rate=184.13, Global Rate=179.68 | |
training/ 20:04:44, device xla:1, step 4320, Rate=186.04, Global Rate=179.70 | |
training/ 20:04:51, device xla:1, step 4330, Rate=184.92, Global Rate=179.71 | |
training/ 20:04:58, device xla:1, step 4340, Rate=184.99, Global Rate=179.72 | |
training/ 20:05:05, device xla:1, step 4350, Rate=184.67, Global Rate=179.73 | |
training/ 20:05:12, device xla:1, step 4360, Rate=183.92, Global Rate=179.74 | |
training/ 20:05:18, device xla:1, step 4370, Rate=185.62, Global Rate=179.75 | |
training/ 20:05:25, device xla:1, step 4380, Rate=186.31, Global Rate=179.77 | |
training/ 20:05:32, device xla:1, step 4390, Rate=182.13, Global Rate=179.77 | |
training/ 20:05:39, device xla:1, step 4400, Rate=184.22, Global Rate=179.78 | |
training/ 20:05:46, device xla:1, step 4410, Rate=184.41, Global Rate=179.80 | |
training/ 20:05:53, device xla:1, step 4420, Rate=185.14, Global Rate=179.81 | |
training/ 20:06:00, device xla:1, step 4430, Rate=185.52, Global Rate=179.82 | |
training/ 20:06:07, device xla:1, step 4440, Rate=185.96, Global Rate=179.83 | |
training/ 20:06:14, device xla:1, step 4450, Rate=185.45, Global Rate=179.84 | |
training/ 20:06:21, device xla:1, step 4460, Rate=185.09, Global Rate=179.86 | |
training/ 20:06:28, device xla:1, step 4470, Rate=185.39, Global Rate=179.87 | |
training/ 20:06:35, device xla:1, step 4480, Rate=185.03, Global Rate=179.88 | |
training/ 20:06:41, device xla:1, step 4490, Rate=185.32, Global Rate=179.89 | |
training/ 20:06:48, device xla:1, step 4500, Rate=184.37, Global Rate=179.90 | |
training/ 20:06:55, device xla:1, step 4510, Rate=185.33, Global Rate=179.91 | |
training/ 20:07:02, device xla:1, step 4520, Rate=185.21, Global Rate=179.92 | |
training/ 20:07:09, device xla:1, step 4530, Rate=181.29, Global Rate=179.93 | |
training/ 20:07:16, device xla:1, step 4540, Rate=184.37, Global Rate=179.94 | |
training/ 20:07:23, device xla:1, step 4550, Rate=185.50, Global Rate=179.95 | |
training/ 20:07:30, device xla:1, step 4560, Rate=186.30, Global Rate=179.96 | |
training/ 20:07:37, device xla:1, step 4570, Rate=186.17, Global Rate=179.98 | |
training/ 20:07:44, device xla:1, step 4580, Rate=185.64, Global Rate=179.99 | |
training/ 20:07:51, device xla:1, step 4590, Rate=185.36, Global Rate=180.00 | |
training/ 20:07:58, device xla:1, step 4600, Rate=184.98, Global Rate=180.01 | |
training/ 20:08:04, device xla:1, step 4610, Rate=185.62, Global Rate=180.02 | |
training/ 20:08:11, device xla:1, step 4620, Rate=185.13, Global Rate=180.03 | |
training/ 20:08:18, device xla:1, step 4630, Rate=185.40, Global Rate=180.04 | |
training/ 20:08:25, device xla:1, step 4640, Rate=184.19, Global Rate=180.05 | |
training/ 20:08:32, device xla:1, step 4650, Rate=185.39, Global Rate=180.06 | |
training/ 20:08:39, device xla:1, step 4660, Rate=185.08, Global Rate=180.07 | |
training/ 20:08:46, device xla:1, step 4670, Rate=184.83, Global Rate=180.08 | |
training/ 20:08:53, device xla:1, step 4680, Rate=184.92, Global Rate=180.09 | |
training/ 20:09:00, device xla:1, step 4690, Rate=184.18, Global Rate=180.10 | |
training/ 20:09:07, device xla:1, step 4700, Rate=181.46, Global Rate=180.11 | |
training/ 20:09:14, device xla:1, step 4710, Rate=184.14, Global Rate=180.12 | |
training/ 20:09:21, device xla:1, step 4720, Rate=184.61, Global Rate=180.13 | |
training/ 20:09:28, device xla:1, step 4730, Rate=184.62, Global Rate=180.13 | |
training/ 20:09:35, device xla:1, step 4740, Rate=185.51, Global Rate=180.14 | |
training/ 20:09:42, device xla:1, step 4750, Rate=184.28, Global Rate=180.15 | |
training/ 20:09:48, device xla:1, step 4760, Rate=184.91, Global Rate=180.16 | |
training/ 20:09:55, device xla:1, step 4770, Rate=185.63, Global Rate=180.17 | |
training/ 20:10:02, device xla:1, step 4780, Rate=185.33, Global Rate=180.18 | |
training/ 20:10:09, device xla:1, step 4790, Rate=185.73, Global Rate=180.19 | |
training/ 20:10:16, device xla:1, step 4800, Rate=185.66, Global Rate=180.20 | |
training/ 20:10:23, device xla:1, step 4810, Rate=185.84, Global Rate=180.22 | |
training/ 20:10:30, device xla:1, step 4820, Rate=185.04, Global Rate=180.22 | |
training/ 20:10:37, device xla:1, step 4830, Rate=184.70, Global Rate=180.23 | |
training/ 20:10:44, device xla:1, step 4840, Rate=183.76, Global Rate=180.24 | |
training/ 20:10:51, device xla:1, step 4850, Rate=184.20, Global Rate=180.25 | |
training/ 20:10:58, device xla:1, step 4860, Rate=182.48, Global Rate=180.26 | |
training/ 20:11:05, device xla:1, step 4870, Rate=185.28, Global Rate=180.27 | |
training/ 20:11:12, device xla:1, step 4880, Rate=184.99, Global Rate=180.28 | |
training/ 20:11:18, device xla:1, step 4890, Rate=186.69, Global Rate=180.29 | |
training/ 20:11:25, device xla:1, step 4900, Rate=186.26, Global Rate=180.30 | |
training/ 20:11:32, device xla:1, step 4910, Rate=185.47, Global Rate=180.31 | |
training/ 20:11:39, device xla:1, step 4920, Rate=186.24, Global Rate=180.32 | |
training/ 20:11:46, device xla:1, step 4930, Rate=185.97, Global Rate=180.33 | |
training/ 20:11:53, device xla:1, step 4940, Rate=185.74, Global Rate=180.34 | |
training/ 20:12:00, device xla:1, step 4950, Rate=185.92, Global Rate=180.35 | |
training/ 20:12:07, device xla:1, step 4960, Rate=184.29, Global Rate=180.36 | |
training/ 20:12:14, device xla:1, step 4970, Rate=184.20, Global Rate=180.37 | |
training/ 20:12:21, device xla:1, step 4980, Rate=182.72, Global Rate=180.37 | |
training/ 20:12:28, device xla:1, step 4990, Rate=185.12, Global Rate=180.38 | |
training/ 20:12:34, device xla:1, step 5000, Rate=185.75, Global Rate=180.39 | |
training/ 20:12:41, device xla:1, step 5010, Rate=185.78, Global Rate=180.40 | |
training/ 20:12:48, device xla:1, step 5020, Rate=186.27, Global Rate=180.41 | |
training/ 20:12:55, device xla:1, step 5030, Rate=185.28, Global Rate=180.42 | |
training/ 20:13:02, device xla:1, step 5040, Rate=184.89, Global Rate=180.43 | |
training/ 20:13:09, device xla:1, step 5050, Rate=184.21, Global Rate=180.44 | |
training/ 20:13:16, device xla:1, step 5060, Rate=182.68, Global Rate=180.44 | |
training/ 20:13:23, device xla:1, step 5070, Rate=185.52, Global Rate=180.45 | |
training/ 20:13:30, device xla:1, step 5080, Rate=185.04, Global Rate=180.46 | |
training/ 20:13:37, device xla:1, step 5090, Rate=184.76, Global Rate=180.47 | |
training/ 20:13:44, device xla:1, step 5100, Rate=184.64, Global Rate=180.48 | |
training/ 20:13:51, device xla:1, step 5110, Rate=185.83, Global Rate=180.49 | |
training/ 20:13:58, device xla:1, step 5120, Rate=181.69, Global Rate=180.49 | |
training/ 20:14:04, device xla:1, step 5130, Rate=185.52, Global Rate=180.50 | |
training/ 20:14:11, device xla:1, step 5140, Rate=185.26, Global Rate=180.51 | |
training/ 20:14:18, device xla:1, step 5150, Rate=185.32, Global Rate=180.52 | |
training/ 20:14:25, device xla:1, step 5160, Rate=185.56, Global Rate=180.53 | |
training/ 20:14:32, device xla:1, step 5170, Rate=184.30, Global Rate=180.53 | |
training/ 20:14:39, device xla:1, step 5180, Rate=185.40, Global Rate=180.54 | |
training/ 20:14:46, device xla:1, step 5190, Rate=184.32, Global Rate=180.55 | |
training/ 20:14:53, device xla:1, step 5200, Rate=186.25, Global Rate=180.56 | |
training/ 20:15:00, device xla:1, step 5210, Rate=186.09, Global Rate=180.57 | |
training/ 20:15:07, device xla:1, step 5220, Rate=186.04, Global Rate=180.58 | |
training/ 20:15:14, device xla:1, step 5230, Rate=185.66, Global Rate=180.59 | |
training/ 20:15:21, device xla:1, step 5240, Rate=184.36, Global Rate=180.60 | |
training/ 20:15:27, device xla:1, step 5250, Rate=185.18, Global Rate=180.60 | |
training/ 20:15:34, device xla:1, step 5260, Rate=185.04, Global Rate=180.61 | |
training/ 20:15:41, device xla:1, step 5270, Rate=185.70, Global Rate=180.62 | |
training/ 20:15:48, device xla:1, step 5280, Rate=186.23, Global Rate=180.63 | |
training/ 20:15:55, device xla:1, step 5290, Rate=185.61, Global Rate=180.64 | |
training/ 20:16:02, device xla:1, step 5300, Rate=185.99, Global Rate=180.65 | |
training/ 20:16:09, device xla:1, step 5310, Rate=185.52, Global Rate=180.65 | |
training/ 20:16:16, device xla:1, step 5320, Rate=185.40, Global Rate=180.66 | |
training/ 20:16:23, device xla:1, step 5330, Rate=186.41, Global Rate=180.67 | |
training/ 20:16:30, device xla:1, step 5340, Rate=184.68, Global Rate=180.68 | |
training/ 20:16:37, device xla:1, step 5350, Rate=186.40, Global Rate=180.69 | |
training/ 20:16:44, device xla:1, step 5360, Rate=183.49, Global Rate=180.69 | |
training/ 20:16:50, device xla:1, step 5370, Rate=184.93, Global Rate=180.70 | |
training/ 20:16:57, device xla:1, step 5380, Rate=184.91, Global Rate=180.71 | |
training/ 20:17:04, device xla:1, step 5390, Rate=185.72, Global Rate=180.72 | |
training/ 20:17:11, device xla:1, step 5400, Rate=184.79, Global Rate=180.73 | |
training/ 20:17:18, device xla:1, step 5410, Rate=185.15, Global Rate=180.73 | |
training/ 20:17:25, device xla:1, step 5420, Rate=184.92, Global Rate=180.74 | |
training/ 20:17:32, device xla:1, step 5430, Rate=185.82, Global Rate=180.75 | |
training/ 20:17:39, device xla:1, step 5440, Rate=186.08, Global Rate=180.76 | |
training/ 20:17:46, device xla:1, step 5450, Rate=186.40, Global Rate=180.77 | |
training/ 20:17:53, device xla:1, step 5460, Rate=185.79, Global Rate=180.78 | |
training/ 20:17:59, device xla:1, step 5470, Rate=186.32, Global Rate=180.79 | |
training/ 20:18:06, device xla:1, step 5480, Rate=182.10, Global Rate=180.79 | |
training/ 20:18:13, device xla:1, step 5490, Rate=185.62, Global Rate=180.80 | |
training/ 20:18:20, device xla:1, step 5500, Rate=185.15, Global Rate=180.81 | |
training/ 20:18:27, device xla:1, step 5510, Rate=184.24, Global Rate=180.81 | |
training/ 20:18:34, device xla:1, step 5520, Rate=184.59, Global Rate=180.82 | |
training/ 20:18:41, device xla:1, step 5530, Rate=184.66, Global Rate=180.82 | |
training/ 20:18:48, device xla:1, step 5540, Rate=185.58, Global Rate=180.83 | |
training/ 20:18:55, device xla:1, step 5550, Rate=185.13, Global Rate=180.84 | |
training/ 20:19:02, device xla:1, step 5560, Rate=185.48, Global Rate=180.85 | |
training/ 20:19:09, device xla:1, step 5570, Rate=184.70, Global Rate=180.85 | |
training/ 20:19:16, device xla:1, step 5580, Rate=184.12, Global Rate=180.86 | |
training/ 20:19:23, device xla:1, step 5590, Rate=185.93, Global Rate=180.87 | |
training/ 20:19:30, device xla:1, step 5600, Rate=185.26, Global Rate=180.88 | |
training/ 20:19:36, device xla:1, step 5610, Rate=185.02, Global Rate=180.88 | |
training/ 20:19:43, device xla:1, step 5620, Rate=185.30, Global Rate=180.89 | |
training/ 20:19:50, device xla:1, step 5630, Rate=185.09, Global Rate=180.90 | |
training/ 20:19:57, device xla:1, step 5640, Rate=185.54, Global Rate=180.90 | |
training/ 20:20:04, device xla:1, step 5650, Rate=182.91, Global Rate=180.91 | |
training/ 20:20:11, device xla:1, step 5660, Rate=183.52, Global Rate=180.91 | |
training/ 20:20:18, device xla:1, step 5670, Rate=184.21, Global Rate=180.92 | |
training/ 20:20:25, device xla:1, step 5680, Rate=185.17, Global Rate=180.93 | |
training/ 20:20:32, device xla:1, step 5690, Rate=184.65, Global Rate=180.93 | |
training/ 20:20:39, device xla:1, step 5700, Rate=184.55, Global Rate=180.94 | |
training/ 20:20:46, device xla:1, step 5710, Rate=183.17, Global Rate=180.94 | |
training/ 20:20:53, device xla:1, step 5720, Rate=184.56, Global Rate=180.95 | |
training/ 20:21:00, device xla:1, step 5730, Rate=184.97, Global Rate=180.96 | |
training/ 20:21:07, device xla:1, step 5740, Rate=185.47, Global Rate=180.96 | |
training/ 20:21:14, device xla:1, step 5750, Rate=185.45, Global Rate=180.97 | |
training/ 20:21:20, device xla:1, step 5760, Rate=185.80, Global Rate=180.98 | |
training/ 20:21:27, device xla:1, step 5770, Rate=185.58, Global Rate=180.99 | |
training/ 20:21:34, device xla:1, step 5780, Rate=185.28, Global Rate=180.99 | |
training/ 20:21:41, device xla:1, step 5790, Rate=184.46, Global Rate=181.00 | |
training/ 20:21:48, device xla:1, step 5800, Rate=185.36, Global Rate=181.01 | |
training/ 20:21:55, device xla:1, step 5810, Rate=182.88, Global Rate=181.01 | |
training/ 20:22:02, device xla:1, step 5820, Rate=183.70, Global Rate=181.02 | |
training/ 20:22:09, device xla:1, step 5830, Rate=184.42, Global Rate=181.02 | |
training/ 20:22:16, device xla:1, step 5840, Rate=184.25, Global Rate=181.03 | |
training/ 20:22:23, device xla:1, step 5850, Rate=184.68, Global Rate=181.03 | |
training/ 20:22:30, device xla:1, step 5860, Rate=186.29, Global Rate=181.04 | |
training/ 20:22:37, device xla:1, step 5870, Rate=186.18, Global Rate=181.05 | |
training/ 20:22:43, device xla:1, step 5880, Rate=185.92, Global Rate=181.06 | |
training/ 20:22:50, device xla:1, step 5890, Rate=186.62, Global Rate=181.07 | |
training/ 20:22:57, device xla:1, step 5900, Rate=186.12, Global Rate=181.07 | |
training/ 20:23:04, device xla:1, step 5910, Rate=186.18, Global Rate=181.08 | |
training/ 20:23:11, device xla:1, step 5920, Rate=185.46, Global Rate=181.09 | |
training/ 20:23:18, device xla:1, step 5930, Rate=185.46, Global Rate=181.10 | |
training/ 20:23:25, device xla:1, step 5940, Rate=184.59, Global Rate=181.10 | |
training/ 20:23:32, device xla:1, step 5950, Rate=184.78, Global Rate=181.11 | |
training/ 20:23:39, device xla:1, step 5960, Rate=185.98, Global Rate=181.12 | |
training/ 20:23:46, device xla:1, step 5970, Rate=185.91, Global Rate=181.12 | |
training/ 20:23:52, device xla:1, step 5980, Rate=186.22, Global Rate=181.13 | |
training/ 20:23:59, device xla:1, step 5990, Rate=185.64, Global Rate=181.14 | |
training/ 20:24:06, device xla:1, step 6000, Rate=185.40, Global Rate=181.14 | |
training/ 20:24:13, device xla:1, step 6010, Rate=185.85, Global Rate=181.15 | |
training/ 20:24:20, device xla:1, step 6020, Rate=185.61, Global Rate=181.16 | |
training/ 20:24:27, device xla:1, step 6030, Rate=182.92, Global Rate=181.16 | |
training/ 20:24:34, device xla:1, step 6040, Rate=184.37, Global Rate=181.17 | |
training/ 20:24:41, device xla:1, step 6050, Rate=185.14, Global Rate=181.18 | |
training/ 20:24:48, device xla:1, step 6060, Rate=183.93, Global Rate=181.18 | |
training/ 20:24:55, device xla:1, step 6070, Rate=185.73, Global Rate=181.19 | |
training/ 20:25:02, device xla:1, step 6080, Rate=186.07, Global Rate=181.20 | |
training/ 20:25:09, device xla:1, step 6090, Rate=185.70, Global Rate=181.20 | |
training/ 20:25:15, device xla:1, step 6100, Rate=184.13, Global Rate=181.21 | |
training/ 20:25:22, device xla:1, step 6110, Rate=185.77, Global Rate=181.21 | |
training/ 20:25:29, device xla:1, step 6120, Rate=185.89, Global Rate=181.22 | |
training/ 20:25:36, device xla:1, step 6130, Rate=184.87, Global Rate=181.23 | |
training/ 20:25:43, device xla:1, step 6140, Rate=182.43, Global Rate=181.23 | |
training/ 20:25:50, device xla:1, step 6150, Rate=184.29, Global Rate=181.24 | |
training/ 20:25:57, device xla:1, step 6160, Rate=185.89, Global Rate=181.24 | |
training/ 20:26:04, device xla:1, step 6170, Rate=181.09, Global Rate=181.25 | |
training/ 20:26:11, device xla:1, step 6180, Rate=185.26, Global Rate=181.25 | |
training/ 20:26:18, device xla:1, step 6190, Rate=184.15, Global Rate=181.26 | |
training/ 20:26:25, device xla:1, step 6200, Rate=184.21, Global Rate=181.26 | |
training/ 20:26:32, device xla:1, step 6210, Rate=183.63, Global Rate=181.26 | |
training/ 20:26:39, device xla:1, step 6220, Rate=185.38, Global Rate=181.27 | |
training/ 20:26:46, device xla:1, step 6230, Rate=185.63, Global Rate=181.28 | |
training/ 20:26:53, device xla:1, step 6240, Rate=186.51, Global Rate=181.28 | |
training/ 20:26:59, device xla:1, step 6250, Rate=186.07, Global Rate=181.29 | |
training/ 20:27:06, device xla:1, step 6260, Rate=186.09, Global Rate=181.30 | |
training/ 20:27:13, device xla:1, step 6270, Rate=183.59, Global Rate=181.30 | |
training/ 20:27:20, device xla:1, step 6280, Rate=184.84, Global Rate=181.31 | |
training/ 20:27:27, device xla:1, step 6290, Rate=185.35, Global Rate=181.31 | |
training/ 20:27:34, device xla:1, step 6300, Rate=185.60, Global Rate=181.32 | |
training/ 20:27:41, device xla:1, step 6310, Rate=185.61, Global Rate=181.32 | |
training/ 20:27:48, device xla:1, step 6320, Rate=184.65, Global Rate=181.33 | |
training/ 20:27:55, device xla:1, step 6330, Rate=186.02, Global Rate=181.34 | |
training/ 20:28:02, device xla:1, step 6340, Rate=185.89, Global Rate=181.34 | |
training/ 20:28:09, device xla:1, step 6350, Rate=184.92, Global Rate=181.35 | |
training/ 20:28:16, device xla:1, step 6360, Rate=184.23, Global Rate=181.35 | |
training/ 20:28:22, device xla:1, step 6370, Rate=183.99, Global Rate=181.36 | |
training/ 20:28:29, device xla:1, step 6380, Rate=185.29, Global Rate=181.36 | |
training/ 20:28:36, device xla:1, step 6390, Rate=186.31, Global Rate=181.37 | |
training/ 20:28:43, device xla:1, step 6400, Rate=185.47, Global Rate=181.38 | |
training/ 20:28:50, device xla:1, step 6410, Rate=185.67, Global Rate=181.38 | |
training/ 20:28:57, device xla:1, step 6420, Rate=185.41, Global Rate=181.39 | |
training/ 20:29:04, device xla:1, step 6430, Rate=186.25, Global Rate=181.40 | |
training/ 20:29:11, device xla:1, step 6440, Rate=185.70, Global Rate=181.40./rmyle-tpu.sh: line 63: 49584 Killed python tpu-examples/fairseq_train_tpu.py $data_path --arch=transformer_vaswani_wmt_en_de_big --max-sentences=$batch_size --max-sentences-valid=$batch_size --max-source-positions=$n_words --max-target-positions=$n_words --required-batch-size-multiple=$batch_size --no-save --attention-dropout=0.1 --no-progress-bar --criterion=label_smoothed_cross_entropy --log-interval=100 --source-lang=en --lr-scheduler=inverse_sqrt --min-lr 1e-09 --skip-invalid-size-inputs-valid-test --target-lang=de --label-smoothing=0.1 --update-freq=1 --optimizer adam --adam-betas '(0.9, 0.98)' --warmup-init-lr 1e-07 --lr 0.0005 --warmup-updates 4000 --share-all-embeddings --dropout 0.3 --weight-decay 0.0 --valid-subset=valid --curriculum=4 --max-epoch=50 --num_cores=1 --metrics_debug --pad_to_length=$n_words --log_steps=10 | |
Fri Aug 16 20:30:10 UTC 2019 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment