@taylanbil
Created August 19, 2019 16:56
Fairseq Transformer on 1 TPU, full dataset - measure 100 steps.
Fri Aug 16 19:11:06 UTC 2019
#!/bin/bash
batch_size=128
n_words=64
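# Fixed batch size and sequence length: XLA compiles a separate graph per
# input shape, so padding every batch to the same dimensions (see
# --required-batch-size-multiple and --pad_to_length below) avoids
# recompilation on TPU.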
data_path=/home/taylanbil/data/wmt18_en_de_bpej32k
#data_path=/home/taylanbil/data/dummy
#conda activate pytorch
pkill -9 python
#TPU_IP_ADDRESS=10.1.2.2 # nightly (stale; overridden below)
TPU_IP_ADDRESS=10.1.4.2 # nightly
#export XLA_USE_32BIT_LONG=1
#export XLA_IR_DEBUG=1
#export XLA_HLO_DEBUG=1
#export GET_TENSORS_OPBYOP=1
#export SYNC_TENSORS_OPBYOP=1
#export XLA_SAVE_TENSORS_FILE=$tensors_dir/${taskname}_tensors.txt
#export TRIM_GRAPH_SIZE=50000
#export XLA_SYNC_WAIT=1
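# XRT config format: <worker name>;<task number>;<host:port>; this points
# the XLA client at the TPU worker above.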
export XRT_TPU_CONFIG="tpu_worker;0;$TPU_IP_ADDRESS:8470"
# NOTE: --max-tokens has no effect with TPUs. These flags are currently
# defined but not passed to the training command below.
other_flags="
--disable-validation \
--max-tokens=4096 \
--num-workers=8 \
"
#LD_PRELOAD=/usr/lib/libtcmalloc_minimal.so.4 python tpu-examples/fairseq_train_tpu.py \
python tpu-examples/fairseq_train_tpu.py \
$data_path \
--arch=transformer_vaswani_wmt_en_de_big \
--max-sentences=$batch_size \
--max-sentences-valid=$batch_size \
--max-source-positions=$n_words \
--max-target-positions=$n_words \
--required-batch-size-multiple=$batch_size \
--no-save \
--attention-dropout=0.1 \
--no-progress-bar \
--criterion=label_smoothed_cross_entropy \
--log-interval=100 \
--source-lang=en \
--lr-scheduler=inverse_sqrt \
--min-lr 1e-09 \
--skip-invalid-size-inputs-valid-test \
--target-lang=de \
--label-smoothing=0.1 \
--update-freq=1 \
--optimizer adam \
--adam-betas '(0.9, 0.98)' \
--warmup-init-lr 1e-07 \
--lr 0.0005 \
--warmup-updates 4000 \
--share-all-embeddings \
--dropout 0.3 \
--weight-decay 0.0 \
--valid-subset=valid \
--curriculum=4 \
--max-epoch=50 \
--num_cores=1 \
--metrics_debug \
--pad_to_length=$n_words \
--log_steps=10
--------------
nohup: ignoring input
2019-08-16 19:11:07.384481: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) CPU:0 -> /job:tpu_worker/replica:0/task:0/device:XLA_CPU:0
2019-08-16 19:11:07.384547: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:0 -> /job:tpu_worker/replica:0/task:0/device:TPU:0
2019-08-16 19:11:07.384555: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:1 -> /job:tpu_worker/replica:0/task:0/device:TPU:1
2019-08-16 19:11:07.384561: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:2 -> /job:tpu_worker/replica:0/task:0/device:TPU:2
2019-08-16 19:11:07.384567: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:3 -> /job:tpu_worker/replica:0/task:0/device:TPU:3
2019-08-16 19:11:07.384572: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:4 -> /job:tpu_worker/replica:0/task:0/device:TPU:4
2019-08-16 19:11:07.384578: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:5 -> /job:tpu_worker/replica:0/task:0/device:TPU:5
2019-08-16 19:11:07.384584: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:6 -> /job:tpu_worker/replica:0/task:0/device:TPU:6
2019-08-16 19:11:07.384589: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) TPU:7 -> /job:tpu_worker/replica:0/task:0/device:TPU:7
2019-08-16 19:11:07.384617: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:200] Worker grpc://10.1.4.2:8470 for /job:tpu_worker/replica:0/task:0
2019-08-16 19:11:07.384623: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:204] XRT default device: TPU:0
2019-08-16 19:11:07.386730: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:1086] Configuring TPU for worker tpu_worker:0 at grpc://10.1.4.2:8470
2019-08-16 19:11:10.203782: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:1102] TPU topology: mesh_shape: 2
mesh_shape: 2
mesh_shape: 2
num_tasks: 1
num_tpu_devices_per_task: 8
device_coordinates: 0
device_coordinates: 0
device_coordinates: 0
device_coordinates: 0
device_coordinates: 0
device_coordinates: 1
device_coordinates: 0
device_coordinates: 1
device_coordinates: 0
device_coordinates: 0
device_coordinates: 1
device_coordinates: 1
device_coordinates: 1
device_coordinates: 0
device_coordinates: 0
device_coordinates: 1
device_coordinates: 0
device_coordinates: 1
device_coordinates: 1
device_coordinates: 1
device_coordinates: 0
device_coordinates: 1
device_coordinates: 1
device_coordinates: 1
| [en] dictionary: 35662 types
| [de] dictionary: 35662 types
| /home/taylanbil/data/wmt18_en_de_bpej32k valid en-de 52385 examples
TransformerModel(
(encoder): TransformerEncoder(
(embed_tokens): Embedding(35662, 1024, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(1): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(2): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(3): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(4): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(5): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
(decoder): TransformerDecoder(
(embed_tokens): Embedding(35662, 1024, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(1): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(2): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(3): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(4): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(5): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
)
| model transformer_vaswani_wmt_en_de_big, criterion LabelSmoothedCrossEntropyCriterion
| num. model params: 212875264 (num. trained: 212875264)
| no existing checkpoint found checkpoints/checkpoint_last.pt
| loading train data for epoch 0
| /home/taylanbil/data/wmt18_en_de_bpej32k train en-de 5186259 examples
| WARNING: 240829 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[1422704, 2718830, 2897878, 3673048, 2016896, 2200333, 3886976, 2097242, 3124502, 2871279]
Epoch 1 begin 19:12:18
training/ 19:13:50, device xla:1, step 10, Rate=147.54, Global Rate=54.28
training/ 19:13:57, device xla:1, step 20, Rate=181.96, Global Rate=83.93
training/ 19:14:04, device xla:1, step 30, Rate=186.06, Global Rate=102.69
training/ 19:14:11, device xla:1, step 40, Rate=188.02, Global Rate=115.84
training/ 19:14:18, device xla:1, step 50, Rate=187.74, Global Rate=125.44
training/ 19:14:25, device xla:1, step 60, Rate=185.40, Global Rate=132.60
training/ 19:14:31, device xla:1, step 70, Rate=188.47, Global Rate=138.48
training/ 19:14:38, device xla:1, step 80, Rate=188.61, Global Rate=143.22
training/ 19:14:45, device xla:1, step 90, Rate=186.87, Global Rate=147.07
training/ 19:14:52, device xla:1, step 100, Rate=186.01, Global Rate=150.22
training/ 19:14:59, device xla:1, step 110, Rate=187.23, Global Rate=152.94
training/ 19:15:05, device xla:1, step 120, Rate=187.16, Global Rate=155.34
training/ 19:15:12, device xla:1, step 130, Rate=185.56, Global Rate=157.29
training/ 19:15:19, device xla:1, step 140, Rate=185.94, Global Rate=159.04
training/ 19:15:26, device xla:1, step 150, Rate=186.13, Global Rate=160.60
training/ 19:15:33, device xla:1, step 160, Rate=186.80, Global Rate=162.02
training/ 19:15:40, device xla:1, step 170, Rate=187.05, Global Rate=163.31
training/ 19:15:47, device xla:1, step 180, Rate=187.78, Global Rate=164.50
training/ 19:15:53, device xla:1, step 190, Rate=186.71, Global Rate=165.55
training/ 19:16:00, device xla:1, step 200, Rate=187.25, Global Rate=166.49
training/ 19:16:07, device xla:1, step 210, Rate=187.82, Global Rate=167.38
training/ 19:16:14, device xla:1, step 220, Rate=187.15, Global Rate=168.20
training/ 19:16:21, device xla:1, step 230, Rate=186.59, Global Rate=168.93
training/ 19:16:28, device xla:1, step 240, Rate=186.66, Global Rate=169.58
training/ 19:16:35, device xla:1, step 250, Rate=186.69, Global Rate=170.20
training/ 19:16:41, device xla:1, step 260, Rate=187.25, Global Rate=170.79
training/ 19:16:48, device xla:1, step 270, Rate=187.82, Global Rate=171.38
training/ 19:16:55, device xla:1, step 280, Rate=187.10, Global Rate=171.90
training/ 19:17:02, device xla:1, step 290, Rate=186.56, Global Rate=172.36
training/ 19:17:09, device xla:1, step 300, Rate=187.18, Global Rate=172.81
training/ 19:17:16, device xla:1, step 310, Rate=188.27, Global Rate=173.27
training/ 19:17:22, device xla:1, step 320, Rate=187.49, Global Rate=173.69
training/ 19:17:29, device xla:1, step 330, Rate=188.12, Global Rate=174.10
training/ 19:17:36, device xla:1, step 340, Rate=186.50, Global Rate=174.44
training/ 19:17:43, device xla:1, step 350, Rate=187.66, Global Rate=174.79
training/ 19:17:50, device xla:1, step 360, Rate=187.33, Global Rate=175.12
training/ 19:17:57, device xla:1, step 370, Rate=188.36, Global Rate=175.46
training/ 19:18:03, device xla:1, step 380, Rate=186.55, Global Rate=175.73
training/ 19:19:30, device xla:1, step 390, Rate=168.56, Global Rate=137.32
training/ 19:19:37, device xla:1, step 400, Rate=184.89, Global Rate=138.24
training/ 19:19:44, device xla:1, step 410, Rate=186.70, Global Rate=139.13
training/ 19:19:51, device xla:1, step 420, Rate=186.37, Global Rate=139.97
training/ 19:19:58, device xla:1, step 430, Rate=186.65, Global Rate=140.78
training/ 19:20:04, device xla:1, step 440, Rate=187.37, Global Rate=141.58
training/ 19:20:11, device xla:1, step 450, Rate=186.85, Global Rate=142.35
training/ 19:20:18, device xla:1, step 460, Rate=185.47, Global Rate=143.07
training/ 19:20:25, device xla:1, step 470, Rate=186.31, Global Rate=143.78
training/ 19:20:32, device xla:1, step 480, Rate=186.04, Global Rate=144.46
training/ 19:20:39, device xla:1, step 490, Rate=186.27, Global Rate=145.13
training/ 19:20:46, device xla:1, step 500, Rate=186.74, Global Rate=145.78
training/ 19:20:53, device xla:1, step 510, Rate=184.84, Global Rate=146.39
training/ 19:20:59, device xla:1, step 520, Rate=185.34, Global Rate=146.99
training/ 19:21:06, device xla:1, step 530, Rate=186.37, Global Rate=147.58
training/ 19:21:13, device xla:1, step 540, Rate=185.27, Global Rate=148.13
training/ 19:21:20, device xla:1, step 550, Rate=186.04, Global Rate=148.68
training/ 19:21:27, device xla:1, step 560, Rate=186.03, Global Rate=149.21
training/ 19:21:34, device xla:1, step 570, Rate=187.00, Global Rate=149.74
training/ 19:21:41, device xla:1, step 580, Rate=185.08, Global Rate=150.24
training/ 19:21:48, device xla:1, step 590, Rate=184.48, Global Rate=150.69
training/ 19:21:55, device xla:1, step 600, Rate=186.42, Global Rate=151.17
training/ 19:22:02, device xla:1, step 610, Rate=185.78, Global Rate=151.63
training/ 19:22:08, device xla:1, step 620, Rate=186.52, Global Rate=152.09
training/ 19:22:15, device xla:1, step 630, Rate=186.41, Global Rate=152.54
training/ 19:22:22, device xla:1, step 640, Rate=186.55, Global Rate=152.97
training/ 19:22:29, device xla:1, step 650, Rate=186.48, Global Rate=153.40
training/ 19:22:36, device xla:1, step 660, Rate=184.41, Global Rate=153.76
training/ 19:22:43, device xla:1, step 670, Rate=186.34, Global Rate=154.17
training/ 19:22:50, device xla:1, step 680, Rate=186.05, Global Rate=154.55
training/ 19:22:57, device xla:1, step 690, Rate=184.91, Global Rate=154.92
training/ 19:23:04, device xla:1, step 700, Rate=184.10, Global Rate=155.27
training/ 19:23:11, device xla:1, step 710, Rate=184.34, Global Rate=155.62
training/ 19:23:17, device xla:1, step 720, Rate=185.56, Global Rate=155.98
training/ 19:23:24, device xla:1, step 730, Rate=186.65, Global Rate=156.32
training/ 19:23:31, device xla:1, step 740, Rate=186.87, Global Rate=156.67
training/ 19:23:38, device xla:1, step 750, Rate=187.02, Global Rate=157.01
training/ 19:23:45, device xla:1, step 760, Rate=186.58, Global Rate=157.34
training/ 19:23:52, device xla:1, step 770, Rate=187.04, Global Rate=157.66
training/ 19:23:59, device xla:1, step 780, Rate=185.92, Global Rate=157.96
training/ 19:24:06, device xla:1, step 790, Rate=186.37, Global Rate=158.27
training/ 19:24:12, device xla:1, step 800, Rate=185.71, Global Rate=158.56
training/ 19:24:19, device xla:1, step 810, Rate=183.70, Global Rate=158.83
training/ 19:24:26, device xla:1, step 820, Rate=185.45, Global Rate=159.11
training/ 19:24:33, device xla:1, step 830, Rate=185.46, Global Rate=159.37
training/ 19:24:40, device xla:1, step 840, Rate=186.16, Global Rate=159.65
training/ 19:24:47, device xla:1, step 850, Rate=182.81, Global Rate=159.89
training/ 19:24:54, device xla:1, step 860, Rate=186.54, Global Rate=160.15
training/ 19:25:01, device xla:1, step 870, Rate=185.59, Global Rate=160.41
training/ 19:25:08, device xla:1, step 880, Rate=186.54, Global Rate=160.66
training/ 19:25:15, device xla:1, step 890, Rate=187.43, Global Rate=160.92
training/ 19:25:21, device xla:1, step 900, Rate=187.47, Global Rate=161.17
training/ 19:25:28, device xla:1, step 910, Rate=186.96, Global Rate=161.42
training/ 19:25:35, device xla:1, step 920, Rate=184.70, Global Rate=161.63
training/ 19:25:42, device xla:1, step 930, Rate=186.57, Global Rate=161.86
training/ 19:25:49, device xla:1, step 940, Rate=185.84, Global Rate=162.08
training/ 19:25:56, device xla:1, step 950, Rate=185.63, Global Rate=162.30
training/ 19:26:03, device xla:1, step 960, Rate=186.33, Global Rate=162.52
training/ 19:26:10, device xla:1, step 970, Rate=186.88, Global Rate=162.74
training/ 19:26:16, device xla:1, step 980, Rate=187.71, Global Rate=162.96
training/ 19:26:23, device xla:1, step 990, Rate=186.77, Global Rate=163.16
training/ 19:26:30, device xla:1, step 1000, Rate=187.25, Global Rate=163.37
training/ 19:26:37, device xla:1, step 1010, Rate=185.91, Global Rate=163.56
training/ 19:26:44, device xla:1, step 1020, Rate=186.25, Global Rate=163.76
training/ 19:26:51, device xla:1, step 1030, Rate=185.78, Global Rate=163.95
training/ 19:26:58, device xla:1, step 1040, Rate=185.39, Global Rate=164.13
training/ 19:27:05, device xla:1, step 1050, Rate=184.32, Global Rate=164.30
training/ 19:27:12, device xla:1, step 1060, Rate=182.69, Global Rate=164.46
training/ 19:27:18, device xla:1, step 1070, Rate=186.17, Global Rate=164.64
training/ 19:27:25, device xla:1, step 1080, Rate=186.41, Global Rate=164.81
training/ 19:27:32, device xla:1, step 1090, Rate=183.01, Global Rate=164.97
training/ 19:27:39, device xla:1, step 1100, Rate=184.42, Global Rate=165.13
training/ 19:27:46, device xla:1, step 1110, Rate=185.77, Global Rate=165.29
training/ 19:27:53, device xla:1, step 1120, Rate=186.76, Global Rate=165.46
training/ 19:28:00, device xla:1, step 1130, Rate=186.92, Global Rate=165.63
training/ 19:28:07, device xla:1, step 1140, Rate=186.51, Global Rate=165.79
training/ 19:28:14, device xla:1, step 1150, Rate=186.30, Global Rate=165.95
training/ 19:28:21, device xla:1, step 1160, Rate=185.14, Global Rate=166.10
training/ 19:28:27, device xla:1, step 1170, Rate=184.13, Global Rate=166.24
training/ 19:28:34, device xla:1, step 1180, Rate=185.06, Global Rate=166.39
training/ 19:28:41, device xla:1, step 1190, Rate=186.86, Global Rate=166.54
training/ 19:28:48, device xla:1, step 1200, Rate=186.99, Global Rate=166.69
training/ 19:28:55, device xla:1, step 1210, Rate=186.67, Global Rate=166.83
training/ 19:29:02, device xla:1, step 1220, Rate=186.29, Global Rate=166.97
training/ 19:29:09, device xla:1, step 1230, Rate=184.54, Global Rate=167.10
training/ 19:29:16, device xla:1, step 1240, Rate=185.07, Global Rate=167.23
training/ 19:29:23, device xla:1, step 1250, Rate=185.30, Global Rate=167.36
training/ 19:29:30, device xla:1, step 1260, Rate=185.01, Global Rate=167.48
training/ 19:29:36, device xla:1, step 1270, Rate=185.68, Global Rate=167.62
training/ 19:29:43, device xla:1, step 1280, Rate=186.37, Global Rate=167.75
training/ 19:29:50, device xla:1, step 1290, Rate=187.15, Global Rate=167.88
training/ 19:29:57, device xla:1, step 1300, Rate=186.82, Global Rate=168.01
training/ 19:30:04, device xla:1, step 1310, Rate=182.35, Global Rate=168.12
training/ 19:30:11, device xla:1, step 1320, Rate=186.00, Global Rate=168.25
training/ 19:30:18, device xla:1, step 1330, Rate=186.45, Global Rate=168.37
training/ 19:30:25, device xla:1, step 1340, Rate=184.82, Global Rate=168.48
training/ 19:30:32, device xla:1, step 1350, Rate=186.35, Global Rate=168.60
training/ 19:30:38, device xla:1, step 1360, Rate=186.34, Global Rate=168.72
training/ 19:30:45, device xla:1, step 1370, Rate=186.30, Global Rate=168.83
training/ 19:30:52, device xla:1, step 1380, Rate=185.49, Global Rate=168.94
training/ 19:30:59, device xla:1, step 1390, Rate=184.25, Global Rate=169.03
training/ 19:31:06, device xla:1, step 1400, Rate=185.99, Global Rate=169.15
training/ 19:31:13, device xla:1, step 1410, Rate=186.44, Global Rate=169.26
training/ 19:31:20, device xla:1, step 1420, Rate=183.82, Global Rate=169.36
training/ 19:31:27, device xla:1, step 1430, Rate=186.68, Global Rate=169.47
training/ 19:31:34, device xla:1, step 1440, Rate=186.40, Global Rate=169.57
training/ 19:31:41, device xla:1, step 1450, Rate=185.86, Global Rate=169.66
training/ 19:31:47, device xla:1, step 1460, Rate=185.60, Global Rate=169.76
training/ 19:31:54, device xla:1, step 1470, Rate=185.72, Global Rate=169.86
training/ 19:32:01, device xla:1, step 1480, Rate=186.44, Global Rate=169.96
training/ 19:32:08, device xla:1, step 1490, Rate=185.70, Global Rate=170.06
training/ 19:32:15, device xla:1, step 1500, Rate=185.56, Global Rate=170.15
training/ 19:32:22, device xla:1, step 1510, Rate=185.26, Global Rate=170.24
training/ 19:32:29, device xla:1, step 1520, Rate=185.42, Global Rate=170.33
training/ 19:32:36, device xla:1, step 1530, Rate=185.11, Global Rate=170.42
training/ 19:32:43, device xla:1, step 1540, Rate=185.86, Global Rate=170.51
training/ 19:32:50, device xla:1, step 1550, Rate=185.25, Global Rate=170.60
training/ 19:32:57, device xla:1, step 1560, Rate=184.66, Global Rate=170.68
training/ 19:33:03, device xla:1, step 1570, Rate=185.51, Global Rate=170.77
training/ 19:33:10, device xla:1, step 1580, Rate=186.02, Global Rate=170.86
training/ 19:33:17, device xla:1, step 1590, Rate=185.21, Global Rate=170.94
training/ 19:33:24, device xla:1, step 1600, Rate=185.99, Global Rate=171.03
training/ 19:33:31, device xla:1, step 1610, Rate=186.51, Global Rate=171.12
training/ 19:33:38, device xla:1, step 1620, Rate=183.23, Global Rate=171.19
training/ 19:33:45, device xla:1, step 1630, Rate=184.66, Global Rate=171.28
training/ 19:33:52, device xla:1, step 1640, Rate=184.81, Global Rate=171.35
training/ 19:33:59, device xla:1, step 1650, Rate=185.64, Global Rate=171.43
training/ 19:34:05, device xla:1, step 1660, Rate=184.85, Global Rate=171.51
training/ 19:34:12, device xla:1, step 1670, Rate=186.26, Global Rate=171.59
training/ 19:34:19, device xla:1, step 1680, Rate=186.42, Global Rate=171.67
training/ 19:34:26, device xla:1, step 1690, Rate=186.36, Global Rate=171.75
training/ 19:34:33, device xla:1, step 1700, Rate=182.40, Global Rate=171.81
training/ 19:34:40, device xla:1, step 1710, Rate=185.47, Global Rate=171.89
training/ 19:34:47, device xla:1, step 1720, Rate=186.88, Global Rate=171.97
training/ 19:34:54, device xla:1, step 1730, Rate=185.80, Global Rate=172.05
training/ 19:35:01, device xla:1, step 1740, Rate=183.99, Global Rate=172.11
training/ 19:35:08, device xla:1, step 1750, Rate=185.54, Global Rate=172.18
training/ 19:35:14, device xla:1, step 1760, Rate=187.24, Global Rate=172.26
training/ 19:35:21, device xla:1, step 1770, Rate=184.00, Global Rate=172.33
training/ 19:35:28, device xla:1, step 1780, Rate=187.41, Global Rate=172.40
training/ 19:35:35, device xla:1, step 1790, Rate=187.14, Global Rate=172.48
training/ 19:35:42, device xla:1, step 1800, Rate=182.60, Global Rate=172.54
training/ 19:35:49, device xla:1, step 1810, Rate=185.93, Global Rate=172.61
training/ 19:35:56, device xla:1, step 1820, Rate=184.25, Global Rate=172.68
training/ 19:36:03, device xla:1, step 1830, Rate=186.53, Global Rate=172.74
training/ 19:36:09, device xla:1, step 1840, Rate=186.16, Global Rate=172.81
training/ 19:36:16, device xla:1, step 1850, Rate=187.09, Global Rate=172.88
training/ 19:36:23, device xla:1, step 1860, Rate=186.81, Global Rate=172.94
training/ 19:36:30, device xla:1, step 1870, Rate=187.45, Global Rate=173.01
training/ 19:36:37, device xla:1, step 1880, Rate=183.32, Global Rate=173.07
training/ 19:36:44, device xla:1, step 1890, Rate=187.30, Global Rate=173.14
training/ 19:36:51, device xla:1, step 1900, Rate=186.92, Global Rate=173.21
training/ 19:36:58, device xla:1, step 1910, Rate=186.06, Global Rate=173.27
training/ 19:37:04, device xla:1, step 1920, Rate=186.16, Global Rate=173.33
training/ 19:37:11, device xla:1, step 1930, Rate=186.65, Global Rate=173.40
training/ 19:37:18, device xla:1, step 1940, Rate=186.83, Global Rate=173.46
training/ 19:37:25, device xla:1, step 1950, Rate=186.19, Global Rate=173.52
training/ 19:37:32, device xla:1, step 1960, Rate=185.87, Global Rate=173.58
training/ 19:37:39, device xla:1, step 1970, Rate=185.04, Global Rate=173.63
training/ 19:37:46, device xla:1, step 1980, Rate=184.88, Global Rate=173.69
training/ 19:37:53, device xla:1, step 1990, Rate=185.78, Global Rate=173.74
training/ 19:38:00, device xla:1, step 2000, Rate=185.22, Global Rate=173.79
training/ 19:38:07, device xla:1, step 2010, Rate=184.74, Global Rate=173.85
training/ 19:38:13, device xla:1, step 2020, Rate=186.94, Global Rate=173.91
training/ 19:38:20, device xla:1, step 2030, Rate=187.25, Global Rate=173.97
training/ 19:38:27, device xla:1, step 2040, Rate=186.30, Global Rate=174.02
training/ 19:38:34, device xla:1, step 2050, Rate=186.69, Global Rate=174.08
training/ 19:38:41, device xla:1, step 2060, Rate=184.38, Global Rate=174.13
training/ 19:38:48, device xla:1, step 2070, Rate=184.52, Global Rate=174.18
training/ 19:38:55, device xla:1, step 2080, Rate=184.12, Global Rate=174.22
training/ 19:39:02, device xla:1, step 2090, Rate=186.34, Global Rate=174.27
training/ 19:39:09, device xla:1, step 2100, Rate=185.15, Global Rate=174.33
training/ 19:39:16, device xla:1, step 2110, Rate=182.75, Global Rate=174.36
training/ 19:39:22, device xla:1, step 2120, Rate=187.01, Global Rate=174.42
training/ 19:39:29, device xla:1, step 2130, Rate=187.61, Global Rate=174.47
training/ 19:39:36, device xla:1, step 2140, Rate=187.50, Global Rate=174.53
training/ 19:39:43, device xla:1, step 2150, Rate=186.67, Global Rate=174.58
training/ 19:39:50, device xla:1, step 2160, Rate=185.00, Global Rate=174.63
training/ 19:39:57, device xla:1, step 2170, Rate=187.03, Global Rate=174.68
training/ 19:40:04, device xla:1, step 2180, Rate=184.96, Global Rate=174.72
training/ 19:40:11, device xla:1, step 2190, Rate=186.88, Global Rate=174.77
training/ 19:40:17, device xla:1, step 2200, Rate=187.45, Global Rate=174.83
training/ 19:40:24, device xla:1, step 2210, Rate=187.77, Global Rate=174.88
training/ 19:40:31, device xla:1, step 2220, Rate=185.48, Global Rate=174.92
training/ 19:40:38, device xla:1, step 2230, Rate=187.14, Global Rate=174.97
training/ 19:40:45, device xla:1, step 2240, Rate=187.18, Global Rate=175.02
training/ 19:40:52, device xla:1, step 2250, Rate=187.61, Global Rate=175.07
training/ 19:40:59, device xla:1, step 2260, Rate=183.84, Global Rate=175.11
training/ 19:41:05, device xla:1, step 2270, Rate=186.06, Global Rate=175.16
training/ 19:41:12, device xla:1, step 2280, Rate=185.68, Global Rate=175.20
training/ 19:41:19, device xla:1, step 2290, Rate=187.09, Global Rate=175.25
training/ 19:41:26, device xla:1, step 2300, Rate=185.77, Global Rate=175.30
training/ 19:41:33, device xla:1, step 2310, Rate=187.42, Global Rate=175.35
training/ 19:41:40, device xla:1, step 2320, Rate=185.84, Global Rate=175.39
training/ 19:41:47, device xla:1, step 2330, Rate=187.35, Global Rate=175.44
training/ 19:41:53, device xla:1, step 2340, Rate=186.20, Global Rate=175.48
training/ 19:42:00, device xla:1, step 2350, Rate=184.80, Global Rate=175.52
training/ 19:42:07, device xla:1, step 2360, Rate=185.02, Global Rate=175.55
training/ 19:42:14, device xla:1, step 2370, Rate=186.69, Global Rate=175.60
training/ 19:42:21, device xla:1, step 2380, Rate=185.29, Global Rate=175.64
training/ 19:42:28, device xla:1, step 2390, Rate=186.32, Global Rate=175.68
training/ 19:42:35, device xla:1, step 2400, Rate=184.87, Global Rate=175.72
training/ 19:42:42, device xla:1, step 2410, Rate=185.60, Global Rate=175.75
training/ 19:42:49, device xla:1, step 2420, Rate=184.61, Global Rate=175.79
training/ 19:42:56, device xla:1, step 2430, Rate=185.29, Global Rate=175.83
training/ 19:43:02, device xla:1, step 2440, Rate=186.20, Global Rate=175.87
training/ 19:43:09, device xla:1, step 2450, Rate=185.62, Global Rate=175.90
training/ 19:43:16, device xla:1, step 2460, Rate=185.54, Global Rate=175.94
training/ 19:43:23, device xla:1, step 2470, Rate=186.19, Global Rate=175.98
training/ 19:43:30, device xla:1, step 2480, Rate=187.04, Global Rate=176.02
training/ 19:43:37, device xla:1, step 2490, Rate=187.18, Global Rate=176.07
training/ 19:43:44, device xla:1, step 2500, Rate=185.32, Global Rate=176.10
training/ 19:43:51, device xla:1, step 2510, Rate=186.24, Global Rate=176.14
training/ 19:43:58, device xla:1, step 2520, Rate=184.88, Global Rate=176.17
training/ 19:44:05, device xla:1, step 2530, Rate=184.22, Global Rate=176.20
training/ 19:44:11, device xla:1, step 2540, Rate=186.43, Global Rate=176.24
training/ 19:44:18, device xla:1, step 2550, Rate=188.05, Global Rate=176.28
training/ 19:44:25, device xla:1, step 2560, Rate=186.71, Global Rate=176.32
training/ 19:44:32, device xla:1, step 2570, Rate=185.65, Global Rate=176.35
training/ 19:44:39, device xla:1, step 2580, Rate=186.18, Global Rate=176.39
training/ 19:44:46, device xla:1, step 2590, Rate=187.28, Global Rate=176.43
training/ 19:44:53, device xla:1, step 2600, Rate=187.04, Global Rate=176.46
training/ 19:45:00, device xla:1, step 2610, Rate=185.51, Global Rate=176.49
training/ 19:45:06, device xla:1, step 2620, Rate=185.94, Global Rate=176.53
training/ 19:45:13, device xla:1, step 2630, Rate=186.56, Global Rate=176.56
training/ 19:45:20, device xla:1, step 2640, Rate=186.46, Global Rate=176.60
training/ 19:45:27, device xla:1, step 2650, Rate=187.12, Global Rate=176.63
training/ 19:45:34, device xla:1, step 2660, Rate=186.38, Global Rate=176.67
training/ 19:45:41, device xla:1, step 2670, Rate=187.29, Global Rate=176.70
training/ 19:45:48, device xla:1, step 2680, Rate=186.93, Global Rate=176.74
training/ 19:45:54, device xla:1, step 2690, Rate=184.46, Global Rate=176.77
training/ 19:46:01, device xla:1, step 2700, Rate=186.10, Global Rate=176.80
training/ 19:46:08, device xla:1, step 2710, Rate=186.33, Global Rate=176.83
training/ 19:46:15, device xla:1, step 2720, Rate=186.85, Global Rate=176.87
training/ 19:46:22, device xla:1, step 2730, Rate=186.76, Global Rate=176.90
training/ 19:46:29, device xla:1, step 2740, Rate=185.95, Global Rate=176.93
training/ 19:46:36, device xla:1, step 2750, Rate=185.79, Global Rate=176.96
training/ 19:46:43, device xla:1, step 2760, Rate=186.42, Global Rate=176.99
training/ 19:46:49, device xla:1, step 2770, Rate=186.29, Global Rate=177.02
training/ 19:46:56, device xla:1, step 2780, Rate=185.82, Global Rate=177.05
training/ 19:47:03, device xla:1, step 2790, Rate=186.96, Global Rate=177.08
training/ 19:47:10, device xla:1, step 2800, Rate=185.89, Global Rate=177.11
training/ 19:47:17, device xla:1, step 2810, Rate=185.72, Global Rate=177.14
training/ 19:47:24, device xla:1, step 2820, Rate=185.63, Global Rate=177.17
training/ 19:47:31, device xla:1, step 2830, Rate=186.92, Global Rate=177.20
training/ 19:47:38, device xla:1, step 2840, Rate=186.66, Global Rate=177.23
training/ 19:47:45, device xla:1, step 2850, Rate=186.86, Global Rate=177.26
training/ 19:47:52, device xla:1, step 2860, Rate=185.88, Global Rate=177.29
training/ 19:47:58, device xla:1, step 2870, Rate=186.70, Global Rate=177.32
training/ 19:48:05, device xla:1, step 2880, Rate=184.38, Global Rate=177.34
training/ 19:48:12, device xla:1, step 2890, Rate=184.60, Global Rate=177.37
training/ 19:48:19, device xla:1, step 2900, Rate=186.22, Global Rate=177.39
training/ 19:48:26, device xla:1, step 2910, Rate=185.67, Global Rate=177.42
training/ 19:48:33, device xla:1, step 2920, Rate=185.91, Global Rate=177.45
training/ 19:48:40, device xla:1, step 2930, Rate=187.05, Global Rate=177.48
training/ 19:48:47, device xla:1, step 2940, Rate=186.36, Global Rate=177.51
training/ 19:48:53, device xla:1, step 2950, Rate=186.66, Global Rate=177.54
training/ 19:49:00, device xla:1, step 2960, Rate=185.06, Global Rate=177.56
training/ 19:49:07, device xla:1, step 2970, Rate=183.32, Global Rate=177.58
training/ 19:49:14, device xla:1, step 2980, Rate=185.81, Global Rate=177.61
training/ 19:49:21, device xla:1, step 2990, Rate=185.97, Global Rate=177.63
training/ 19:49:28, device xla:1, step 3000, Rate=185.00, Global Rate=177.65
training/ 19:49:35, device xla:1, step 3010, Rate=184.52, Global Rate=177.68
training/ 19:49:42, device xla:1, step 3020, Rate=184.83, Global Rate=177.70
training/ 19:49:49, device xla:1, step 3030, Rate=185.96, Global Rate=177.72
training/ 19:49:56, device xla:1, step 3040, Rate=184.87, Global Rate=177.75
training/ 19:50:03, device xla:1, step 3050, Rate=184.97, Global Rate=177.77
training/ 19:50:10, device xla:1, step 3060, Rate=184.35, Global Rate=177.79
training/ 19:50:17, device xla:1, step 3070, Rate=184.86, Global Rate=177.81
training/ 19:50:24, device xla:1, step 3080, Rate=184.17, Global Rate=177.83
training/ 19:50:30, device xla:1, step 3090, Rate=184.89, Global Rate=177.85
training/ 19:50:37, device xla:1, step 3100, Rate=185.02, Global Rate=177.87
training/ 19:50:44, device xla:1, step 3110, Rate=185.20, Global Rate=177.90
training/ 19:50:51, device xla:1, step 3120, Rate=185.28, Global Rate=177.92
training/ 19:50:58, device xla:1, step 3130, Rate=182.50, Global Rate=177.93
training/ 19:51:05, device xla:1, step 3140, Rate=184.60, Global Rate=177.95
training/ 19:51:12, device xla:1, step 3150, Rate=184.41, Global Rate=177.97
training/ 19:51:19, device xla:1, step 3160, Rate=184.43, Global Rate=177.99
training/ 19:51:26, device xla:1, step 3170, Rate=184.85, Global Rate=178.01
training/ 19:51:33, device xla:1, step 3180, Rate=184.59, Global Rate=178.03
training/ 19:51:40, device xla:1, step 3190, Rate=184.01, Global Rate=178.05
training/ 19:51:47, device xla:1, step 3200, Rate=184.48, Global Rate=178.07
training/ 19:51:54, device xla:1, step 3210, Rate=185.00, Global Rate=178.09
training/ 19:52:01, device xla:1, step 3220, Rate=184.02, Global Rate=178.11
training/ 19:52:08, device xla:1, step 3230, Rate=184.78, Global Rate=178.13
training/ 19:52:15, device xla:1, step 3240, Rate=184.84, Global Rate=178.15
training/ 19:52:21, device xla:1, step 3250, Rate=184.98, Global Rate=178.17
training/ 19:52:28, device xla:1, step 3260, Rate=182.50, Global Rate=178.18
training/ 19:52:35, device xla:1, step 3270, Rate=183.98, Global Rate=178.20
training/ 19:52:42, device xla:1, step 3280, Rate=184.47, Global Rate=178.22
training/ 19:52:49, device xla:1, step 3290, Rate=184.91, Global Rate=178.24
training/ 19:52:56, device xla:1, step 3300, Rate=185.48, Global Rate=178.26
training/ 19:53:03, device xla:1, step 3310, Rate=184.60, Global Rate=178.28
training/ 19:53:10, device xla:1, step 3320, Rate=185.40, Global Rate=178.30
training/ 19:53:17, device xla:1, step 3330, Rate=186.50, Global Rate=178.32
training/ 19:53:24, device xla:1, step 3340, Rate=185.36, Global Rate=178.34
training/ 19:53:31, device xla:1, step 3350, Rate=184.37, Global Rate=178.36
training/ 19:53:38, device xla:1, step 3360, Rate=182.94, Global Rate=178.37
training/ 19:53:45, device xla:1, step 3370, Rate=184.70, Global Rate=178.39
training/ 19:53:52, device xla:1, step 3380, Rate=185.10, Global Rate=178.41
training/ 19:53:59, device xla:1, step 3390, Rate=184.81, Global Rate=178.42
training/ 19:54:06, device xla:1, step 3400, Rate=184.23, Global Rate=178.44
training/ 19:54:12, device xla:1, step 3410, Rate=184.59, Global Rate=178.46
training/ 19:54:19, device xla:1, step 3420, Rate=182.64, Global Rate=178.47
training/ 19:54:26, device xla:1, step 3430, Rate=184.44, Global Rate=178.49
training/ 19:54:33, device xla:1, step 3440, Rate=185.02, Global Rate=178.51
training/ 19:54:40, device xla:1, step 3450, Rate=184.51, Global Rate=178.52
training/ 19:54:47, device xla:1, step 3460, Rate=182.53, Global Rate=178.53
training/ 19:54:54, device xla:1, step 3470, Rate=184.23, Global Rate=178.55
training/ 19:55:01, device xla:1, step 3480, Rate=184.97, Global Rate=178.57
training/ 19:55:08, device xla:1, step 3490, Rate=180.61, Global Rate=178.58
training/ 19:55:15, device xla:1, step 3500, Rate=184.30, Global Rate=178.59
training/ 19:55:22, device xla:1, step 3510, Rate=184.60, Global Rate=178.61
training/ 19:55:29, device xla:1, step 3520, Rate=184.40, Global Rate=178.63
training/ 19:55:36, device xla:1, step 3530, Rate=184.22, Global Rate=178.64
training/ 19:55:43, device xla:1, step 3540, Rate=184.30, Global Rate=178.66
training/ 19:55:50, device xla:1, step 3550, Rate=184.92, Global Rate=178.67
training/ 19:55:57, device xla:1, step 3560, Rate=180.55, Global Rate=178.68
training/ 19:56:04, device xla:1, step 3570, Rate=185.13, Global Rate=178.70
training/ 19:56:11, device xla:1, step 3580, Rate=185.13, Global Rate=178.72
training/ 19:56:18, device xla:1, step 3590, Rate=186.37, Global Rate=178.74
training/ 19:56:24, device xla:1, step 3600, Rate=185.00, Global Rate=178.75
training/ 19:56:31, device xla:1, step 3610, Rate=184.01, Global Rate=178.76
training/ 19:56:38, device xla:1, step 3620, Rate=184.43, Global Rate=178.78
training/ 19:56:45, device xla:1, step 3630, Rate=183.48, Global Rate=178.79
training/ 19:56:52, device xla:1, step 3640, Rate=184.70, Global Rate=178.81
training/ 19:56:59, device xla:1, step 3650, Rate=184.28, Global Rate=178.82
training/ 19:57:06, device xla:1, step 3660, Rate=184.75, Global Rate=178.84
training/ 19:57:13, device xla:1, step 3670, Rate=184.62, Global Rate=178.85
training/ 19:57:20, device xla:1, step 3680, Rate=184.91, Global Rate=178.87
training/ 19:57:27, device xla:1, step 3690, Rate=183.08, Global Rate=178.88
training/ 19:57:34, device xla:1, step 3700, Rate=185.00, Global Rate=178.90
training/ 19:57:41, device xla:1, step 3710, Rate=183.67, Global Rate=178.91
training/ 19:57:48, device xla:1, step 3720, Rate=184.84, Global Rate=178.92
training/ 19:57:55, device xla:1, step 3730, Rate=185.10, Global Rate=178.94
training/ 19:58:02, device xla:1, step 3740, Rate=184.52, Global Rate=178.95
training/ 19:58:09, device xla:1, step 3750, Rate=184.10, Global Rate=178.97
training/ 19:58:16, device xla:1, step 3760, Rate=184.61, Global Rate=178.98
training/ 19:58:23, device xla:1, step 3770, Rate=184.77, Global Rate=179.00
training/ 19:58:29, device xla:1, step 3780, Rate=184.43, Global Rate=179.01
training/ 19:58:36, device xla:1, step 3790, Rate=185.06, Global Rate=179.03
training/ 19:58:43, device xla:1, step 3800, Rate=184.58, Global Rate=179.04
training/ 19:58:50, device xla:1, step 3810, Rate=184.80, Global Rate=179.05
training/ 19:58:57, device xla:1, step 3820, Rate=185.10, Global Rate=179.07
training/ 19:59:04, device xla:1, step 3830, Rate=184.53, Global Rate=179.08
training/ 19:59:11, device xla:1, step 3840, Rate=183.44, Global Rate=179.09
training/ 19:59:18, device xla:1, step 3850, Rate=185.00, Global Rate=179.11
training/ 19:59:25, device xla:1, step 3860, Rate=182.18, Global Rate=179.12
training/ 19:59:32, device xla:1, step 3870, Rate=186.18, Global Rate=179.14
training/ 19:59:39, device xla:1, step 3880, Rate=185.45, Global Rate=179.15
training/ 19:59:46, device xla:1, step 3890, Rate=184.87, Global Rate=179.17
training/ 19:59:53, device xla:1, step 3900, Rate=185.05, Global Rate=179.18
training/ 20:00:00, device xla:1, step 3910, Rate=184.54, Global Rate=179.19
training/ 20:00:07, device xla:1, step 3920, Rate=184.50, Global Rate=179.20
training/ 20:00:13, device xla:1, step 3930, Rate=184.12, Global Rate=179.22
training/ 20:00:20, device xla:1, step 3940, Rate=183.98, Global Rate=179.23
training/ 20:00:27, device xla:1, step 3950, Rate=184.81, Global Rate=179.24
training/ 20:00:34, device xla:1, step 3960, Rate=184.47, Global Rate=179.26
training/ 20:00:41, device xla:1, step 3970, Rate=183.64, Global Rate=179.27
training/ 20:00:48, device xla:1, step 3980, Rate=184.64, Global Rate=179.28
training/ 20:00:55, device xla:1, step 3990, Rate=185.54, Global Rate=179.30
training/ 20:01:02, device xla:1, step 4000, Rate=184.86, Global Rate=179.31
training/ 20:01:09, device xla:1, step 4010, Rate=183.76, Global Rate=179.32
training/ 20:01:16, device xla:1, step 4020, Rate=184.67, Global Rate=179.33
training/ 20:01:23, device xla:1, step 4030, Rate=184.09, Global Rate=179.34
training/ 20:01:30, device xla:1, step 4040, Rate=183.91, Global Rate=179.35
training/ 20:01:37, device xla:1, step 4050, Rate=184.08, Global Rate=179.36
training/ 20:01:44, device xla:1, step 4060, Rate=183.66, Global Rate=179.37
training/ 20:01:51, device xla:1, step 4070, Rate=185.29, Global Rate=179.39
training/ 20:01:58, device xla:1, step 4080, Rate=184.99, Global Rate=179.40
training/ 20:02:05, device xla:1, step 4090, Rate=184.46, Global Rate=179.41
training/ 20:02:11, device xla:1, step 4100, Rate=185.81, Global Rate=179.43
training/ 20:02:18, device xla:1, step 4110, Rate=181.29, Global Rate=179.43
training/ 20:02:25, device xla:1, step 4120, Rate=183.51, Global Rate=179.45
training/ 20:02:32, device xla:1, step 4130, Rate=184.93, Global Rate=179.46
training/ 20:02:39, device xla:1, step 4140, Rate=184.43, Global Rate=179.47
training/ 20:02:46, device xla:1, step 4150, Rate=185.54, Global Rate=179.48
training/ 20:02:53, device xla:1, step 4160, Rate=186.84, Global Rate=179.50
training/ 20:03:00, device xla:1, step 4170, Rate=186.37, Global Rate=179.52
training/ 20:03:07, device xla:1, step 4180, Rate=186.73, Global Rate=179.53
training/ 20:03:14, device xla:1, step 4190, Rate=186.04, Global Rate=179.54
training/ 20:03:21, device xla:1, step 4200, Rate=185.56, Global Rate=179.56
training/ 20:03:28, device xla:1, step 4210, Rate=185.96, Global Rate=179.57
training/ 20:03:34, device xla:1, step 4220, Rate=185.33, Global Rate=179.58
training/ 20:03:41, device xla:1, step 4230, Rate=184.41, Global Rate=179.60
training/ 20:03:48, device xla:1, step 4240, Rate=186.06, Global Rate=179.61
training/ 20:03:55, device xla:1, step 4250, Rate=185.43, Global Rate=179.62
training/ 20:04:02, device xla:1, step 4260, Rate=184.33, Global Rate=179.63
training/ 20:04:09, device xla:1, step 4270, Rate=182.67, Global Rate=179.64
training/ 20:04:16, device xla:1, step 4280, Rate=184.95, Global Rate=179.65
training/ 20:04:23, device xla:1, step 4290, Rate=185.47, Global Rate=179.66
training/ 20:04:30, device xla:1, step 4300, Rate=185.81, Global Rate=179.68
training/ 20:04:37, device xla:1, step 4310, Rate=184.13, Global Rate=179.68
training/ 20:04:44, device xla:1, step 4320, Rate=186.04, Global Rate=179.70
training/ 20:04:51, device xla:1, step 4330, Rate=184.92, Global Rate=179.71
training/ 20:04:58, device xla:1, step 4340, Rate=184.99, Global Rate=179.72
training/ 20:05:05, device xla:1, step 4350, Rate=184.67, Global Rate=179.73
training/ 20:05:12, device xla:1, step 4360, Rate=183.92, Global Rate=179.74
training/ 20:05:18, device xla:1, step 4370, Rate=185.62, Global Rate=179.75
training/ 20:05:25, device xla:1, step 4380, Rate=186.31, Global Rate=179.77
training/ 20:05:32, device xla:1, step 4390, Rate=182.13, Global Rate=179.77
training/ 20:05:39, device xla:1, step 4400, Rate=184.22, Global Rate=179.78
training/ 20:05:46, device xla:1, step 4410, Rate=184.41, Global Rate=179.80
training/ 20:05:53, device xla:1, step 4420, Rate=185.14, Global Rate=179.81
training/ 20:06:00, device xla:1, step 4430, Rate=185.52, Global Rate=179.82
training/ 20:06:07, device xla:1, step 4440, Rate=185.96, Global Rate=179.83
training/ 20:06:14, device xla:1, step 4450, Rate=185.45, Global Rate=179.84
training/ 20:06:21, device xla:1, step 4460, Rate=185.09, Global Rate=179.86
training/ 20:06:28, device xla:1, step 4470, Rate=185.39, Global Rate=179.87
training/ 20:06:35, device xla:1, step 4480, Rate=185.03, Global Rate=179.88
training/ 20:06:41, device xla:1, step 4490, Rate=185.32, Global Rate=179.89
training/ 20:06:48, device xla:1, step 4500, Rate=184.37, Global Rate=179.90
training/ 20:06:55, device xla:1, step 4510, Rate=185.33, Global Rate=179.91
training/ 20:07:02, device xla:1, step 4520, Rate=185.21, Global Rate=179.92
training/ 20:07:09, device xla:1, step 4530, Rate=181.29, Global Rate=179.93
training/ 20:07:16, device xla:1, step 4540, Rate=184.37, Global Rate=179.94
training/ 20:07:23, device xla:1, step 4550, Rate=185.50, Global Rate=179.95
training/ 20:07:30, device xla:1, step 4560, Rate=186.30, Global Rate=179.96
training/ 20:07:37, device xla:1, step 4570, Rate=186.17, Global Rate=179.98
training/ 20:07:44, device xla:1, step 4580, Rate=185.64, Global Rate=179.99
training/ 20:07:51, device xla:1, step 4590, Rate=185.36, Global Rate=180.00
training/ 20:07:58, device xla:1, step 4600, Rate=184.98, Global Rate=180.01
training/ 20:08:04, device xla:1, step 4610, Rate=185.62, Global Rate=180.02
training/ 20:08:11, device xla:1, step 4620, Rate=185.13, Global Rate=180.03
training/ 20:08:18, device xla:1, step 4630, Rate=185.40, Global Rate=180.04
training/ 20:08:25, device xla:1, step 4640, Rate=184.19, Global Rate=180.05
training/ 20:08:32, device xla:1, step 4650, Rate=185.39, Global Rate=180.06
training/ 20:08:39, device xla:1, step 4660, Rate=185.08, Global Rate=180.07
training/ 20:08:46, device xla:1, step 4670, Rate=184.83, Global Rate=180.08
training/ 20:08:53, device xla:1, step 4680, Rate=184.92, Global Rate=180.09
training/ 20:09:00, device xla:1, step 4690, Rate=184.18, Global Rate=180.10
training/ 20:09:07, device xla:1, step 4700, Rate=181.46, Global Rate=180.11
training/ 20:09:14, device xla:1, step 4710, Rate=184.14, Global Rate=180.12
training/ 20:09:21, device xla:1, step 4720, Rate=184.61, Global Rate=180.13
training/ 20:09:28, device xla:1, step 4730, Rate=184.62, Global Rate=180.13
training/ 20:09:35, device xla:1, step 4740, Rate=185.51, Global Rate=180.14
training/ 20:09:42, device xla:1, step 4750, Rate=184.28, Global Rate=180.15
training/ 20:09:48, device xla:1, step 4760, Rate=184.91, Global Rate=180.16
training/ 20:09:55, device xla:1, step 4770, Rate=185.63, Global Rate=180.17
training/ 20:10:02, device xla:1, step 4780, Rate=185.33, Global Rate=180.18
training/ 20:10:09, device xla:1, step 4790, Rate=185.73, Global Rate=180.19
training/ 20:10:16, device xla:1, step 4800, Rate=185.66, Global Rate=180.20
training/ 20:10:23, device xla:1, step 4810, Rate=185.84, Global Rate=180.22
training/ 20:10:30, device xla:1, step 4820, Rate=185.04, Global Rate=180.22
training/ 20:10:37, device xla:1, step 4830, Rate=184.70, Global Rate=180.23
training/ 20:10:44, device xla:1, step 4840, Rate=183.76, Global Rate=180.24
training/ 20:10:51, device xla:1, step 4850, Rate=184.20, Global Rate=180.25
training/ 20:10:58, device xla:1, step 4860, Rate=182.48, Global Rate=180.26
training/ 20:11:05, device xla:1, step 4870, Rate=185.28, Global Rate=180.27
training/ 20:11:12, device xla:1, step 4880, Rate=184.99, Global Rate=180.28
training/ 20:11:18, device xla:1, step 4890, Rate=186.69, Global Rate=180.29
training/ 20:11:25, device xla:1, step 4900, Rate=186.26, Global Rate=180.30
training/ 20:11:32, device xla:1, step 4910, Rate=185.47, Global Rate=180.31
training/ 20:11:39, device xla:1, step 4920, Rate=186.24, Global Rate=180.32
training/ 20:11:46, device xla:1, step 4930, Rate=185.97, Global Rate=180.33
training/ 20:11:53, device xla:1, step 4940, Rate=185.74, Global Rate=180.34
training/ 20:12:00, device xla:1, step 4950, Rate=185.92, Global Rate=180.35
training/ 20:12:07, device xla:1, step 4960, Rate=184.29, Global Rate=180.36
training/ 20:12:14, device xla:1, step 4970, Rate=184.20, Global Rate=180.37
training/ 20:12:21, device xla:1, step 4980, Rate=182.72, Global Rate=180.37
training/ 20:12:28, device xla:1, step 4990, Rate=185.12, Global Rate=180.38
training/ 20:12:34, device xla:1, step 5000, Rate=185.75, Global Rate=180.39
training/ 20:12:41, device xla:1, step 5010, Rate=185.78, Global Rate=180.40
training/ 20:12:48, device xla:1, step 5020, Rate=186.27, Global Rate=180.41
training/ 20:12:55, device xla:1, step 5030, Rate=185.28, Global Rate=180.42
training/ 20:13:02, device xla:1, step 5040, Rate=184.89, Global Rate=180.43
training/ 20:13:09, device xla:1, step 5050, Rate=184.21, Global Rate=180.44
training/ 20:13:16, device xla:1, step 5060, Rate=182.68, Global Rate=180.44
training/ 20:13:23, device xla:1, step 5070, Rate=185.52, Global Rate=180.45
training/ 20:13:30, device xla:1, step 5080, Rate=185.04, Global Rate=180.46
training/ 20:13:37, device xla:1, step 5090, Rate=184.76, Global Rate=180.47
training/ 20:13:44, device xla:1, step 5100, Rate=184.64, Global Rate=180.48
training/ 20:13:51, device xla:1, step 5110, Rate=185.83, Global Rate=180.49
training/ 20:13:58, device xla:1, step 5120, Rate=181.69, Global Rate=180.49
training/ 20:14:04, device xla:1, step 5130, Rate=185.52, Global Rate=180.50
training/ 20:14:11, device xla:1, step 5140, Rate=185.26, Global Rate=180.51
training/ 20:14:18, device xla:1, step 5150, Rate=185.32, Global Rate=180.52
training/ 20:14:25, device xla:1, step 5160, Rate=185.56, Global Rate=180.53
training/ 20:14:32, device xla:1, step 5170, Rate=184.30, Global Rate=180.53
training/ 20:14:39, device xla:1, step 5180, Rate=185.40, Global Rate=180.54
training/ 20:14:46, device xla:1, step 5190, Rate=184.32, Global Rate=180.55
training/ 20:14:53, device xla:1, step 5200, Rate=186.25, Global Rate=180.56
training/ 20:15:00, device xla:1, step 5210, Rate=186.09, Global Rate=180.57
training/ 20:15:07, device xla:1, step 5220, Rate=186.04, Global Rate=180.58
training/ 20:15:14, device xla:1, step 5230, Rate=185.66, Global Rate=180.59
training/ 20:15:21, device xla:1, step 5240, Rate=184.36, Global Rate=180.60
training/ 20:15:27, device xla:1, step 5250, Rate=185.18, Global Rate=180.60
training/ 20:15:34, device xla:1, step 5260, Rate=185.04, Global Rate=180.61
training/ 20:15:41, device xla:1, step 5270, Rate=185.70, Global Rate=180.62
training/ 20:15:48, device xla:1, step 5280, Rate=186.23, Global Rate=180.63
training/ 20:15:55, device xla:1, step 5290, Rate=185.61, Global Rate=180.64
training/ 20:16:02, device xla:1, step 5300, Rate=185.99, Global Rate=180.65
training/ 20:16:09, device xla:1, step 5310, Rate=185.52, Global Rate=180.65
training/ 20:16:16, device xla:1, step 5320, Rate=185.40, Global Rate=180.66
training/ 20:16:23, device xla:1, step 5330, Rate=186.41, Global Rate=180.67
training/ 20:16:30, device xla:1, step 5340, Rate=184.68, Global Rate=180.68
training/ 20:16:37, device xla:1, step 5350, Rate=186.40, Global Rate=180.69
training/ 20:16:44, device xla:1, step 5360, Rate=183.49, Global Rate=180.69
training/ 20:16:50, device xla:1, step 5370, Rate=184.93, Global Rate=180.70
training/ 20:16:57, device xla:1, step 5380, Rate=184.91, Global Rate=180.71
training/ 20:17:04, device xla:1, step 5390, Rate=185.72, Global Rate=180.72
training/ 20:17:11, device xla:1, step 5400, Rate=184.79, Global Rate=180.73
training/ 20:17:18, device xla:1, step 5410, Rate=185.15, Global Rate=180.73
training/ 20:17:25, device xla:1, step 5420, Rate=184.92, Global Rate=180.74
training/ 20:17:32, device xla:1, step 5430, Rate=185.82, Global Rate=180.75
training/ 20:17:39, device xla:1, step 5440, Rate=186.08, Global Rate=180.76
training/ 20:17:46, device xla:1, step 5450, Rate=186.40, Global Rate=180.77
training/ 20:17:53, device xla:1, step 5460, Rate=185.79, Global Rate=180.78
training/ 20:17:59, device xla:1, step 5470, Rate=186.32, Global Rate=180.79
training/ 20:18:06, device xla:1, step 5480, Rate=182.10, Global Rate=180.79
training/ 20:18:13, device xla:1, step 5490, Rate=185.62, Global Rate=180.80
training/ 20:18:20, device xla:1, step 5500, Rate=185.15, Global Rate=180.81
training/ 20:18:27, device xla:1, step 5510, Rate=184.24, Global Rate=180.81
training/ 20:18:34, device xla:1, step 5520, Rate=184.59, Global Rate=180.82
training/ 20:18:41, device xla:1, step 5530, Rate=184.66, Global Rate=180.82
training/ 20:18:48, device xla:1, step 5540, Rate=185.58, Global Rate=180.83
training/ 20:18:55, device xla:1, step 5550, Rate=185.13, Global Rate=180.84
training/ 20:19:02, device xla:1, step 5560, Rate=185.48, Global Rate=180.85
training/ 20:19:09, device xla:1, step 5570, Rate=184.70, Global Rate=180.85
training/ 20:19:16, device xla:1, step 5580, Rate=184.12, Global Rate=180.86
training/ 20:19:23, device xla:1, step 5590, Rate=185.93, Global Rate=180.87
training/ 20:19:30, device xla:1, step 5600, Rate=185.26, Global Rate=180.88
training/ 20:19:36, device xla:1, step 5610, Rate=185.02, Global Rate=180.88
training/ 20:19:43, device xla:1, step 5620, Rate=185.30, Global Rate=180.89
training/ 20:19:50, device xla:1, step 5630, Rate=185.09, Global Rate=180.90
training/ 20:19:57, device xla:1, step 5640, Rate=185.54, Global Rate=180.90
training/ 20:20:04, device xla:1, step 5650, Rate=182.91, Global Rate=180.91
training/ 20:20:11, device xla:1, step 5660, Rate=183.52, Global Rate=180.91
training/ 20:20:18, device xla:1, step 5670, Rate=184.21, Global Rate=180.92
training/ 20:20:25, device xla:1, step 5680, Rate=185.17, Global Rate=180.93
training/ 20:20:32, device xla:1, step 5690, Rate=184.65, Global Rate=180.93
training/ 20:20:39, device xla:1, step 5700, Rate=184.55, Global Rate=180.94
training/ 20:20:46, device xla:1, step 5710, Rate=183.17, Global Rate=180.94
training/ 20:20:53, device xla:1, step 5720, Rate=184.56, Global Rate=180.95
training/ 20:21:00, device xla:1, step 5730, Rate=184.97, Global Rate=180.96
training/ 20:21:07, device xla:1, step 5740, Rate=185.47, Global Rate=180.96
training/ 20:21:14, device xla:1, step 5750, Rate=185.45, Global Rate=180.97
training/ 20:21:20, device xla:1, step 5760, Rate=185.80, Global Rate=180.98
training/ 20:21:27, device xla:1, step 5770, Rate=185.58, Global Rate=180.99
training/ 20:21:34, device xla:1, step 5780, Rate=185.28, Global Rate=180.99
training/ 20:21:41, device xla:1, step 5790, Rate=184.46, Global Rate=181.00
training/ 20:21:48, device xla:1, step 5800, Rate=185.36, Global Rate=181.01
training/ 20:21:55, device xla:1, step 5810, Rate=182.88, Global Rate=181.01
training/ 20:22:02, device xla:1, step 5820, Rate=183.70, Global Rate=181.02
training/ 20:22:09, device xla:1, step 5830, Rate=184.42, Global Rate=181.02
training/ 20:22:16, device xla:1, step 5840, Rate=184.25, Global Rate=181.03
training/ 20:22:23, device xla:1, step 5850, Rate=184.68, Global Rate=181.03
training/ 20:22:30, device xla:1, step 5860, Rate=186.29, Global Rate=181.04
training/ 20:22:37, device xla:1, step 5870, Rate=186.18, Global Rate=181.05
training/ 20:22:43, device xla:1, step 5880, Rate=185.92, Global Rate=181.06
training/ 20:22:50, device xla:1, step 5890, Rate=186.62, Global Rate=181.07
training/ 20:22:57, device xla:1, step 5900, Rate=186.12, Global Rate=181.07
training/ 20:23:04, device xla:1, step 5910, Rate=186.18, Global Rate=181.08
training/ 20:23:11, device xla:1, step 5920, Rate=185.46, Global Rate=181.09
training/ 20:23:18, device xla:1, step 5930, Rate=185.46, Global Rate=181.10
training/ 20:23:25, device xla:1, step 5940, Rate=184.59, Global Rate=181.10
training/ 20:23:32, device xla:1, step 5950, Rate=184.78, Global Rate=181.11
training/ 20:23:39, device xla:1, step 5960, Rate=185.98, Global Rate=181.12
training/ 20:23:46, device xla:1, step 5970, Rate=185.91, Global Rate=181.12
training/ 20:23:52, device xla:1, step 5980, Rate=186.22, Global Rate=181.13
training/ 20:23:59, device xla:1, step 5990, Rate=185.64, Global Rate=181.14
training/ 20:24:06, device xla:1, step 6000, Rate=185.40, Global Rate=181.14
training/ 20:24:13, device xla:1, step 6010, Rate=185.85, Global Rate=181.15
training/ 20:24:20, device xla:1, step 6020, Rate=185.61, Global Rate=181.16
training/ 20:24:27, device xla:1, step 6030, Rate=182.92, Global Rate=181.16
training/ 20:24:34, device xla:1, step 6040, Rate=184.37, Global Rate=181.17
training/ 20:24:41, device xla:1, step 6050, Rate=185.14, Global Rate=181.18
training/ 20:24:48, device xla:1, step 6060, Rate=183.93, Global Rate=181.18
training/ 20:24:55, device xla:1, step 6070, Rate=185.73, Global Rate=181.19
training/ 20:25:02, device xla:1, step 6080, Rate=186.07, Global Rate=181.20
training/ 20:25:09, device xla:1, step 6090, Rate=185.70, Global Rate=181.20
training/ 20:25:15, device xla:1, step 6100, Rate=184.13, Global Rate=181.21
training/ 20:25:22, device xla:1, step 6110, Rate=185.77, Global Rate=181.21
training/ 20:25:29, device xla:1, step 6120, Rate=185.89, Global Rate=181.22
training/ 20:25:36, device xla:1, step 6130, Rate=184.87, Global Rate=181.23
training/ 20:25:43, device xla:1, step 6140, Rate=182.43, Global Rate=181.23
training/ 20:25:50, device xla:1, step 6150, Rate=184.29, Global Rate=181.24
training/ 20:25:57, device xla:1, step 6160, Rate=185.89, Global Rate=181.24
training/ 20:26:04, device xla:1, step 6170, Rate=181.09, Global Rate=181.25
training/ 20:26:11, device xla:1, step 6180, Rate=185.26, Global Rate=181.25
training/ 20:26:18, device xla:1, step 6190, Rate=184.15, Global Rate=181.26
training/ 20:26:25, device xla:1, step 6200, Rate=184.21, Global Rate=181.26
training/ 20:26:32, device xla:1, step 6210, Rate=183.63, Global Rate=181.26
training/ 20:26:39, device xla:1, step 6220, Rate=185.38, Global Rate=181.27
training/ 20:26:46, device xla:1, step 6230, Rate=185.63, Global Rate=181.28
training/ 20:26:53, device xla:1, step 6240, Rate=186.51, Global Rate=181.28
training/ 20:26:59, device xla:1, step 6250, Rate=186.07, Global Rate=181.29
training/ 20:27:06, device xla:1, step 6260, Rate=186.09, Global Rate=181.30
training/ 20:27:13, device xla:1, step 6270, Rate=183.59, Global Rate=181.30
training/ 20:27:20, device xla:1, step 6280, Rate=184.84, Global Rate=181.31
training/ 20:27:27, device xla:1, step 6290, Rate=185.35, Global Rate=181.31
training/ 20:27:34, device xla:1, step 6300, Rate=185.60, Global Rate=181.32
training/ 20:27:41, device xla:1, step 6310, Rate=185.61, Global Rate=181.32
training/ 20:27:48, device xla:1, step 6320, Rate=184.65, Global Rate=181.33
training/ 20:27:55, device xla:1, step 6330, Rate=186.02, Global Rate=181.34
training/ 20:28:02, device xla:1, step 6340, Rate=185.89, Global Rate=181.34
training/ 20:28:09, device xla:1, step 6350, Rate=184.92, Global Rate=181.35
training/ 20:28:16, device xla:1, step 6360, Rate=184.23, Global Rate=181.35
training/ 20:28:22, device xla:1, step 6370, Rate=183.99, Global Rate=181.36
training/ 20:28:29, device xla:1, step 6380, Rate=185.29, Global Rate=181.36
training/ 20:28:36, device xla:1, step 6390, Rate=186.31, Global Rate=181.37
training/ 20:28:43, device xla:1, step 6400, Rate=185.47, Global Rate=181.38
training/ 20:28:50, device xla:1, step 6410, Rate=185.67, Global Rate=181.38
training/ 20:28:57, device xla:1, step 6420, Rate=185.41, Global Rate=181.39
training/ 20:29:04, device xla:1, step 6430, Rate=186.25, Global Rate=181.40
training/ 20:29:11, device xla:1, step 6440, Rate=185.70, Global Rate=181.40
./rmyle-tpu.sh: line 63: 49584 Killed python tpu-examples/fairseq_train_tpu.py $data_path --arch=transformer_vaswani_wmt_en_de_big --max-sentences=$batch_size --max-sentences-valid=$batch_size --max-source-positions=$n_words --max-target-positions=$n_words --required-batch-size-multiple=$batch_size --no-save --attention-dropout=0.1 --no-progress-bar --criterion=label_smoothed_cross_entropy --log-interval=100 --source-lang=en --lr-scheduler=inverse_sqrt --min-lr 1e-09 --skip-invalid-size-inputs-valid-test --target-lang=de --label-smoothing=0.1 --update-freq=1 --optimizer adam --adam-betas '(0.9, 0.98)' --warmup-init-lr 1e-07 --lr 0.0005 --warmup-updates 4000 --share-all-embeddings --dropout 0.3 --weight-decay 0.0 --valid-subset=valid --curriculum=4 --max-epoch=50 --num_cores=1 --metrics_debug --pad_to_length=$n_words --log_steps=10
Fri Aug 16 20:30:10 UTC 2019
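--------------
To summarize the throughput figures above, a minimal parsing sketch (not part of the original run; the file name summarize_rates.py and its train.log argument are hypothetical). It assumes the "training/ <time>, device <dev>, step <N>, Rate=<r>, Global Rate=<g>" format of the tracker lines above; with --max-sentences=128 the rates read as examples per second.

import re
import sys

# Matches the tracker lines printed above, e.g.
# "training/ 19:14:52, device xla:1, step 100, Rate=186.01, Global Rate=150.22"
LINE_RE = re.compile(
    r"training/\s+(?P<time>\d{2}:\d{2}:\d{2}), device (?P<device>\S+), "
    r"step (?P<step>\d+), Rate=(?P<rate>[\d.]+), Global Rate=(?P<grate>[\d.]+)"
)

def summarize(path):
    rates, last = [], None
    with open(path) as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:
                rates.append(float(m.group("rate")))
                last = m
    if not rates:
        print("no training lines found")
        return
    print("steps logged : %d (last step %s)" % (len(rates), last.group("step")))
    print("mean rate    : %.2f examples/s" % (sum(rates) / len(rates)))
    print("min/max rate : %.2f / %.2f" % (min(rates), max(rates)))
    print("final global : %s examples/s" % last.group("grate"))

if __name__ == "__main__":
    summarize(sys.argv[1])  # e.g. python summarize_rates.py train.log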