Skip to content

Instantly share code, notes, and snippets.

@ManfeiBai
Created September 5, 2024 20:43
Show Gist options
  • Save ManfeiBai/f215d9497d7d63b97c29df64ff69d107 to your computer and use it in GitHub Desktop.
Save ManfeiBai/f215d9497d7d63b97c29df64ff69d107 to your computer and use it in GitHub Desktop.
log
(torch310) root@6e1dc6c462da:/pytorch/xla# PJRT_DEVICE=TPU test/tpu/run_tests.sh
+ python3 test/test_operations.py -v
test_mp_decorator (__main__.MpDecoratorTest) ... ok
test_multi_init_xla_backend (__main__.RegisterXLAKeyTest) ... ok
test_dropout (__main__.TestActivationCheckpoint) ... /pytorch/xla/torch_xla/utils/checkpoint.py:93: DeprecationWarning: torch.get_autocast_gpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cuda') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:787.)
"dtype": torch.get_autocast_gpu_dtype(),
/pytorch/xla/torch_xla/utils/checkpoint.py:97: DeprecationWarning: torch.is_autocast_cpu_enabled() is deprecated. Please use torch.is_autocast_enabled('cpu') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:663.)
"enabled": torch.is_autocast_cpu_enabled(),
/pytorch/xla/torch_xla/utils/checkpoint.py:98: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:796.)
"dtype": torch.get_autocast_cpu_dtype(),
/pytorch/xla/torch_xla/utils/checkpoint.py:183: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs), \
/pytorch/xla/torch_xla/utils/checkpoint.py:184: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):
ok
test_opt_barrier (__main__.TestActivationCheckpoint) ... ok
test (__main__.TestAtenTensorTo) ... ok
test_add_mixed_device (__main__.TestAtenXlaTensor) ... ok
test_addmm_integer_types (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_ailing_slice (__main__.TestAtenXlaTensor) ... ok
test_amp_foreach_non_finite_check_and_unscale_ (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_amp_norm_append_dtype (__main__.TestAtenXlaTensor) ... ok
test_arange_nan (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1 (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1_dim1 (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1_slice (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1_t (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1_t_off (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r1_t_slice (__main__.TestAtenXlaTensor) ... ok
test_as_strided_r2_t_update (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_empty_args (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_gap (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_gap_no_unit_stride (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_overlap (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_overlap_and_gap (__main__.TestAtenXlaTensor) ... ok
test_as_strided_with_overlap_zero_stride (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_utils.py:316: UserWarning: Use of index_put_ on expanded tensors is deprecated. Please clone() the tensor before performing this operation. This also applies to advanced indexing e.g. tensor[indices] = tensor (Triggered internally at /pytorch/aten/src/ATen/native/TensorAdvancedIndexing.cpp:792.)
expected[nan_mask] = 0
/pytorch/xla/test/test_utils.py:316: UserWarning: Use of masked_fill_ on expanded tensors is deprecated. Please clone() the tensor before performing this operation. This also applies to advanced indexing e.g. tensor[mask] = scalar (Triggered internally at /pytorch/aten/src/ATen/native/TensorAdvancedIndexing.cpp:1990.)
expected[nan_mask] = 0
/pytorch/xla/test/test_utils.py:320: UserWarning: Use of index_put_ on expanded tensors is deprecated. Please clone() the tensor before performing this operation. This also applies to advanced indexing e.g. tensor[indices] = tensor (Triggered internally at /pytorch/aten/src/ATen/native/TensorAdvancedIndexing.cpp:792.)
expected[inf_mask] = 0
/pytorch/xla/test/test_utils.py:320: UserWarning: Use of masked_fill_ on expanded tensors is deprecated. Please clone() the tensor before performing this operation. This also applies to advanced indexing e.g. tensor[mask] = scalar (Triggered internally at /pytorch/aten/src/ATen/native/TensorAdvancedIndexing.cpp:1990.)
expected[inf_mask] = 0
ok
test_baddmm_integer_types (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_basic_bfloat16 (__main__.TestAtenXlaTensor) ... ok
test_bfloat16_float32_cast (__main__.TestAtenXlaTensor) ... ok
test_binaryop_order (__main__.TestAtenXlaTensor) ... ok
test_bitwise_and_not (__main__.TestAtenXlaTensor) ... ok
test_byte_dtype (__main__.TestAtenXlaTensor) ... ok
test_cached_addcdiv (__main__.TestAtenXlaTensor) ... ok
test_cat_empty_tensor (__main__.TestAtenXlaTensor) ... ok
test_clamp (__main__.TestAtenXlaTensor) ... ok
test_clip_grad_norm_ (__main__.TestAtenXlaTensor) ... ok
test_clip_grad_norm_zero (__main__.TestAtenXlaTensor) ... ok
test_conv2d_backward (__main__.TestAtenXlaTensor) ... ok
test_deepcopy (__main__.TestAtenXlaTensor) ... ok
test_diagonal_scatter_negative_dim (__main__.TestAtenXlaTensor) ... ok
test_diagonal_write (__main__.TestAtenXlaTensor) ... ok
test_diagonal_write_transposed (__main__.TestAtenXlaTensor) ... ok
test_diagonal_write_transposed_r3 (__main__.TestAtenXlaTensor) ... ok
test_div_mixed_device (__main__.TestAtenXlaTensor) ... ok
test_emb_bf16 (__main__.TestAtenXlaTensor) ... ok
test_embedding_bag_backward_fallback (__main__.TestAtenXlaTensor) ... /root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/autograd/graph.py:818: UserWarning: XLA does not support EmbeddingBag sparse backward function. Falling back to the dense function. (Triggered internally at torch_xla/csrc/aten_xla_type.cpp:1515.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
ok
test_embedding_int_indices (__main__.TestAtenXlaTensor) ... ok
test_empty_advanced_indexing (__main__.TestAtenXlaTensor) ... ok
test_empty_strided (__main__.TestAtenXlaTensor) ... skipped 'grad_input produces wrong results after functionalization. pytorch/pytorch#91199'
test_expand_default (__main__.TestAtenXlaTensor) ... ok
test_flip (__main__.TestAtenXlaTensor) ... ok
test_flip_check_throws (__main__.TestAtenXlaTensor) ... ok
test_flip_empty_tensor (__main__.TestAtenXlaTensor) ... ok
test_flip_expand (__main__.TestAtenXlaTensor) ... ok
test_flip_rectangular (__main__.TestAtenXlaTensor) ... ok
test_flip_shape (__main__.TestAtenXlaTensor) ... ok
test_float32_bfloat16_cast (__main__.TestAtenXlaTensor) ... ok
test_frac_negative (__main__.TestAtenXlaTensor) ... ok
test_gelu_backward_different_types (__main__.TestAtenXlaTensor) ... ok
test_get_real_xla_devices (__main__.TestAtenXlaTensor) ... ok
test_index_bool (__main__.TestAtenXlaTensor) ... ok
test_index_put (__main__.TestAtenXlaTensor) ... ok
test_index_select_0dim (__main__.TestAtenXlaTensor) ... ok
test_index_select_out (__main__.TestAtenXlaTensor) ... ok
test_index_types (__main__.TestAtenXlaTensor) ... ok
test_index_zero_tensor_by_zero_tensor (__main__.TestAtenXlaTensor) ... ok
test_inplace_copy_different_sizes (__main__.TestAtenXlaTensor) ... ok
test_inplace_mul_scalar_different_dtype (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_backprop_base (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_backprop_view (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_operations.py:942: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
self.assertIsNone(a.grad)
ok
test_inplace_view_backprop_view_of_view (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_gradcheck (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_makes_base_require_grad (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_modify_base (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_multiple_outputs (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_non_contig (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_of_view (__main__.TestAtenXlaTensor) ... ok
test_inplace_view_python (__main__.TestAtenXlaTensor) ... ok
test_manual_seed (__main__.TestAtenXlaTensor) ... ok
test_masked_fill_in_out_place (__main__.TestAtenXlaTensor) ... ok
test_masked_fill_with_tensor (__main__.TestAtenXlaTensor) ... ok
test_matmul_integer_types (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_max_broadcast (__main__.TestAtenXlaTensor) ... ok
test_mul_mixed_device (__main__.TestAtenXlaTensor) ... ok
test_multi_view (__main__.TestAtenXlaTensor) ... ok
test_nan_to_num_in_place (__main__.TestAtenXlaTensor) ... ok
test_nan_to_num_in_place_with_inf (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_native_dropout_backward (__main__.TestAtenXlaTensor) ... ok
test_negative_cat (__main__.TestAtenXlaTensor) ... ok
test_negative_slice (__main__.TestAtenXlaTensor) ... ok
test_no_storage (__main__.TestAtenXlaTensor) ... ok
test_norm_p0 (__main__.TestAtenXlaTensor) ... ok
test_one_hot_no_fallback (__main__.TestAtenXlaTensor) ... ok
test_patched_linear_1D_bias (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_patched_linear_2D_bias (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_patched_linear_3D (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_patched_linear_3D_bias (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_pow_constant (__main__.TestAtenXlaTensor) ... ok
test_pow_dtype_promotion (__main__.TestAtenXlaTensor) ... ok
test_pow_integer_types (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_pred_and_u8 (__main__.TestAtenXlaTensor) ... ok
test_pred_one_hot (__main__.TestAtenXlaTensor) ... ok
test_pred_type (__main__.TestAtenXlaTensor) ... ok
test_print (__main__.TestAtenXlaTensor) ... ok
test_print_executation (__main__.TestAtenXlaTensor) ... tensor([[-0.7458, 0.1188, -0.4456, -1.2744]], device='xla:0')
tensor([[-0.7458, 0.1188, -0.4456, -1.2744]], device='xla:0')
tensor([[-0.7458, 0.1188, -0.4456, -1.2744]], device='xla:0')
tensor([[-0.0038, -0.4741, -1.1976, 0.3020]], device='xla:0')
tensor([[-0.0038, -0.4741, -1.1976, 0.3020]], device='xla:0')
tensor([[-0.0038, -0.4741, -1.1976, 0.3020]], device='xla:0')
tensor([[-1.1619, 0.2727, 0.3339, -0.8061]])
tensor([[-1.1619, 0.2727, 0.3339, -0.8061]])
tensor([[-1.1619, 0.2727, 0.3339, -0.8061]])
ok
test_rand (__main__.TestAtenXlaTensor) ... ok
test_rand_like (__main__.TestAtenXlaTensor) ... ok
test_randint_like (__main__.TestAtenXlaTensor) ... ok
test_randn_like (__main__.TestAtenXlaTensor) ... ok
test_randperm (__main__.TestAtenXlaTensor) ... ok
test_reduction_0dim (__main__.TestAtenXlaTensor) ... ok
test_reduction_unordered_dim (__main__.TestAtenXlaTensor) ... ok
test_replace_xla_tensor (__main__.TestAtenXlaTensor) ... ok
test_rrelu_module (__main__.TestAtenXlaTensor) ... ok
test_s_copy_dtype (__main__.TestAtenXlaTensor) ... ok
test_save (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_operations.py:1428: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
x_loaded = torch.load(tf.name)
ok
test_save_api (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_operations.py:1454: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
state_dict = torch.load(tf.name)
ok
test_save_bf16 (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_operations.py:1436: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
x_loaded = torch.load(tf.name)
ok
test_save_tuple (__main__.TestAtenXlaTensor) ... /pytorch/xla/test/test_operations.py:1445: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
x_loaded, number_loaded = torch.load(tf.name)
ok
test_save_view_alias_check (__main__.TestAtenXlaTensor) ... skipped 'Works only when functionalization is not disabled. Reason: views do not exist.'
test_scatter_add_bool (__main__.TestAtenXlaTensor) ... ok
test_scatter_add_small_target (__main__.TestAtenXlaTensor) ... ok
test_serialization_api (__main__.TestAtenXlaTensor) ... /pytorch/xla/torch_xla/utils/serialization.py:87: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
ref_data = torch.load(path)
/pytorch/xla/torch_xla/utils/serialization.py:94: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
torch.load(_get_tensor_file(tensor_folder, t.tid)))
ok
test_set (__main__.TestAtenXlaTensor) ... skipped ''
test_sgn (__main__.TestAtenXlaTensor) ... ok
test_sigmoid_bounds (__main__.TestAtenXlaTensor) ... ok
test_slice_assign (__main__.TestAtenXlaTensor) ... ok
test_slice_copy (__main__.TestAtenXlaTensor) ... ok
test_slice_rnd_stepped_assign (__main__.TestAtenXlaTensor) ... ok
test_slice_start_end (__main__.TestAtenXlaTensor) ... ok
test_slice_stepped_assign (__main__.TestAtenXlaTensor) ... ok
test_slice_stepped_other_assign (__main__.TestAtenXlaTensor) ... ok
test_slice_zero_sized_dim (__main__.TestAtenXlaTensor) ... ok
test_split_empty_dim (__main__.TestAtenXlaTensor) ... ok
test_spooky_ailing (__main__.TestAtenXlaTensor) ... ok
test_squeeze_nonzero (__main__.TestAtenXlaTensor) ... ok
test_stack_different_types (__main__.TestAtenXlaTensor) ... ok
test_stack_pred (__main__.TestAtenXlaTensor) ... ok
test_sub_mixed_device (__main__.TestAtenXlaTensor) ... ok
test_too_many_parameter (__main__.TestAtenXlaTensor) ... ok
test_transpose_1d (__main__.TestAtenXlaTensor) ... ok
test_transpose_1d_inplace (__main__.TestAtenXlaTensor) ... ok
test_trilinear_interpolate (__main__.TestAtenXlaTensor) ... ok
test_upsample_bilinear_double (__main__.TestAtenXlaTensor) ... skipped 'Not supported on TPU'
test_view_1718 (__main__.TestAtenXlaTensor) ... ok
test_view_and_copy_ (__main__.TestAtenXlaTensor) ... ok
test_view_and_multi_mark_step (__main__.TestAtenXlaTensor) ... ok
test_view_empty (__main__.TestAtenXlaTensor) ... ok
test_view_out_computation (__main__.TestAtenXlaTensor) ... ok
test_writeable_tensors_updates (__main__.TestAtenXlaTensor) ... ok
test_cross_entropy_loss (__main__.TestBinaryCrossEntropyLimitValue) ... ok
test_dlpack_non_default_layout (__main__.TestDLPack)
TestDLPack.test_dlpack_non_default_layout ... skipped 'requires PyTorch CUDA support'
test_dlpack_pytorch_cuda_to_xla (__main__.TestDLPack)
TestDLPack.test_dlpack_pytorch_cuda_to_xla ... skipped 'requires PyTorch CUDA support'
test_dlpack_pytorch_cuda_to_xla_protocol_conversion (__main__.TestDLPack)
TestDLPack.test_dlpack_pytorch_cuda_to_xla_protocol_conversion ... skipped 'requires PyTorch CUDA support'
test_dlpack_roundtrip_bool (__main__.TestDLPack)
TestDLPack.test_dlpack_roundtrip_bool ... skipped 'requires PyTorch CUDA support'
test_dlpack_roundtrip_scalar (__main__.TestDLPack)
TestDLPack.test_dlpack_roundtrip_scalar
Callable and iterable class for producing new test cases. ... skipped 'requires PyTorch CUDA support'
test_dlpack_roundtrip_tensor (__main__.TestDLPack)
TestDLPack.test_dlpack_roundtrip_tensor
Callable and iterable class for producing new test cases. ... skipped 'requires PyTorch CUDA support'
test_dlpack_xla_to_pytorch_cuda (__main__.TestDLPack)
TestDLPack.test_dlpack_xla_to_pytorch_cuda ... skipped 'requires PyTorch CUDA support'
test_mixed_dtype_tuple (__main__.TestDataType) ... ok
test_get_xla_tensor_debug_info (__main__.TestDebuggingUtil) ... ok
test_masked_select_shape (__main__.TestDynamicShape) ... ok
test_nonzero_cast (__main__.TestDynamicShape) ... ok
test_nonzero_shape (__main__.TestDynamicShape) ... ok
test_as_strided_input_larger (__main__.TestGeneric) ... ok
test_aten_move_cuda_to_xla (__main__.TestGeneric) ... skipped 'requires PyTorch CUDA support'
test_aten_move_scalar_cuda_to_xla (__main__.TestGeneric) ... skipped 'requires PyTorch CUDA support'
test_data_wrapper (__main__.TestGeneric) ... ok
test_git_revisons (__main__.TestGeneric) ... ok
test_send_to_device_grad (__main__.TestGeneric) ... ok
test_send_to_device_single (__main__.TestGeneric) ... ok
test_unsafe_buffer_pointer (__main__.TestGeneric) ... ok
test_util_foreach_api (__main__.TestGeneric) ... ok
test_util_foreach_api_cycle (__main__.TestGeneric) ... ok
test_zeros_like_patch (__main__.TestGeneric) ... ok
test_repeat_extended (__main__.TestHelperFunction) ... ok
test_repeat_special (__main__.TestHelperFunction) ... ok
test_repeat_truncated (__main__.TestHelperFunction) ... ok
test_inter_op_sync (__main__.TestInterOpSyncTensors) ... ok
test (__main__.TestLongGraphChain) ... ok
test_api (__main__.TestLoweringContext) ... ok
test (__main__.TestModelComparator) ... /pytorch/xla/torch_xla/debug/model_comparator.py:134: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
tensor1 = torch.load(path1)
/pytorch/xla/torch_xla/debug/model_comparator.py:135: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
tensor2 = torch.load(path2)
ok
test_legacy (__main__.TestNMS) ... skipped 'XLA_EXPERIMENTAL=nms required'
test_nms_input_errors (__main__.TestNMS) ... skipped 'XLA_EXPERIMENTAL=nms required'
test_nms_ref (__main__.TestNMS) ... skipped 'XLA_EXPERIMENTAL=nms required'
test_nll_loss_inf (__main__.TestNllLossLimitValue) ... ok
test_nll_loss_nan (__main__.TestNllLossLimitValue) ... ok
test_add (__main__.TestOpBuilder) ... ok
test_conditional (__main__.TestOpBuilder) ... ok
test_mul (__main__.TestOpBuilder) ... ok
test_triangular_solve (__main__.TestOpBuilder) ... /pytorch/xla/test/test_operations.py:2524: UserWarning: torch.triangular_solve is deprecated in favor of torch.linalg.solve_triangularand will be removed in a future PyTorch release.
torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at /pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2190.)
return torch.triangular_solve(
ERROR
test_while (__main__.TestOpBuilder) ... ok
test_optimization_barrier_correctness (__main__.TestOptimizationBarrier) ... ok
test (__main__.TestParallelLoader) ... ok
test (__main__.TestParallelTensorMNIST) ... ok
test_random_from_to_bool (__main__.TestRandom) ... ok
test_get_xla_tensor (__main__.TestSelect) ... ok
test_masked_fill_scalar (__main__.TestSelect) ... ok
test (__main__.TestToXlaTensorArena) ... ok
test_wait_device_ops (__main__.TestWaitDeviceOps) ... ok
test_non_empty_scope (__main__.XpTraceTest) ... ok
test_non_empty_scope_decorator (__main__.XpTraceTest) ... ok
======================================================================
ERROR: test_triangular_solve (__main__.TestOpBuilder)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/pytorch/xla/test/test_operations.py", line 2531, in test_triangular_solve
self.runOpBuilderTest(
File "/pytorch/xla/test/test_operations.py", line 2437, in runOpBuilderTest
results = xu.as_list(aten_fn(*tensors, **kwargs))
File "/pytorch/xla/test/test_operations.py", line 2524, in aten_fn
return torch.triangular_solve(
RuntimeError: Calling torch.triangular_solve on a CPU tensor requires compiling PyTorch with BLAS. Please use PyTorch built with BLAS support.
----------------------------------------------------------------------
Ran 199 tests in 116.818s
FAILED (errors=1, skipped=26)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f6bcece1ce1,7f6bcefdf13f,7f6bced2a6d9&map=
*** SIGABRT received by PID 487057 (TID 489916) on cpu 174 from PID 487057; stack trace: ***
PC: @ 0x7f6bcece1ce1 (unknown) raise
@ 0x7f66b6fa91a1 1888 (unknown)
@ 0x7f6bcefdf140 2320 (unknown)
@ 0x7f6bced2a6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f6bcece1ce1,7f66b6fa91a0,7f6bcefdf13f,7f6bced2a6d9,0&map=
E0905 20:06:41.088437 489916 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:06:41.088453 489916 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:06:41.088457 489916 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:06:41.088477 489916 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:06:41.088483 489916 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:07:20.519880 489916 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 6: 487057 Aborted (core dumped) python3 test/test_operations.py -v
+ python3 test/pjrt/test_runtime_tpu.py
Running tests under Python 3.10.14: /root/miniconda3/envs/torch310/bin/python3
[ RUN ] TestExperimentalPjrtTpu.test_default_xla_devices
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
[ OK ] TestExperimentalPjrtTpu.test_default_xla_devices
[ RUN ] TestExperimentalPjrtTpu.test_execute_time_metric
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fd6f82d8ce1,7fd6f85d613f,7fd6f83216d9&map=
*** SIGABRT received by PID 491373 (TID 491373) on cpu 16 from PID 491373; stack trace: ***
PC: @ 0x7fd6f82d8ce1 (unknown) raise
@ 0x7fd1defa91a1 1888 (unknown)
@ 0x7fd6f85d6140 232932960 (unknown)
@ 0x7fd6f83216da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fd6f82d8ce1,7fd1defa91a0,7fd6f85d613f,7fd6f83216d9,0&map=
E0905 20:07:39.636828 491373 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:07:39.636846 491373 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:07:39.636851 491373 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:07:39.636869 491373 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:07:39.636875 491373 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fc956b00ce1,7fc956dfe13f,7fc956b496d9&map=
*** SIGABRT received by PID 491372 (TID 491372) on cpu 48 from PID 491372; stack trace: ***
PC: @ 0x7fc956b00ce1 (unknown) raise
@ 0x7fc442fa91a1 1888 (unknown)
@ 0x7fc956dfe140 59857808 (unknown)
@ 0x7fc956b496da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fc956b00ce1,7fc442fa91a0,7fc956dfe13f,7fc956b496d9,0&map=
E0905 20:07:39.684388 491372 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:07:39.684399 491372 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:07:39.684404 491372 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
free(): corrupted unsorted chunks
E0905 20:07:39.684421 491372 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:07:39.684430 491372 coredump_hook.cc:472] RAW: Dumping core locally.
https://symbolize.stripped_domain/r/?trace=7f156de24ce1,7f156e12213f,7f156de6d6d9&map=
*** SIGABRT received by PID 491369 (TID 491369) on cpu 36 from PID 491369; stack trace: ***
PC: @ 0x7f156de24ce1 (unknown) raise
@ 0x7f1056fa91a1 1888 (unknown)
@ 0x7f156e122140 (unknown) (unknown)
@ 0x7f156de6d6da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f2a805c9ce1,7f2a808c713f,7f2a806126d9&map=
*** SIGABRT received by PID 491374 (TID 491374) on cpu 41 from PID 491374; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f156de24ce1,7f1056fa91a0,7f156e12213f,7f156de6d6d9,0&map=
E0905 20:07:39.689042 491369 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:07:39.689061 491369 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:07:39.689065 491369 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:07:39.689086 491369 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:07:39.689093 491369 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7f2a805c9ce1 (unknown) raise
@ 0x7f256efa91a1 1888 (unknown)
@ 0x7f2a808c7140 164251072 (unknown)
@ 0x7f2a806126da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f2a805c9ce1,7f256efa91a0,7f2a808c713f,7f2a806126d9,0&map=
E0905 20:07:39.692738 491374 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:07:39.692755 491374 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:07:39.692760 491374 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:07:39.692780 491374 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:07:39.692786 491374 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:07:49.558503 491373 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:07:49.562326 491372 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:07:49.684596 491369 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:07:49.687070 491374 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_execute_time_metric
[ RUN ] TestExperimentalPjrtTpu.test_global_ordinal
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fbafc167ce1,7fbafc46513f,7fbafc1b06d9&map=
*** SIGABRT received by PID 495753 (TID 499060) on cpu 86 from PID 495753; stack trace: ***
PC: @ 0x7fbafc167ce1 (unknown) raise
@ 0x7fb5e2fa91a1 1888 (unknown)
@ 0x7fbafc465140 2320 (unknown)
@ 0x7fbafc1b06da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fbafc167ce1,7fb5e2fa91a0,7fbafc46513f,7fbafc1b06d9,0&map=
E0905 20:08:18.839440 499060 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:08:18.839460 499060 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:08:18.839465 499060 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:08:18.839488 499060 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:08:18.839494 499060 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:08:24.081848 499060 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_global_ordinal
[ RUN ] TestExperimentalPjrtTpu.test_global_runtime_device_attributes
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fb37108fce1,7fb37138d13f,7fb3710d86d9&map=
*** SIGABRT received by PID 499170 (TID 499170) on cpu 179 from PID 499170; stack trace: ***
PC: @ 0x7fb37108fce1 (unknown) raise
@ 0x7fae5afa91a1 1888 (unknown)
@ 0x7fb37138d140 (unknown) (unknown)
@ 0x7fb3710d86da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fb37108fce1,7fae5afa91a0,7fb37138d13f,7fb3710d86d9,0&map=
E0905 20:08:33.937522 499170 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:08:33.937543 499170 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:08:33.937546 499170 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:08:33.937566 499170 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:08:33.937572 499170 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:08:38.980171 499170 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_global_runtime_device_attributes
[ RUN ] TestExperimentalPjrtTpu.test_local_ordinal
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fdcc6d48ce1,7fdcc704613f,7fdcc6d916d9&map=
*** SIGABRT received by PID 502439 (TID 502439) on cpu 26 from PID 502439; stack trace: ***
PC: @ 0x7fdcc6d48ce1 (unknown) raise
@ 0x7fd7aefa91a1 1888 (unknown)
@ 0x7fdcc7046140 (unknown) (unknown)
@ 0x7fdcc6d916da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fdcc6d48ce1,7fd7aefa91a0,7fdcc704613f,7fdcc6d916d9,0&map=
E0905 20:08:48.630954 502439 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:08:48.630970 502439 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:08:48.630976 502439 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:08:48.630997 502439 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:08:48.631002 502439 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fa5168bece1,7fa516bbc13f,7fa5169076d9&map=
*** SIGABRT received by PID 502438 (TID 502438) on cpu 37 from PID 502438; stack trace: ***
PC: @ 0x7fa5168bece1 (unknown) raise
@ 0x7fa002fa91a1 1888 (unknown)
@ 0x7fa516bbc140 274109424 (unknown)
@ 0x7fa5169076da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f45f1816ce1,7f45f1b1413f,7f45f185f6d9&map=
*** SIGABRT received by PID 502440 (TID 502440) on cpu 60 from PID 502440; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fa5168bece1,7fa002fa91a0,7fa516bbc13f,7fa5169076d9,0&map=
E0905 20:08:48.639643 502438 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:08:48.639658 502438 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:08:48.639665 502438 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:08:48.639686 502438 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:08:48.639693 502438 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7f45f1816ce1 (unknown) raise
@ 0x7f40defa91a1 1888 (unknown)
@ 0x7f45f1b14140 (unknown) (unknown)
@ 0x7f45f185f6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f45f1816ce1,7f40defa91a0,7f45f1b1413f,7f45f185f6d9,0&map=
E0905 20:08:48.643104 502440 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:08:48.643136 502440 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:08:48.643140 502440 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:08:48.643167 502440 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:08:48.643173 502440 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:08:54.792504 502439 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:08:54.804572 502440 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:08:54.984486 502438 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_local_ordinal
[ RUN ] TestExperimentalPjrtTpu.test_local_ordinal_with_discontiguous_global_ordinal_v4
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f3fad3bcce1,7f3fad6ba13f,7f3fad4056d9&map=
*** SIGABRT received by PID 505703 (TID 505703) on cpu 24 from PID 505703; stack trace: ***
PC: @ 0x7f3fad3bcce1 (unknown) raise
@ 0x7f3a96fa91a1 1888 (unknown)
@ 0x7f3fad6ba140 159055408 (unknown)
@ 0x7f3fad4056da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fcf7a317ce1,7fcf7a61513f,7fcf7a3606d9&map=
*** SIGABRT received by PID 505704 (TID 505704) on cpu 162 from PID 505704; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f3fad3bcce1,7f3a96fa91a0,7f3fad6ba13f,7f3fad4056d9,0&map=
E0905 20:09:03.731861 505703 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:09:03.731878 505703 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:09:03.731883 505703 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:09:03.731906 505703 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:09:03.731911 505703 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fcf7a317ce1 (unknown) raise
@ 0x7fca62fa91a1 1888 (unknown)
@ 0x7fcf7a615140 825122784 (unknown)
@ 0x7fcf7a3606da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fcf7a317ce1,7fca62fa91a0,7fcf7a61513f,7fcf7a3606d9,0&map=
E0905 20:09:03.735807 505704 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:09:03.735825 505704 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:09:03.735829 505704 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:09:03.735855 505704 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:09:03.735860 505704 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f19eba30ce1,7f19ebd2e13f,7f19eba796d9&map=
*** SIGABRT received by PID 505700 (TID 505700) on cpu 12 from PID 505700; stack trace: ***
PC: @ 0x7f19eba30ce1 (unknown) raise
@ 0x7f14d6fa91a1 1888 (unknown)
@ 0x7f19ebd2e140 (unknown) (unknown)
@ 0x7f19eba796da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f19eba30ce1,7f14d6fa91a0,7f19ebd2e13f,7f19eba796d9,0&map=
E0905 20:09:03.776636 505700 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:09:03.776653 505700 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:09:03.776661 505700 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:09:03.776677 505700 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:09:03.776683 505700 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:09:09.662542 505700 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:09:09.663329 505703 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:09:09.671708 505704 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_local_ordinal_with_discontiguous_global_ordinal_v4
[ RUN ] TestExperimentalPjrtTpu.test_local_ordinal_with_discontiguous_global_ordinal_v4_threaded
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f7c2f213ce1,7f7c2f51113f,7f7c2f25c6d9&map=
*** SIGABRT received by PID 508994 (TID 510038) on cpu 209 from PID 508994; stack trace: ***
PC: @ 0x7f7c2f213ce1 (unknown) raise
@ 0x7f7716fa91a1 1888 (unknown)
@ 0x7f7c2f511140 2320 (unknown)
@ 0x7f7c2f25c6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f7c2f213ce1,7f7716fa91a0,7f7c2f51113f,7f7c2f25c6d9,0&map=
E0905 20:09:18.618403 510038 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:09:18.618422 510038 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:09:18.618427 510038 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:09:18.618451 510038 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:09:18.618457 510038 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:09:23.939006 510038 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_local_ordinal_with_discontiguous_global_ordinal_v4_threaded
[ RUN ] TestExperimentalPjrtTpu.test_memory_usage
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ffad1042ce1,7ffad134013f,7ffad108b6d9&map=
*** SIGABRT received by PID 510176 (TID 510176) on cpu 74 from PID 510176; stack trace: ***
PC: @ 0x7ffad1042ce1 (unknown) raise
@ 0x7ff5bafa91a1 1888 (unknown)
@ 0x7ffad1340140 (unknown) (unknown)
@ 0x7ffad108b6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ffad1042ce1,7ff5bafa91a0,7ffad134013f,7ffad108b6d9,0&map=
E0905 20:09:53.044249 510176 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:09:53.044268 510176 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:09:53.044273 510176 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:09:53.044294 510176 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:09:53.044300 510176 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:09:57.183774 510176 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_memory_usage
[ RUN ] TestExperimentalPjrtTpu.test_runtime_device_attributes
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f7a5da5bce1,7f7a5dd5913f,7f7a5daa46d9&map=
*** SIGABRT received by PID 513609 (TID 513609) on cpu 233 from PID 513609; stack trace: ***
PC: @ 0x7f7a5da5bce1 (unknown) raise
@ 0x7f754afa91a1 1888 (unknown)
@ 0x7f7a5dd59140 1910185600 (unknown)
@ 0x7f7a5daa46da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f7a5da5bce1,7f754afa91a0,7f7a5dd5913f,7f7a5daa46d9,0&map=
E0905 20:10:06.681245 513609 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:06.681265 513609 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:06.681270 513609 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:06.681293 513609 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:06.681299 513609 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fbd586abce1,7fbd589a913f,7fbd586f46d9&map=
*** SIGABRT received by PID 513608 (TID 513608) on cpu 133 from PID 513608; stack trace: ***
PC: @ 0x7fbd586abce1 (unknown) raise
@ 0x7fb842fa91a1 1888 (unknown)
@ 0x7fbd589a9140 1152314000 (unknown)
@ 0x7fbd586f46da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fbd586abce1,7fb842fa91a0,7fbd589a913f,7fbd586f46d9,0&map=
E0905 20:10:06.714677 513608 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:06.714695 513608 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:06.714700 513608 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:06.714723 513608 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:06.714728 513608 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f89ab6efce1,7f89ab9ed13f,7f89ab7386d9&map=
*** SIGABRT received by PID 513604 (TID 513604) on cpu 76 from PID 513604; stack trace: ***
PC: @ 0x7f89ab6efce1 (unknown) raise
@ 0x7f8496fa91a1 1888 (unknown)
@ 0x7f89ab9ed140 999734400 (unknown)
@ 0x7f89ab7386da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f89ab6efce1,7f8496fa91a0,7f89ab9ed13f,7f89ab7386d9,0&map=
E0905 20:10:06.758349 513604 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:06.758368 513604 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:06.758374 513604 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:06.758395 513604 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:06.758402 513604 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f05e1879ce1,7f05e1b7713f,7f05e18c26d9&map=
*** SIGABRT received by PID 513607 (TID 513607) on cpu 100 from PID 513607; stack trace: ***
PC: @ 0x7f05e1879ce1 (unknown) raise
@ 0x7f00cefa91a1 1888 (unknown)
@ 0x7f05e1b77140 (unknown) (unknown)
@ 0x7f05e18c26da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f05e1879ce1,7f00cefa91a0,7f05e1b7713f,7f05e18c26d9,0&map=
E0905 20:10:06.798860 513607 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:06.798877 513607 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:06.798882 513607 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:06.798904 513607 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:06.798910 513607 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:10:14.181120 513609 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:10:14.185052 513608 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:10:14.188564 513604 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:10:14.192765 513607 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_runtime_device_attributes
[ RUN ] TestExperimentalPjrtTpu.test_spawn_error
[ OK ] TestExperimentalPjrtTpu.test_spawn_error
[ RUN ] TestExperimentalPjrtTpu.test_spawn_threads
[ OK ] TestExperimentalPjrtTpu.test_spawn_threads
[ RUN ] TestExperimentalPjrtTpu.test_xla_devices_multiprocess
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f34cead6ce1,7f34cedd413f,7f34ceb1f6d9&map=
*** SIGABRT received by PID 518958 (TID 520729) on cpu 89 from PID 518958; stack trace: ***
PC: @ 0x7f34cead6ce1 (unknown) raise
@ 0x7f2fbafa91a1 1888 (unknown)
@ 0x7f34cedd4140 2320 (unknown)
@ 0x7f34ceb1f6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f34cead6ce1,7f2fbafa91a0,7f34cedd413f,7f34ceb1f6d9,0&map=
E0905 20:10:35.068106 520729 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:35.068121 520729 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:35.068126 520729 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:35.068147 520729 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:35.068153 520729 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7efda0981ce1,7efda0c7f13f,7efda09ca6d9&map=
*** SIGABRT received by PID 518962 (TID 518962) on cpu 104 from PID 518962; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fa380f24ce1,7fa38122213f,7fa380f6d6d9&map=
*** SIGABRT received by PID 518961 (TID 518961) on cpu 199 from PID 518961; stack trace: ***
PC: @ 0x7efda0981ce1 (unknown) raise
@ 0x7ef88afa91a1 1888 (unknown)
@ 0x7efda0c7f140 2062114288 (unknown)
@ 0x7efda09ca6da (unknown) (unknown)
PC: @ 0x7fa380f24ce1 (unknown) raise
@ 0x7f9e6afa91a1 1888 (unknown)
@ 0x7fa381222140 700327488 (unknown)
@ 0x7fa380f6d6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7efda0981ce1,7ef88afa91a0,7efda0c7f13f,7efda09ca6d9,0&map=
E0905 20:10:35.360205 518962 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:35.360222 518962 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:35.360228 518962 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:35.360249 518962 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:35.360254 518962 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fa380f24ce1,7f9e6afa91a0,7fa38122213f,7fa380f6d6d9,0&map=
E0905 20:10:35.360690 518961 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:35.360708 518961 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:35.360713 518961 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:35.360732 518961 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:35.360738 518961 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f045bc2bce1,7f045bf2913f,7f045bc746d9&map=
*** SIGABRT received by PID 518963 (TID 518963) on cpu 116 from PID 518963; stack trace: ***
PC: @ 0x7f045bc2bce1 (unknown) raise
@ 0x7eff46fa91a1 1888 (unknown)
@ 0x7f045bf29140 370410080 (unknown)
@ 0x7f045bc746da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f045bc2bce1,7eff46fa91a0,7f045bf2913f,7f045bc746d9,0&map=
E0905 20:10:35.387360 518963 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:10:35.387377 518963 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:10:35.387383 518963 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:10:35.387405 518963 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:10:35.387411 518963 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:10:42.707055 518961 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:10:42.716878 518962 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:10:42.719445 518963 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_xla_devices_multiprocess
[ RUN ] TestExperimentalPjrtTpu.test_xla_devices_single_process_all_chips
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f5f71a25ce1,7f5f71d2313f,7f5f71a6e6d9&map=
*** SIGABRT received by PID 522363 (TID 523400) on cpu 229 from PID 522363; stack trace: ***
PC: @ 0x7f5f71a25ce1 (unknown) raise
@ 0x7f5a5efa91a1 1888 (unknown)
@ 0x7f5f71d23140 2320 (unknown)
@ 0x7f5f71a6e6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f5f71a25ce1,7f5a5efa91a0,7f5f71d2313f,7f5f71a6e6d9,0&map=
E0905 20:11:05.623720 523400 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:05.623740 523400 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:05.623745 523400 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:05.623765 523400 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:05.623772 523400 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:11:10.966654 523400 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestExperimentalPjrtTpu.test_xla_devices_single_process_all_chips
[ RUN ] TestExperimentalPjrtTpu.test_xla_devices_single_process_one_chip
[ OK ] TestExperimentalPjrtTpu.test_xla_devices_single_process_one_chip
[ RUN ] TestExperimentalPjrtTpu.test_xla_devices_single_process_one_chip_one_device_spawn
[ OK ] TestExperimentalPjrtTpu.test_xla_devices_single_process_one_chip_one_device_spawn
----------------------------------------------------------------------
Ran 15 tests in 233.539s
OK
+ python3 test/pjrt/test_collective_ops_tpu.py
Running tests under Python 3.10.14: /root/miniconda3/envs/torch310/bin/python3
[ RUN ] TestDistCollectiveOpsTpu.test_all_gather_dynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_gather_dynamo
[ RUN ] TestDistCollectiveOpsTpu.test_all_gather_into_tensor_dynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_gather_into_tensor_dynamo
[ RUN ] TestDistCollectiveOpsTpu.test_all_gather_into_tensor_nondynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_gather_into_tensor_nondynamo
[ RUN ] TestDistCollectiveOpsTpu.test_all_gather_nondynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_gather_nondynamo
[ RUN ] TestDistCollectiveOpsTpu.test_all_reduce_dynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_reduce_dynamo
[ RUN ] TestDistCollectiveOpsTpu.test_all_reduce_nondynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_all_reduce_nondynamo
[ RUN ] TestDistCollectiveOpsTpu.test_reduce_scatter_dynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_reduce_scatter_dynamo
[ RUN ] TestDistCollectiveOpsTpu.test_reduce_scatter_nondynamo
[ SKIPPED ] TestDistCollectiveOpsTpu.test_reduce_scatter_nondynamo
[ RUN ] TestXMCollectiveOpsTpu.test_all_gather_pinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f01b8423ce1,7f01b872113f,7f01b846c6d9&map=
*** SIGABRT received by PID 524488 (TID 524488) on cpu 29 from PID 524488; stack trace: ***
PC: @ 0x7f01b8423ce1 (unknown) raise
@ 0x7efc9efa91a1 1888 (unknown)
@ 0x7f01b8721140 2063829600 (unknown)
@ 0x7f01b846c6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f01b8423ce1,7efc9efa91a0,7f01b872113f,7f01b846c6d9,0&map=
E0905 20:11:31.792499 524488 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:31.792513 524488 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:31.792518 524488 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:31.792538 524488 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:31.792544 524488 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f89a3f31ce1,7f89a422f13f,7f89a3f7a6d9&map=
*** SIGABRT received by PID 524483 (TID 524483) on cpu 175 from PID 524483; stack trace: ***
PC: @ 0x7f89a3f31ce1 (unknown) raise
@ 0x7f848afa91a1 1888 (unknown)
@ 0x7f89a422f140 (unknown) (unknown)
@ 0x7f89a3f7a6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f89a3f31ce1,7f848afa91a0,7f89a422f13f,7f89a3f7a6d9,0&map=
E0905 20:11:31.884371 524483 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:31.884384 524483 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:31.884388 524483 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:31.884409 524483 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:31.884414 524483 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f70b7968ce1,7f70b7c6613f,7f70b79b16d9&map=
*** SIGABRT received by PID 524486 (TID 524486) on cpu 3 from PID 524486; stack trace: ***
PC: @ 0x7f70b7968ce1 (unknown) raise
@ 0x7f6ba2fa91a1 1888 (unknown)
@ 0x7f70b7c66140 (unknown) (unknown)
@ 0x7f70b79b16da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f70b7968ce1,7f6ba2fa91a0,7f70b7c6613f,7f70b79b16d9,0&map=
E0905 20:11:31.903504 524486 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:31.903516 524486 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:31.903521 524486 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:31.903536 524486 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:31.903542 524486 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fed98e7cce1,7fed9917a13f,7fed98ec56d9&map=
*** SIGABRT received by PID 524487 (TID 524487) on cpu 183 from PID 524487; stack trace: ***
PC: @ 0x7fed98e7cce1 (unknown) raise
@ 0x7fe882fa91a1 1888 (unknown)
@ 0x7fed9917a140 (unknown) (unknown)
@ 0x7fed98ec56da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fed98e7cce1,7fe882fa91a0,7fed9917a13f,7fed98ec56d9,0&map=
E0905 20:11:31.919913 524487 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:31.919932 524487 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:31.919937 524487 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:31.919956 524487 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:31.919963 524487 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:11:38.091422 524488 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:11:38.649462 524483 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:11:38.651233 524486 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:11:38.651368 524487 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_gather_pinned
[ RUN ] TestXMCollectiveOpsTpu.test_all_gather_unpinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f83b811bce1,7f83b841913f,7f83b81646d9&map=
*** SIGABRT received by PID 528791 (TID 528791) on cpu 90 from PID 528791; stack trace: ***
PC: @ 0x7f83b811bce1 (unknown) raise
@ 0x7f7e9efa91a1 1888 (unknown)
@ 0x7f83b8419140 (unknown) (unknown)
@ 0x7f83b81646da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f83b811bce1,7f7e9efa91a0,7f83b841913f,7f83b81646d9,0&map=
E0905 20:11:48.938294 528791 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:48.938308 528791 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:48.938313 528791 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:48.938332 528791 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:48.938338 528791 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f02eb2dbce1,7f02eb5d913f,7f02eb3246d9&map=
*** SIGABRT received by PID 528792 (TID 528792) on cpu 186 from PID 528792; stack trace: ***
PC: @ 0x7f02eb2dbce1 (unknown) raise
@ 0x7efdd2fa91a1 1888 (unknown)
@ 0x7f02eb5d9140 (unknown) (unknown)
@ 0x7f02eb3246da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f02eb2dbce1,7efdd2fa91a0,7f02eb5d913f,7f02eb3246d9,0&map=
E0905 20:11:48.956252 528792 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:48.956267 528792 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:48.956272 528792 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:48.956291 528792 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:48.956297 528792 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ff09d1d1ce1,7ff09d4cf13f,7ff09d21a6d9&map=
*** SIGABRT received by PID 528793 (TID 528793) on cpu 132 from PID 528793; stack trace: ***
PC: @ 0x7ff09d1d1ce1 (unknown) raise
@ 0x7feb86fa91a1 1888 (unknown)
@ 0x7ff09d4cf140 (unknown) (unknown)
@ 0x7ff09d21a6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ff09d1d1ce1,7feb86fa91a0,7ff09d4cf13f,7ff09d21a6d9,0&map=
E0905 20:11:48.963243 528793 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:11:48.963254 528793 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:11:48.963259 528793 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:11:48.963275 528793 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:11:48.963281 528793 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:11:55.576920 528792 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:11:55.578706 528793 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:11:55.580796 528791 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_gather_unpinned
[ RUN ] TestXMCollectiveOpsTpu.test_all_reduce_pinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f1d5bb6dce1,7f1d5be6b13f,7f1d5bbb66d9&map=
*** SIGABRT received by PID 533098 (TID 533098) on cpu 32 from PID 533098; stack trace: ***
PC: @ 0x7f1d5bb6dce1 (unknown) raise
@ 0x7f1846fa91a1 1888 (unknown)
@ 0x7f1d5be6b140 (unknown) (unknown)
@ 0x7f1d5bbb66da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f1d5bb6dce1,7f1846fa91a0,7f1d5be6b13f,7f1d5bbb66d9,0&map=
E0905 20:12:05.308659 533098 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:05.308669 533098 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:05.308675 533098 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:05.308697 533098 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:05.308702 533098 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ff6769e7ce1,7ff676ce513f,7ff676a306d9&map=
*** SIGABRT received by PID 533097 (TID 533097) on cpu 32 from PID 533097; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f33685ecce1,7f33688ea13f,7f33686356d9&map=
*** SIGABRT received by PID 533093 (TID 533093) on cpu 9 from PID 533093; stack trace: ***
PC: @ 0x7ff6769e7ce1 (unknown) raise
@ 0x7ff162fa91a1 1888 (unknown)
@ 0x7ff676ce5140 (unknown) (unknown)
@ 0x7ff676a306da (unknown) (unknown)
PC: @ 0x7f33685ecce1 (unknown) raise
@ 0x7f2e52fa91a1 1888 (unknown)
@ 0x7f33688ea140 (unknown) (unknown)
@ 0x7f33686356da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ff6769e7ce1,7ff162fa91a0,7ff676ce513f,7ff676a306d9,0&map=
E0905 20:12:05.316306 533097 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:05.316317 533097 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:05.316323 533097 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:05.316336 533097 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:05.316341 533097 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f33685ecce1,7f2e52fa91a0,7f33688ea13f,7f33686356d9,0&map=
E0905 20:12:05.317360 533093 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:05.317372 533093 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:05.317377 533093 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:05.317395 533093 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:05.317400 533093 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ffa848dfce1,7ffa84bdd13f,7ffa849286d9&map=
*** SIGABRT received by PID 533096 (TID 533096) on cpu 28 from PID 533096; stack trace: ***
PC: @ 0x7ffa848dfce1 (unknown) raise
@ 0x7ff56efa91a1 1888 (unknown)
@ 0x7ffa84bdd140 (unknown) (unknown)
@ 0x7ffa849286da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ffa848dfce1,7ff56efa91a0,7ffa84bdd13f,7ffa849286d9,0&map=
E0905 20:12:05.324216 533096 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:05.324228 533096 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:05.324234 533096 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:05.324252 533096 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:05.324257 533096 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:12:11.790911 533096 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:12.117953 533097 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:12.127411 533093 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:12.182072 533098 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_reduce_pinned
[ RUN ] TestXMCollectiveOpsTpu.test_all_reduce_unpinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fa99f829ce1,7fa99fb2713f,7fa99f8726d9&map=
*** SIGABRT received by PID 537384 (TID 537384) on cpu 47 from PID 537384; stack trace: ***
PC: @ 0x7fa99f829ce1 (unknown) raise
@ 0x7fa48afa91a1 1888 (unknown)
@ 0x7fa99fb27140 1943869024 (unknown)
@ 0x7fa99f8726da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fa99f829ce1,7fa48afa91a0,7fa99fb2713f,7fa99f8726d9,0&map=
E0905 20:12:22.273895 537384 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:22.273907 537384 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:22.273913 537384 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:22.273929 537384 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:22.273935 537384 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f73c8723ce1,7f73c8a2113f,7f73c876c6d9&map=
*** SIGABRT received by PID 537389 (TID 540436) on cpu 116 from PID 537389; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f41835fcce1,7f41838fa13f,7f41836456d9&map=
*** SIGABRT received by PID 537388 (TID 540434) on cpu 142 from PID 537388; stack trace: ***
PC: @ 0x7f73c8723ce1 (unknown) raise
@ 0x7f6eb2fa91a1 1888 (unknown)
@ 0x7f73c8a21140 2320 (unknown)
@ 0x7f73c876c6da (unknown) (unknown)
PC: @ 0x7f41835fcce1 (unknown) raise
@ 0x7f3c6efa91a1 1888 (unknown)
@ 0x7f41838fa140 2320 (unknown)
@ 0x7f41836456da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f73c8723ce1,7f6eb2fa91a0,7f73c8a2113f,7f73c876c6d9,0&map=
E0905 20:12:22.346136 540436 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:22.346152 540436 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:22.346157 540436 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:22.346178 540436 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:22.346184 540436 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f41835fcce1,7f3c6efa91a0,7f41838fa13f,7f41836456d9,0&map=
E0905 20:12:22.347760 540434 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:22.347773 540434 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:22.347781 540434 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:22.347800 540434 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:22.347806 540434 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:12:29.195190 540434 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:29.198803 540436 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:29.199498 537384 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_reduce_unpinned
[ RUN ] TestXMCollectiveOpsTpu.test_all_to_all_pinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f0982680ce1,7f098297e13f,7f09826c96d9&map=
*** SIGABRT received by PID 541667 (TID 544700) on cpu 58 from PID 541667; stack trace: ***
PC: @ 0x7f0982680ce1 (unknown) raise
@ 0x7f046efa91a1 1888 (unknown)
@ 0x7f098297e140 2320 (unknown)
@ 0x7f09826c96da 1416024544 (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f0982680ce1,7f046efa91a0,7f098297e13f,7f09826c96d9,0&map=
E0905 20:12:38.513020 544700 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:38.513038 544700 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:38.513044 544700 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:38.513062 544700 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:38.513068 544700 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f32d1fbcce1,7f32d22ba13f,7f32d20056d9&map=
*** SIGABRT received by PID 541663 (TID 541663) on cpu 33 from PID 541663; stack trace: ***
free(): corrupted unsorted chunks
PC: @ 0x7f32d1fbcce1 (unknown) raise
@ 0x7f2dbafa91a1 1888 (unknown)
https://symbolize.stripped_domain/r/?trace=7f6e0e8c4ce1, @ 0x7f32d22ba140 (unknown) (unknown)
7f6e0ebc213f,7f6e0e90d6d9&map=
*** SIGABRT received by PID 541668 (TID 541668) on cpu 130 from PID 541668; stack trace: ***
@ 0x7f32d20056da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f32d1fbcce1,7f2dbafa91a0,7f32d22ba13f,7f32d20056d9,0&map=
E0905 20:12:38.525262 541663 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:38.525273 541663 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:38.525277 541663 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:38.525298 541663 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:38.525303 541663 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7f6e0e8c4ce1 (unknown) raise
@ 0x7f68fafa91a1 1888 (unknown)
@ 0x7f6e0ebc2140 1765943232 (unknown)
@ 0x7f6e0e90d6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f6e0e8c4ce1,7f68fafa91a0,7f6e0ebc213f,7f6e0e90d6d9,0&map=
E0905 20:12:38.527777 541668 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:38.527793 541668 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:38.527797 541668 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:38.527815 541668 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:38.527821 541668 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f9956b20ce1,7f9956e1e13f,7f9956b696d9&map=
*** SIGABRT received by PID 541666 (TID 541666) on cpu 24 from PID 541666; stack trace: ***
PC: @ 0x7f9956b20ce1 (unknown) raise
@ 0x7f9442fa91a1 1888 (unknown)
@ 0x7f9956e1e140 172804336 (unknown)
@ 0x7f9956b696da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f9956b20ce1,7f9442fa91a0,7f9956e1e13f,7f9956b696d9,0&map=
E0905 20:12:38.559990 541666 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:38.560004 541666 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:38.560009 541666 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:38.560026 541666 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:38.560031 541666 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:12:45.405202 541668 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:45.405445 541663 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:45.405466 541666 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:12:45.409866 544700 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_to_all_pinned
[ RUN ] TestXMCollectiveOpsTpu.test_all_to_all_unpinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f14d1c30ce1,7f14d1f2e13f,7f14d1c796d9&map=
*** SIGABRT received by PID 545930 (TID 545930) on cpu 99 from PID 545930; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fe951577ce1,7fe95187513f,7fe9515c06d9&map=
*** SIGABRT received by PID 545927 (TID 545927) on cpu 90 from PID 545927; stack trace: ***
PC: @ 0x7f14d1c30ce1 (unknown) raise
@ 0x7f0fbafa91a1 1888 (unknown)
@ 0x7f14d1f2e140 588276112 (unknown)
@ 0x7f14d1c796da (unknown) (unknown)
PC: @ 0x7fe951577ce1 (unknown) raise
@ 0x7fe43efa91a1 1888 (unknown)
@ 0x7fe951875140 2085197664 (unknown)
@ 0x7fe9515c06da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f14d1c30ce1,7f0fbafa91a0,7f14d1f2e13f,7f14d1c796d9,0&map=
E0905 20:12:55.346901 545930 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:55.346919 545930 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:55.346925 545930 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:55.346945 545930 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:55.346951 545930 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fe951577ce1,7fe43efa91a0,7fe95187513f,7fe9515c06d9,0&map=
E0905 20:12:55.348060 545927 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:55.348080 545927 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:55.348084 545927 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:55.348105 545927 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:55.348111 545927 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f545fcf3ce1,7f545fff113f,7f545fd3c6d9&map=
*** SIGABRT received by PID 545931 (TID 548980) on cpu 8 from PID 545931; stack trace: ***
PC: @ 0x7f545fcf3ce1 (unknown) raise
@ 0x7f4f46fa91a1 1888 (unknown)
@ 0x7f545fff1140 2320 (unknown)
@ 0x7f545fd3c6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f545fcf3ce1,7f4f46fa91a0,7f545fff113f,7f545fd3c6d9,0&map=
E0905 20:12:55.366214 548980 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:55.366231 548980 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:55.366236 548980 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:55.366259 548980 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:55.366264 548980 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f246e7b5ce1,7f246eab313f,7f246e7fe6d9&map=
*** SIGABRT received by PID 545932 (TID 545932) on cpu 101 from PID 545932; stack trace: ***
PC: @ 0x7f246e7b5ce1 (unknown) raise
@ 0x7f1f5afa91a1 1888 (unknown)
@ 0x7f246eab3140 1241085136 (unknown)
@ 0x7f246e7fe6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f246e7b5ce1,7f1f5afa91a0,7f246eab313f,7f246e7fe6d9,0&map=
E0905 20:12:55.373810 545932 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:12:55.373819 545932 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:12:55.373828 545932 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:12:55.373840 545932 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:12:55.373846 545932 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:13:01.368411 545927 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:03.032214 545930 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:03.033954 545932 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:03.039261 548980 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_all_to_all_unpinned
[ RUN ] TestXMCollectiveOpsTpu.test_broadcast_master_param_synchronized_parameters
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fc614816ce1,7fc614b1413f,7fc61485f6d9&map=
*** SIGABRT received by PID 550221 (TID 550221) on cpu 123 from PID 550221; stack trace: ***
PC: @ 0x7fc614816ce1 (unknown) raise
@ 0x7fc0fefa91a1 1888 (unknown)
@ 0x7fc614b14140 521010656 (unknown)
@ 0x7fc61485f6da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fe2416c6ce1,7fe2419c413f,7fe24170f6d9&map=
*** SIGABRT received by PID 550226 (TID 550226) on cpu 215 from PID 550226; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fc614816ce1,7fc0fefa91a0,7fc614b1413f,7fc61485f6d9,0&map=
E0905 20:13:13.099475 550221 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:13.099486 550221 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:13.099492 550221 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:13.099512 550221 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:13.099518 550221 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fe2416c6ce1 (unknown) raise
@ 0x7fdd2efa91a1 1888 (unknown)
@ 0x7fe2419c4140 1322544720 (unknown)
@ 0x7fe24170f6da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fa6ef6a1ce1,7fa6ef99f13f,7fa6ef6ea6d9&map=
*** SIGABRT received by PID 550227 (TID 550227) on cpu 19 from PID 550227; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fe2416c6ce1,7fdd2efa91a0,7fe2419c413f,7fe24170f6d9,0&map=
E0905 20:13:13.103718 550226 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:13.103734 550226 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:13.103739 550226 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:13.103760 550226 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:13.103767 550226 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fa6ef6a1ce1 (unknown) raise
@ 0x7fa1dafa91a1 1888 (unknown)
@ 0x7fa6ef99f140 1107225152 (unknown)
@ 0x7fa6ef6ea6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fa6ef6a1ce1,7fa1dafa91a0,7fa6ef99f13f,7fa6ef6ea6d9,0&map=
E0905 20:13:13.107677 550227 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:13.107691 550227 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:13.107696 550227 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:13.107716 550227 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:13.107722 550227 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f7cb833ace1,7f7cb863813f,7f7cb83836d9&map=
*** SIGABRT received by PID 550225 (TID 550225) on cpu 60 from PID 550225; stack trace: ***
PC: @ 0x7f7cb833ace1 (unknown) raise
@ 0x7f77a2fa91a1 1888 (unknown)
@ 0x7f7cb8638140 (unknown) (unknown)
@ 0x7f7cb83836da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f7cb833ace1,7f77a2fa91a0,7f7cb863813f,7f7cb83836d9,0&map=
E0905 20:13:13.204998 550225 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:13.205015 550225 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:13.205020 550225 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:13.205039 550225 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:13.205044 550225 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:13:19.583546 550227 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:20.650979 550221 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:20.651187 550226 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:20.667376 550225 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_broadcast_master_param_synchronized_parameters
[ RUN ] TestXMCollectiveOpsTpu.test_broadcast_master_param_unsynchronized_parameters
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f6d7b52fce1,7f6d7b82d13f,7f6d7b5786d9&map=
*** SIGABRT received by PID 554526 (TID 558629) on cpu 147 from PID 554526; stack trace: ***
PC: @ 0x7f6d7b52fce1 (unknown) raise
@ 0x7f6866fa91a1 1888 (unknown)
@ 0x7f6d7b82d140 2320 (unknown)
@ 0x7f6d7b5786da 2046833120 (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f6d7b52fce1,7f6866fa91a0,7f6d7b82d13f,7f6d7b5786d9,0&map=
E0905 20:13:30.448092 558629 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:30.448109 558629 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:30.448115 558629 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:30.448135 558629 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:30.448142 558629 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7efce950dce1,7efce980b13f,7efce95566d9&map=
*** SIGABRT received by PID 554528 (TID 554528) on cpu 60 from PID 554528; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f62ba92dce1,7f62bac2b13f,7f62ba9766d9&map=
*** SIGABRT received by PID 554523 (TID 554523) on cpu 116 from PID 554523; stack trace: ***
PC: @ 0x7efce950dce1 (unknown) raise
@ 0x7ef7d6fa91a1 1888 (unknown)
@ 0x7efce980b140 (unknown) (unknown)
@ 0x7efce95566da (unknown) (unknown)
PC: @ 0x7f62ba92dce1 (unknown) raise
@ 0x7f5da6fa91a1 1888 (unknown)
@ 0x7f62bac2b140 1986376912 (unknown)
@ 0x7f62ba9766da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7efce950dce1,7ef7d6fa91a0,7efce980b13f,7efce95566d9,0&map=
E0905 20:13:30.466157 554528 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:30.466179 554528 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:30.466184 554528 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:30.466204 554528 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:30.466210 554528 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f62ba92dce1,7f5da6fa91a0,7f62bac2b13f,7f62ba9766d9,0&map=
E0905 20:13:30.467523 554523 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:30.467541 554523 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:30.467546 554523 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:30.467566 554523 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:30.467572 554523 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f8dc9f44ce1,7f8dca24213f,7f8dc9f8d6d9&map=
*** SIGABRT received by PID 554527 (TID 554527) on cpu 35 from PID 554527; stack trace: ***
PC: @ 0x7f8dc9f44ce1 (unknown) raise
@ 0x7f88b2fa91a1 1888 (unknown)
@ 0x7f8dca242140 (unknown) (unknown)
@ 0x7f8dc9f8d6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f8dc9f44ce1,7f88b2fa91a0,7f8dca24213f,7f8dc9f8d6d9,0&map=
E0905 20:13:30.505259 554527 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:30.505275 554527 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:30.505280 554527 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:30.505298 554527 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:30.505304 554527 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:13:36.728520 554528 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:36.731616 558629 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:36.732890 554523 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:36.742421 554527 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_broadcast_master_param_unsynchronized_parameters
[ RUN ] TestXMCollectiveOpsTpu.test_reduce_scatter_pinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f43d1c53ce1,7f43d1f5113f,7f43d1c9c6d9&map=
*** SIGABRT received by PID 558779 (TID 558779) on cpu 2 from PID 558779; stack trace: ***
PC: @ 0x7f43d1c53ce1 (unknown) raise
@ 0x7f3ebafa91a1 1888 (unknown)
@ 0x7f43d1f51140 (unknown) (unknown)
@ 0x7f43d1c9c6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f43d1c53ce1,7f3ebafa91a0,7f43d1f5113f,7f43d1c9c6d9,0&map=
E0905 20:13:46.619469 558779 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:46.619484 558779 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:46.619489 558779 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:46.619512 558779 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:46.619518 558779 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f3070ea6ce1,7f30711a413f,7f3070eef6d9&map=
*** SIGABRT received by PID 558775 (TID 558775) on cpu 222 from PID 558775; stack trace: ***
PC: @ 0x7f3070ea6ce1 (unknown) raise
@ 0x7f2b5afa91a1 1888 (unknown)
@ 0x7f30711a4140 (unknown) (unknown)
@ 0x7f3070eef6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f3070ea6ce1,7f2b5afa91a0,7f30711a413f,7f3070eef6d9,0&map=
E0905 20:13:46.720299 558775 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:46.720311 558775 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:46.720318 558775 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:46.720335 558775 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:46.720341 558775 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f314fb75ce1,7f314fe7313f,7f314fbbe6d9&map=
*** SIGABRT received by PID 558778 (TID 558778) on cpu 61 from PID 558778; stack trace: ***
PC: @ 0x7f314fb75ce1 (unknown) raise
@ 0x7f2c3afa91a1 1888 (unknown)
@ 0x7f314fe73140 406888160 (unknown)
@ 0x7f314fbbe6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f314fb75ce1,7f2c3afa91a0,7f314fe7313f,7f314fbbe6d9,0&map=
E0905 20:13:46.780810 558778 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:46.780827 558778 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:46.780832 558778 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:46.780852 558778 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:46.780859 558778 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fd3d8825ce1,7fd3d8b2313f,7fd3d886e6d9&map=
*** SIGABRT received by PID 558780 (TID 558780) on cpu 96 from PID 558780; stack trace: ***
PC: @ 0x7fd3d8825ce1 (unknown) raise
@ 0x7fcec2fa91a1 1888 (unknown)
@ 0x7fd3d8b23140 1265681040 (unknown)
@ 0x7fd3d886e6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fd3d8825ce1,7fcec2fa91a0,7fd3d8b2313f,7fd3d886e6d9,0&map=
E0905 20:13:46.795217 558780 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:13:46.795250 558780 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:13:46.795255 558780 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:13:46.795292 558780 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:13:46.795298 558780 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:13:55.700193 558775 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:55.708580 558778 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:57.984232 558779 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:13:57.995124 558780 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_reduce_scatter_pinned
[ RUN ] TestXMCollectiveOpsTpu.test_reduce_scatter_unpinned
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f43a63ddce1,7f43a66db13f,7f43a64266d9&map=
*** SIGABRT received by PID 563106 (TID 563106) on cpu 180 from PID 563106; stack trace: ***
PC: @ 0x7f43a63ddce1 (unknown) raise
@ 0x7f3e8efa91a1 1888 (unknown)
@ 0x7f43a66db140 (unknown) (unknown)
@ 0x7f43a64266da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f43a63ddce1,7f3e8efa91a0,7f43a66db13f,7f43a64266d9,0&map=
E0905 20:14:07.465575 563106 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:14:07.465594 563106 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:14:07.465599 563106 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:14:07.465620 563106 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:14:07.465625 563106 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f0e4ad19ce1,7f0e4b01713f,7f0e4ad626d9&map=
*** SIGABRT received by PID 563104 (TID 563104) on cpu 217 from PID 563104; stack trace: ***
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f77e6225ce1,7f77e652313f,7f77e626e6d9&map=
*** SIGABRT received by PID 563101 (TID 563101) on cpu 63 from PID 563101; stack trace: ***
PC: @ 0x7f0e4ad19ce1 (unknown) raise
@ 0x7f0932fa91a1 1888 (unknown)
@ 0x7f0e4b017140 2141687728 (unknown)
@ 0x7f0e4ad626da (unknown) (unknown)
PC: @ 0x7f77e6225ce1 (unknown) raise
@ 0x7f72cefa91a1 1888 (unknown)
@ 0x7f77e6523140 (unknown) (unknown)
@ 0x7f77e626e6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f0e4ad19ce1,7f0932fa91a0,7f0e4b01713f,7f0e4ad626d9,0&map=
E0905 20:14:07.668320 563104 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:14:07.668337 563104 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:14:07.668342 563104 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:14:07.668364 563104 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:14:07.668370 563104 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f77e6225ce1,7f72cefa91a0,7f77e652313f,7f77e626e6d9,0&map=
E0905 20:14:07.669421 563101 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:14:07.669467 563101 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:14:07.669472 563101 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:14:07.669497 563101 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:14:07.669503 563101 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:14:13.421191 563106 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:14:15.211460 563101 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:14:15.211749 563104 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestXMCollectiveOpsTpu.test_reduce_scatter_unpinned
----------------------------------------------------------------------
Ran 18 tests in 172.592s
OK (skipped=8)
+ python3 test/spmd/test_xla_sharding.py
..................................................................
----------------------------------------------------------------------
Ran 66 tests in 2.641s
OK
+ python3 test/spmd/test_xla_virtual_device.py
............
----------------------------------------------------------------------
Ran 12 tests in 3.521s
OK
+ python3 test/spmd/test_xla_distributed_checkpoint.py
E0905 20:14:43.885054159 570182 server_chttp2.cc:40] {"created":"@1725567283.885023779","description":"Only 1 addresses added out of total 2 resolved","file":"external/com_github_grpc_grpc/src/core/ext/transport/chttp2/server/chttp2_server.cc","file_line":404,"referenced_errors":[{"created":"@1725567283.885019079","description":"Address family not supported by protocol","errno":97,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":420,"os_error":"Address family not supported by protocol","syscall":"socket","target_address":"[::1]:8547"}]}
WARNING:root:Preemption sync point reached at step 10. Triggering a checkpoint.
/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/checkpoint/state_dict_saver.py:289: UserWarning: The function definition for SavePlanner.set_up_planner has been updated to include the storage_meta argument. Please update your implementation to include this parameter.
warnings.warn(
/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/checkpoint/filesystem.py:116: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
if tensor.storage().size() != tensor.numel():
.......2024-09-05 20:14:45.898401: W external/xla/xla/tsl/distributed_runtime/preemption/preemption_notifier.cc:89] SIGTERM caught at 2024-09-05T20:14:45.898352467+00:00
./pytorch/xla/torch_xla/runtime.py:239: UserWarning: Replicating tensors already initialized on non-virtual XLA device for SPMD to force SPMD mode. This is one-time overhead to setup, and to minimize such, please set SPMD mode before initializting tensors (i.e., call use_spmd() in the beginning of the program).
warnings.warn(
.s/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/checkpoint/state_dict_saver.py:143: UserWarning: torch.distributed is unavailable or uninitialized, assuming the intent is to save in a single process.
warnings.warn(
/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/checkpoint/state_dict_loader.py:142: UserWarning: torch.distributed is unavailable or uninitialized, assuming the intent is to load in a single process.
warnings.warn(
.....E0905 20:14:46.792738 571613 preemption_sync_manager.cc:247] Preemption sync failed - could not inform service of current call counter: ALREADY_EXISTS: Config key PREEMPTION_CURRENT_COUNTER//job:jax_worker/task:0 already exists.
Additional GRPC error information from remote target coordination_service while calling /tensorflow.CoordinationService/InsertKeyValue:
:{"created":"@1725567286.792676236","description":"Error received from peer ipv4:127.0.0.1:8547","file":"external/com_github_grpc_grpc/src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"Config key PREEMPTION_CURRENT_COUNTER//job:jax_worker/task:0 already exists.","grpc_status":6} [type.googleapis.com/tensorflow.CoordinationServiceError='']
E0905 20:14:46.792954 571271 preemption_sync_manager.cc:303] Failed to cancel preemption barrier: FAILED_PRECONDITION: Barrier (PREEMPTION_SYNC_BARRIER) has already been passed with status code: 0
Additional GRPC error information from remote target coordination_service while calling /tensorflow.CoordinationService/CancelBarrier:
:{"created":"@1725567286.792938796","description":"Error received from peer ipv4:127.0.0.1:8547","file":"external/com_github_grpc_grpc/src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"Barrier (PREEMPTION_SYNC_BARRIER) has already been passed with status code: 0","grpc_status":9} [type.googleapis.com/tensorflow.CoordinationServiceError='']
/pytorch/xla/torch_xla/experimental/distributed_checkpoint/planners.py:250: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
torch.load(value),
..........
----------------------------------------------------------------------
Ran 25 tests in 3.967s
OK (skipped=1)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fae67ddbce1,7fae680d913f,7fae67e246d9&map=
*** SIGABRT received by PID 570182 (TID 571645) on cpu 5 from PID 570182; stack trace: ***
PC: @ 0x7fae67ddbce1 (unknown) raise
@ 0x7fa94efa91a1 1888 (unknown)
@ 0x7fae680d9140 2320 (unknown)
@ 0x7fae67e246da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fae67ddbce1,7fa94efa91a0,7fae680d913f,7fae67e246d9,0&map=
E0905 20:14:49.773018 571645 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:14:49.773034 571645 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:14:49.773039 571645 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:14:49.773059 571645 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:14:49.773065 571645 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:14:55.417331 571645 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 11: 570182 Aborted (core dumped) python3 test/spmd/test_xla_distributed_checkpoint.py
+ python3 test/spmd/test_train_spmd_linear_model.py
Start training loop...
===> Preparing data..
num_devices: 4
Epoch 0 step 0 loss 0.0
Epoch 0 step 10 loss 0.0
Epoch 0 step 20 loss 0.0
Epoch 0 step 30 loss 0.0
Epoch 0 step 40 loss 0.0
Epoch 0 step 50 loss 0.0
Epoch 0 step 60 loss 0.0
Epoch 0 step 70 loss 0.0
Epoch 0 step 80 loss 0.0
Epoch 0 step 90 loss 0.0
Epoch 0 step 100 loss 0.0
Epoch 0 step 110 loss 0.0
Epoch 0 step 120 loss 0.0
Epoch 0 step 130 loss 0.0
Epoch 0 step 140 loss 0.0
Epoch 0 step 150 loss 0.0
Epoch 0 step 160 loss 0.0
Epoch 0 step 170 loss 0.0
Epoch 0 step 180 loss 0.0
Epoch 0 step 190 loss 0.0
Epoch 0 step 200 loss 0.0
Epoch 0 step 210 loss 0.0
Epoch 0 step 220 loss 0.0
Epoch 0 step 230 loss 0.0
Epoch 0 step 240 loss 0.0
Epoch 0 step 250 loss 0.0
Epoch 0 step 260 loss 0.0
Epoch 0 step 270 loss 0.0
Epoch 0 step 280 loss 0.0
Epoch 0 step 290 loss 0.0
Epoch 0 step 300 loss 0.0
Epoch 0 step 310 loss 0.0
Epoch 0 step 320 loss 0.0
Epoch 0 step 330 loss 0.0
Epoch 0 step 340 loss 0.0
Epoch 0 step 350 loss 0.0
Epoch 0 step 360 loss 0.0
Epoch 0 step 370 loss 0.0
Epoch 0 step 380 loss 0.0
Epoch 0 step 390 loss 0.0
Epoch 0 step 400 loss 0.0
Epoch 0 step 410 loss 0.0
Epoch 0 step 420 loss 0.0
Epoch 0 step 430 loss 0.0
Epoch 0 step 440 loss 0.0
Epoch 0 step 450 loss 0.0
Epoch 0 step 460 loss 0.0
Epoch 0 step 470 loss 0.0
Epoch 0 step 480 loss 0.0
Epoch 0 step 490 loss 0.0
Epoch 0 step 500 loss 0.0
Epoch 0 step 510 loss 0.0
Epoch 0 step 520 loss 0.0
Epoch 0 step 530 loss 0.0
Epoch 0 step 540 loss 0.0
Epoch 0 step 550 loss 0.0
Epoch 0 step 560 loss 0.0
Epoch 0 step 570 loss 0.0
Epoch 0 step 580 loss 0.0
Epoch 0 step 590 loss 0.0
Epoch 0 step 600 loss 0.0
Epoch 0 step 610 loss 0.0
Epoch 0 step 620 loss 0.0
Epoch 0 step 630 loss 0.0
Epoch 0 step 640 loss 0.0
Epoch 0 step 650 loss 0.0
Epoch 0 step 660 loss 0.0
Epoch 0 step 670 loss 0.0
Epoch 0 step 680 loss 0.0
Epoch 0 step 690 loss 0.0
Epoch 0 step 700 loss 0.0
Epoch 0 step 710 loss 0.0
Epoch 0 step 720 loss 0.0
Epoch 0 step 730 loss 0.0
Epoch 0 step 740 loss 0.0
Epoch 0 step 750 loss 0.0
Epoch 0 step 760 loss 0.0
Epoch 0 step 770 loss 0.0
Epoch 0 step 780 loss 0.0
Epoch 0 step 790 loss 0.0
Epoch 0 step 800 loss 0.0
Epoch 0 step 810 loss 0.0
Epoch 0 step 820 loss 0.0
Epoch 0 step 830 loss 0.0
Epoch 0 step 840 loss 0.0
Epoch 0 step 850 loss 0.0
Epoch 0 step 860 loss 0.0
Epoch 0 step 870 loss 0.0
Epoch 0 step 880 loss 0.0
Epoch 0 step 890 loss 0.0
Epoch 0 step 900 loss 0.0
Epoch 0 step 910 loss 0.0
Epoch 0 step 920 loss 0.0
Epoch 0 step 930 loss 0.0
Epoch 0 step 940 loss 0.0
Epoch 0 step 950 loss 0.0
Epoch 0 step 960 loss 0.0
Epoch 0 step 970 loss 0.0
Epoch 0 step 980 loss 0.0
Epoch 0 step 990 loss 0.0
Epoch 0 step 1000 loss 0.0
Epoch 0 step 1010 loss 0.0
Epoch 0 step 1020 loss 0.0
Epoch 0 step 1030 loss 0.0
Epoch 0 step 1040 loss 0.0
Epoch 0 step 1050 loss 0.0
Epoch 0 step 1060 loss 0.0
Epoch 0 step 1070 loss 0.0
Epoch 0 step 1080 loss 0.0
Epoch 0 step 1090 loss 0.0
Epoch 0 step 1100 loss 0.0
Epoch 0 step 1110 loss 0.0
Epoch 0 step 1120 loss 0.0
Epoch 0 step 1130 loss 0.0
Epoch 0 step 1140 loss 0.0
Epoch 0 step 1150 loss 0.0
Epoch 0 step 1160 loss 0.0
Epoch 0 step 1170 loss 0.0
Epoch 0 step 1180 loss 0.0
Epoch 0 step 1190 loss 0.0
Epoch 0 step 1200 loss 0.0
Epoch 0 step 1210 loss 0.0
Epoch 0 step 1220 loss 0.0
Epoch 0 step 1230 loss 0.0
Epoch 0 step 1240 loss 0.0
Epoch 0 step 1250 loss 0.0
Epoch 0 step 1260 loss 0.0
Epoch 0 step 1270 loss 0.0
Epoch 0 step 1280 loss 0.0
Epoch 0 step 1290 loss 0.0
Epoch 0 step 1300 loss 0.0
Epoch 0 step 1310 loss 0.0
Epoch 0 step 1320 loss 0.0
Epoch 0 step 1330 loss 0.0
Epoch 0 step 1340 loss 0.0
Epoch 0 step 1350 loss 0.0
Epoch 0 step 1360 loss 0.0
Epoch 0 step 1370 loss 0.0
Epoch 0 step 1380 loss 0.0
Epoch 0 step 1390 loss 0.0
Epoch 0 step 1400 loss 0.0
Epoch 0 step 1410 loss 0.0
Epoch 0 step 1420 loss 0.0
Epoch 0 step 1430 loss 0.0
Epoch 0 step 1440 loss 0.0
Epoch 0 step 1450 loss 0.0
Epoch 0 step 1460 loss 0.0
Epoch 0 step 1470 loss 0.0
Epoch 0 step 1480 loss 0.0
Epoch 0 step 1490 loss 0.0
Epoch 0 step 1500 loss 0.0
Epoch 0 step 1510 loss 0.0
Epoch 0 step 1520 loss 0.0
Epoch 0 step 1530 loss 0.0
Epoch 0 step 1540 loss 0.0
Epoch 0 step 1550 loss 0.0
Epoch 0 step 1560 loss 0.0
Epoch 0 step 1570 loss 0.0
Epoch 0 step 1580 loss 0.0
Epoch 0 step 1590 loss 0.0
Epoch 0 step 1600 loss 0.0
Epoch 0 step 1610 loss 0.0
Epoch 0 step 1620 loss 0.0
Epoch 0 step 1630 loss 0.0
Epoch 0 step 1640 loss 0.0
Epoch 0 step 1650 loss 0.0
Epoch 0 step 1660 loss 0.0
Epoch 0 step 1670 loss 0.0
Epoch 0 step 1680 loss 0.0
Epoch 0 step 1690 loss 0.0
Epoch 0 step 1700 loss 0.0
Epoch 0 step 1710 loss 0.0
Epoch 0 step 1720 loss 0.0
Epoch 0 step 1730 loss 0.0
Epoch 0 step 1740 loss 0.0
Epoch 0 step 1750 loss 0.0
Epoch 0 step 1760 loss 0.0
Epoch 0 step 1770 loss 0.0
Epoch 0 step 1780 loss 0.0
Epoch 0 step 1790 loss 0.0
Epoch 0 step 1800 loss 0.0
Epoch 0 step 1810 loss 0.0
Epoch 0 step 1820 loss 0.0
Epoch 0 step 1830 loss 0.0
Epoch 0 step 1840 loss 0.0
Epoch 0 step 1850 loss 0.0
Epoch 0 step 1860 loss 0.0
Epoch 0 step 1870 loss 0.0
Epoch 0 step 1880 loss 0.0
Epoch 0 step 1890 loss 0.0
Epoch 0 step 1900 loss 0.0
Epoch 0 step 1910 loss 0.0
Epoch 0 step 1920 loss 0.0
Epoch 0 step 1930 loss 0.0
Epoch 0 step 1940 loss 0.0
Epoch 0 step 1950 loss 0.0
Epoch 0 step 1960 loss 0.0
Epoch 0 step 1970 loss 0.0
Epoch 0 step 1980 loss 0.0
Epoch 0 step 1990 loss 0.0
Epoch 0 step 2000 loss 0.0
Epoch 0 step 2010 loss 0.0
Epoch 0 step 2020 loss 0.0
Epoch 0 step 2030 loss 0.0
Epoch 0 step 2040 loss 0.0
Epoch 0 step 2050 loss 0.0
Epoch 0 step 2060 loss 0.0
Epoch 0 step 2070 loss 0.0
Epoch 0 step 2080 loss 0.0
Epoch 0 step 2090 loss 0.0
Epoch 0 step 2100 loss 0.0
Epoch 0 step 2110 loss 0.0
Epoch 0 step 2120 loss 0.0
Epoch 0 step 2130 loss 0.0
Epoch 0 step 2140 loss 0.0
Epoch 0 step 2150 loss 0.0
Epoch 0 step 2160 loss 0.0
Epoch 0 step 2170 loss 0.0
Epoch 0 step 2180 loss 0.0
Epoch 0 step 2190 loss 0.0
Epoch 0 step 2200 loss 0.0
Epoch 0 step 2210 loss 0.0
Epoch 0 step 2220 loss 0.0
Epoch 0 step 2230 loss 0.0
Epoch 0 step 2240 loss 0.0
Epoch 0 step 2250 loss 0.0
Epoch 0 step 2260 loss 0.0
Epoch 0 step 2270 loss 0.0
Epoch 0 step 2280 loss 0.0
Epoch 0 step 2290 loss 0.0
Epoch 0 step 2300 loss 0.0
Epoch 0 step 2310 loss 0.0
Epoch 0 step 2320 loss 0.0
Epoch 0 step 2330 loss 0.0
Epoch 0 step 2340 loss 0.0
Epoch 0 step 2350 loss 0.0
Epoch 0 step 2360 loss 0.0
Epoch 0 step 2370 loss 0.0
Epoch 0 step 2380 loss 0.0
Epoch 0 step 2390 loss 0.0
Epoch 0 step 2400 loss 0.0
Epoch 0 step 2410 loss 0.0
Epoch 0 step 2420 loss 0.0
Epoch 0 step 2430 loss 0.0
Epoch 0 step 2440 loss 0.0
Epoch 0 step 2450 loss 0.0
Epoch 0 step 2460 loss 0.0
Epoch 0 step 2470 loss 0.0
Epoch 0 step 2480 loss 0.0
Epoch 0 step 2490 loss 0.0
Epoch 0 step 2500 loss 0.0
Epoch 0 step 2510 loss 0.0
Epoch 0 step 2520 loss 0.0
Epoch 0 step 2530 loss 0.0
Epoch 0 step 2540 loss 0.0
Epoch 0 step 2550 loss 0.0
Epoch 0 step 2560 loss 0.0
Epoch 0 step 2570 loss 0.0
Epoch 0 step 2580 loss 0.0
Epoch 0 step 2590 loss 0.0
Epoch 0 step 2600 loss 0.0
Epoch 0 step 2610 loss 0.0
Epoch 0 step 2620 loss 0.0
Epoch 0 step 2630 loss 0.0
Epoch 0 step 2640 loss 0.0
Epoch 0 step 2650 loss 0.0
Epoch 0 step 2660 loss 0.0
Epoch 0 step 2670 loss 0.0
Epoch 0 step 2680 loss 0.0
Epoch 0 step 2690 loss 0.0
Epoch 0 step 2700 loss 0.0
Epoch 0 step 2710 loss 0.0
Epoch 0 step 2720 loss 0.0
Epoch 0 step 2730 loss 0.0
Epoch 0 step 2740 loss 0.0
Epoch 0 step 2750 loss 0.0
Epoch 0 step 2760 loss 0.0
Epoch 0 step 2770 loss 0.0
Epoch 0 step 2780 loss 0.0
Epoch 0 step 2790 loss 0.0
Epoch 0 step 2800 loss 0.0
Epoch 0 step 2810 loss 0.0
Epoch 0 step 2820 loss 0.0
Epoch 0 step 2830 loss 0.0
Epoch 0 step 2840 loss 0.0
Epoch 0 step 2850 loss 0.0
Epoch 0 step 2860 loss 0.0
Epoch 0 step 2870 loss 0.0
Epoch 0 step 2880 loss 0.0
Epoch 0 step 2890 loss 0.0
Epoch 0 step 2900 loss 0.0
Epoch 0 step 2910 loss 0.0
Epoch 0 step 2920 loss 0.0
Epoch 0 step 2930 loss 0.0
Epoch 0 step 2940 loss 0.0
Epoch 0 step 2950 loss 0.0
Epoch 0 step 2960 loss 0.0
Epoch 0 step 2970 loss 0.0
Epoch 0 step 2980 loss 0.0
Epoch 0 step 2990 loss 0.0
Epoch 0 step 3000 loss 0.0
Epoch 0 step 3010 loss 0.0
Epoch 0 step 3020 loss 0.0
Epoch 0 step 3030 loss 0.0
Epoch 0 step 3040 loss 0.0
Epoch 0 step 3050 loss 0.0
Epoch 0 step 3060 loss 0.0
Epoch 0 step 3070 loss 0.0
Epoch 0 step 3080 loss 0.0
Epoch 0 step 3090 loss 0.0
Epoch 0 step 3100 loss 0.0
Epoch 0 step 3110 loss 0.0
Epoch 0 step 3120 loss 0.0
Epoch 0 step 3130 loss 0.0
Epoch 0 step 3140 loss 0.0
Epoch 0 step 3150 loss 0.0
Epoch 0 step 3160 loss 0.0
Epoch 0 step 3170 loss 0.0
Epoch 0 step 3180 loss 0.0
Epoch 0 step 3190 loss 0.0
Epoch 0 step 3200 loss 0.0
Epoch 0 step 3210 loss 0.0
Epoch 0 step 3220 loss 0.0
Epoch 0 step 3230 loss 0.0
Epoch 0 step 3240 loss 0.0
Epoch 0 step 3250 loss 0.0
Epoch 0 step 3260 loss 0.0
Epoch 0 step 3270 loss 0.0
Epoch 0 step 3280 loss 0.0
Epoch 0 step 3290 loss 0.0
Epoch 0 step 3300 loss 0.0
Epoch 0 step 3310 loss 0.0
Epoch 0 step 3320 loss 0.0
Epoch 0 step 3330 loss 0.0
Epoch 0 step 3340 loss 0.0
Epoch 0 step 3350 loss 0.0
Epoch 0 step 3360 loss 0.0
Epoch 0 step 3370 loss 0.0
Epoch 0 step 3380 loss 0.0
Epoch 0 step 3390 loss 0.0
Epoch 0 step 3400 loss 0.0
Epoch 0 step 3410 loss 0.0
Epoch 0 step 3420 loss 0.0
Epoch 0 step 3430 loss 0.0
Epoch 0 step 3440 loss 0.0
Epoch 0 step 3450 loss 0.0
Epoch 0 step 3460 loss 0.0
Epoch 0 step 3470 loss 0.0
Epoch 0 step 3480 loss 0.0
Epoch 0 step 3490 loss 0.0
Epoch 0 step 3500 loss 0.0
Epoch 0 step 3510 loss 0.0
Epoch 0 step 3520 loss 0.0
Epoch 0 step 3530 loss 0.0
Epoch 0 step 3540 loss 0.0
Epoch 0 step 3550 loss 0.0
Epoch 0 step 3560 loss 0.0
Epoch 0 step 3570 loss 0.0
Epoch 0 step 3580 loss 0.0
Epoch 0 step 3590 loss 0.0
Epoch 0 step 3600 loss 0.0
Epoch 0 step 3610 loss 0.0
Epoch 0 step 3620 loss 0.0
Epoch 0 step 3630 loss 0.0
Epoch 0 step 3640 loss 0.0
Epoch 0 step 3650 loss 0.0
Epoch 0 step 3660 loss 0.0
Epoch 0 step 3670 loss 0.0
Epoch 0 step 3680 loss 0.0
Epoch 0 step 3690 loss 0.0
Epoch 0 step 3700 loss 0.0
Epoch 0 step 3710 loss 0.0
Epoch 0 step 3720 loss 0.0
Epoch 0 step 3730 loss 0.0
Epoch 0 step 3740 loss 0.0
Epoch 0 step 3750 loss 0.0
Epoch 0 step 3760 loss 0.0
Epoch 0 step 3770 loss 0.0
Epoch 0 step 3780 loss 0.0
Epoch 0 step 3790 loss 0.0
Epoch 0 step 3800 loss 0.0
Epoch 0 step 3810 loss 0.0
Epoch 0 step 3820 loss 0.0
Epoch 0 step 3830 loss 0.0
Epoch 0 step 3840 loss 0.0
Epoch 0 step 3850 loss 0.0
Epoch 0 step 3860 loss 0.0
Epoch 0 step 3870 loss 0.0
Epoch 0 step 3880 loss 0.0
Epoch 0 step 3890 loss 0.0
Epoch 0 step 3900 loss 0.0
Epoch 0 step 3910 loss 0.0
Epoch 0 step 3920 loss 0.0
Epoch 0 step 3930 loss 0.0
Epoch 0 step 3940 loss 0.0
Epoch 0 step 3950 loss 0.0
Epoch 0 step 3960 loss 0.0
Epoch 0 step 3970 loss 0.0
Epoch 0 step 3980 loss 0.0
Epoch 0 step 3990 loss 0.0
Epoch 0 step 4000 loss 0.0
Epoch 0 step 4010 loss 0.0
Epoch 0 step 4020 loss 0.0
Epoch 0 step 4030 loss 0.0
Epoch 0 step 4040 loss 0.0
Epoch 0 step 4050 loss 0.0
Epoch 0 step 4060 loss 0.0
Epoch 0 step 4070 loss 0.0
Epoch 0 step 4080 loss 0.0
Epoch 0 step 4090 loss 0.0
Epoch 0 step 4100 loss 0.0
Epoch 0 step 4110 loss 0.0
Epoch 0 step 4120 loss 0.0
Epoch 0 step 4130 loss 0.0
Epoch 0 step 4140 loss 0.0
Epoch 0 step 4150 loss 0.0
Epoch 0 step 4160 loss 0.0
Epoch 0 step 4170 loss 0.0
Epoch 0 step 4180 loss 0.0
Epoch 0 step 4190 loss 0.0
Epoch 0 step 4200 loss 0.0
Epoch 0 step 4210 loss 0.0
Epoch 0 step 4220 loss 0.0
Epoch 0 step 4230 loss 0.0
Epoch 0 step 4240 loss 0.0
Epoch 0 step 4250 loss 0.0
Epoch 0 step 4260 loss 0.0
Epoch 0 step 4270 loss 0.0
Epoch 0 step 4280 loss 0.0
Epoch 0 step 4290 loss 0.0
Epoch 0 step 4300 loss 0.0
Epoch 0 step 4310 loss 0.0
Epoch 0 step 4320 loss 0.0
Epoch 0 step 4330 loss 0.0
Epoch 0 step 4340 loss 0.0
Epoch 0 step 4350 loss 0.0
Epoch 0 step 4360 loss 0.0
Epoch 0 step 4370 loss 0.0
Epoch 0 step 4380 loss 0.0
Epoch 0 step 4390 loss 0.0
Epoch 0 step 4400 loss 0.0
Epoch 0 step 4410 loss 0.0
Epoch 0 step 4420 loss 0.0
Epoch 0 step 4430 loss 0.0
Epoch 0 step 4440 loss 0.0
Epoch 0 step 4450 loss 0.0
Epoch 0 step 4460 loss 0.0
Epoch 0 step 4470 loss 0.0
Epoch 0 step 4480 loss 0.0
Epoch 0 step 4490 loss 0.0
Epoch 0 step 4500 loss 0.0
Epoch 0 step 4510 loss 0.0
Epoch 0 step 4520 loss 0.0
Epoch 0 step 4530 loss 0.0
Epoch 0 step 4540 loss 0.0
Epoch 0 step 4550 loss 0.0
Epoch 0 step 4560 loss 0.0
Epoch 0 step 4570 loss 0.0
Epoch 0 step 4580 loss 0.0
Epoch 0 step 4590 loss 0.0
Epoch 0 step 4600 loss 0.0
Epoch 0 step 4610 loss 0.0
Epoch 0 step 4620 loss 0.0
Epoch 0 step 4630 loss 0.0
Epoch 0 step 4640 loss 0.0
Epoch 0 step 4650 loss 0.0
Epoch 0 step 4660 loss 0.0
Epoch 0 step 4670 loss 0.0
Epoch 0 step 4680 loss 0.0
Epoch 0 step 4690 loss 0.0
Epoch 0 step 4700 loss 0.0
Epoch 0 step 4710 loss 0.0
Epoch 0 step 4720 loss 0.0
Epoch 0 step 4730 loss 0.0
Epoch 0 step 4740 loss 0.0
Epoch 0 step 4750 loss 0.0
Epoch 0 step 4760 loss 0.0
Epoch 0 step 4770 loss 0.0
Epoch 0 step 4780 loss 0.0
Epoch 0 step 4790 loss 0.0
Epoch 0 step 4800 loss 0.0
Epoch 0 step 4810 loss 0.0
Epoch 0 step 4820 loss 0.0
Epoch 0 step 4830 loss 0.0
Epoch 0 step 4840 loss 0.0
Epoch 0 step 4850 loss 0.0
Epoch 0 step 4860 loss 0.0
Epoch 0 step 4870 loss 0.0
Epoch 0 step 4880 loss 0.0
Epoch 0 step 4890 loss 0.0
Epoch 0 step 4900 loss 0.0
Epoch 0 step 4910 loss 0.0
Epoch 0 step 4920 loss 0.0
Epoch 0 step 4930 loss 0.0
Epoch 0 step 4940 loss 0.0
Epoch 0 step 4950 loss 0.0
Epoch 0 step 4960 loss 0.0
Epoch 0 step 4970 loss 0.0
Epoch 0 step 4980 loss 0.0
Epoch 0 step 4990 loss 0.0
Epoch 0 step 5000 loss 0.0
Epoch 0 step 5010 loss 0.0
Epoch 0 step 5020 loss 0.0
Epoch 0 step 5030 loss 0.0
Epoch 0 step 5040 loss 0.0
Epoch 0 step 5050 loss 0.0
Epoch 0 step 5060 loss 0.0
Epoch 0 step 5070 loss 0.0
Epoch 0 step 5080 loss 0.0
Epoch 0 step 5090 loss 0.0
Epoch 0 step 5100 loss 0.0
Epoch 0 step 5110 loss 0.0
Epoch 0 step 5120 loss 0.0
Epoch 0 step 5130 loss 0.0
Epoch 0 step 5140 loss 0.0
Epoch 0 step 5150 loss 0.0
Epoch 0 step 5160 loss 0.0
Epoch 0 step 5170 loss 0.0
Epoch 0 step 5180 loss 0.0
Epoch 0 step 5190 loss 0.0
Epoch 0 step 5200 loss 0.0
Epoch 0 step 5210 loss 0.0
Epoch 0 step 5220 loss 0.0
Epoch 0 step 5230 loss 0.0
Epoch 0 step 5240 loss 0.0
Epoch 0 step 5250 loss 0.0
Epoch 0 step 5260 loss 0.0
Epoch 0 step 5270 loss 0.0
Epoch 0 step 5280 loss 0.0
Epoch 0 step 5290 loss 0.0
Epoch 0 step 5300 loss 0.0
Epoch 0 step 5310 loss 0.0
Epoch 0 step 5320 loss 0.0
Epoch 0 step 5330 loss 0.0
Epoch 0 step 5340 loss 0.0
Epoch 0 step 5350 loss 0.0
Epoch 0 step 5360 loss 0.0
Epoch 0 step 5370 loss 0.0
Epoch 0 step 5380 loss 0.0
Epoch 0 step 5390 loss 0.0
Epoch 0 step 5400 loss 0.0
Epoch 0 step 5410 loss 0.0
Epoch 0 step 5420 loss 0.0
Epoch 0 step 5430 loss 0.0
Epoch 0 step 5440 loss 0.0
Epoch 0 step 5450 loss 0.0
Epoch 0 step 5460 loss 0.0
Epoch 0 step 5470 loss 0.0
Epoch 0 step 5480 loss 0.0
Epoch 0 step 5490 loss 0.0
Epoch 0 step 5500 loss 0.0
Epoch 0 step 5510 loss 0.0
Epoch 0 step 5520 loss 0.0
Epoch 0 step 5530 loss 0.0
Epoch 0 step 5540 loss 0.0
Epoch 0 step 5550 loss 0.0
Epoch 0 step 5560 loss 0.0
Epoch 0 step 5570 loss 0.0
Epoch 0 step 5580 loss 0.0
Epoch 0 step 5590 loss 0.0
Epoch 0 step 5600 loss 0.0
Epoch 0 step 5610 loss 0.0
Epoch 0 step 5620 loss 0.0
Epoch 0 step 5630 loss 0.0
Epoch 0 step 5640 loss 0.0
Epoch 0 step 5650 loss 0.0
Epoch 0 step 5660 loss 0.0
Epoch 0 step 5670 loss 0.0
Epoch 0 step 5680 loss 0.0
Epoch 0 step 5690 loss 0.0
Epoch 0 step 5700 loss 0.0
Epoch 0 step 5710 loss 0.0
Epoch 0 step 5720 loss 0.0
Epoch 0 step 5730 loss 0.0
Epoch 0 step 5740 loss 0.0
Epoch 0 step 5750 loss 0.0
Epoch 0 step 5760 loss 0.0
Epoch 0 step 5770 loss 0.0
Epoch 0 step 5780 loss 0.0
Epoch 0 step 5790 loss 0.0
Epoch 0 step 5800 loss 0.0
Epoch 0 step 5810 loss 0.0
Epoch 0 step 5820 loss 0.0
Epoch 0 step 5830 loss 0.0
Epoch 0 step 5840 loss 0.0
Epoch 0 step 5850 loss 0.0
Epoch 0 step 5860 loss 0.0
Epoch 0 step 5870 loss 0.0
Epoch 0 step 5880 loss 0.0
Epoch 0 step 5890 loss 0.0
Epoch 0 step 5900 loss 0.0
Epoch 0 step 5910 loss 0.0
Epoch 0 step 5920 loss 0.0
Epoch 0 step 5930 loss 0.0
Epoch 0 step 5940 loss 0.0
Epoch 0 step 5950 loss 0.0
Epoch 0 step 5960 loss 0.0
Epoch 0 step 5970 loss 0.0
Epoch 0 step 5980 loss 0.0
Epoch 0 step 5990 loss 0.0
Epoch 0 step 6000 loss 0.0
Epoch 0 step 6010 loss 0.0
Epoch 0 step 6020 loss 0.0
Epoch 0 step 6030 loss 0.0
Epoch 0 step 6040 loss 0.0
Epoch 0 step 6050 loss 0.0
Epoch 0 step 6060 loss 0.0
Epoch 0 step 6070 loss 0.0
Epoch 0 step 6080 loss 0.0
Epoch 0 step 6090 loss 0.0
Epoch 0 step 6100 loss 0.0
Epoch 0 step 6110 loss 0.0
Epoch 0 step 6120 loss 0.0
Epoch 0 step 6130 loss 0.0
Epoch 0 step 6140 loss 0.0
Epoch 0 step 6150 loss 0.0
Epoch 0 step 6160 loss 0.0
Epoch 0 step 6170 loss 0.0
Epoch 0 step 6180 loss 0.0
Epoch 0 step 6190 loss 0.0
Epoch 0 step 6200 loss 0.0
Epoch 0 step 6210 loss 0.0
Epoch 0 step 6220 loss 0.0
Epoch 0 step 6230 loss 0.0
Epoch 0 step 6240 loss 0.0
Epoch 0 step 6250 loss 0.0
Epoch 0 step 6260 loss 0.0
Epoch 0 step 6270 loss 0.0
Epoch 0 step 6280 loss 0.0
Epoch 0 step 6290 loss 0.0
Epoch 0 step 6300 loss 0.0
Epoch 0 step 6310 loss 0.0
Epoch 0 step 6320 loss 0.0
Epoch 0 step 6330 loss 0.0
Epoch 0 step 6340 loss 0.0
Epoch 0 step 6350 loss 0.0
Epoch 0 step 6360 loss 0.0
Epoch 0 step 6370 loss 0.0
Epoch 0 step 6380 loss 0.0
Epoch 0 step 6390 loss 0.0
Epoch 0 step 6400 loss 0.0
Epoch 0 step 6410 loss 0.0
Epoch 0 step 6420 loss 0.0
Epoch 0 step 6430 loss 0.0
Epoch 0 step 6440 loss 0.0
Epoch 0 step 6450 loss 0.0
Epoch 0 step 6460 loss 0.0
Epoch 0 step 6470 loss 0.0
Epoch 0 step 6480 loss 0.0
Epoch 0 step 6490 loss 0.0
Epoch 0 step 6500 loss 0.0
Epoch 0 step 6510 loss 0.0
Epoch 0 step 6520 loss 0.0
Epoch 0 step 6530 loss 0.0
Epoch 0 step 6540 loss 0.0
Epoch 0 step 6550 loss 0.0
Epoch 0 step 6560 loss 0.0
Epoch 0 step 6570 loss 0.0
Epoch 0 step 6580 loss 0.0
Epoch 0 step 6590 loss 0.0
Epoch 0 step 6600 loss 0.0
Epoch 0 step 6610 loss 0.0
Epoch 0 step 6620 loss 0.0
Epoch 0 step 6630 loss 0.0
Epoch 0 step 6640 loss 0.0
Epoch 0 step 6650 loss 0.0
Epoch 0 step 6660 loss 0.0
Epoch 0 step 6670 loss 0.0
Epoch 0 step 6680 loss 0.0
Epoch 0 step 6690 loss 0.0
Epoch 0 step 6700 loss 0.0
Epoch 0 step 6710 loss 0.0
Epoch 0 step 6720 loss 0.0
Epoch 0 step 6730 loss 0.0
Epoch 0 step 6740 loss 0.0
Epoch 0 step 6750 loss 0.0
Epoch 0 step 6760 loss 0.0
Epoch 0 step 6770 loss 0.0
Epoch 0 step 6780 loss 0.0
Epoch 0 step 6790 loss 0.0
Epoch 0 step 6800 loss 0.0
Epoch 0 step 6810 loss 0.0
Epoch 0 step 6820 loss 0.0
Epoch 0 step 6830 loss 0.0
Epoch 0 step 6840 loss 0.0
Epoch 0 step 6850 loss 0.0
Epoch 0 step 6860 loss 0.0
Epoch 0 step 6870 loss 0.0
Epoch 0 step 6880 loss 0.0
Epoch 0 step 6890 loss 0.0
Epoch 0 step 6900 loss 0.0
Epoch 0 step 6910 loss 0.0
Epoch 0 step 6920 loss 0.0
Epoch 0 step 6930 loss 0.0
Epoch 0 step 6940 loss 0.0
Epoch 0 step 6950 loss 0.0
Epoch 0 step 6960 loss 0.0
Epoch 0 step 6970 loss 0.0
Epoch 0 step 6980 loss 0.0
Epoch 0 step 6990 loss 0.0
Epoch 0 step 7000 loss 0.0
Epoch 0 step 7010 loss 0.0
Epoch 0 step 7020 loss 0.0
Epoch 0 step 7030 loss 0.0
Epoch 0 step 7040 loss 0.0
Epoch 0 step 7050 loss 0.0
Epoch 0 step 7060 loss 0.0
Epoch 0 step 7070 loss 0.0
Epoch 0 step 7080 loss 0.0
Epoch 0 step 7090 loss 0.0
Epoch 0 step 7100 loss 0.0
Epoch 0 step 7110 loss 0.0
Epoch 0 step 7120 loss 0.0
Epoch 0 step 7130 loss 0.0
Epoch 0 step 7140 loss 0.0
Epoch 0 step 7150 loss 0.0
Epoch 0 step 7160 loss 0.0
Epoch 0 step 7170 loss 0.0
Epoch 0 step 7180 loss 0.0
Epoch 0 step 7190 loss 0.0
Epoch 0 step 7200 loss 0.0
Epoch 0 step 7210 loss 0.0
Epoch 0 step 7220 loss 0.0
Epoch 0 step 7230 loss 0.0
Epoch 0 step 7240 loss 0.0
Epoch 0 step 7250 loss 0.0
Epoch 0 step 7260 loss 0.0
Epoch 0 step 7270 loss 0.0
Epoch 0 step 7280 loss 0.0
Epoch 0 step 7290 loss 0.0
Epoch 0 step 7300 loss 0.0
Epoch 0 step 7310 loss 0.0
Epoch 0 step 7320 loss 0.0
Epoch 0 step 7330 loss 0.0
Epoch 0 step 7340 loss 0.0
Epoch 0 step 7350 loss 0.0
Epoch 0 step 7360 loss 0.0
Epoch 0 step 7370 loss 0.0
Epoch 0 step 7380 loss 0.0
Epoch 0 step 7390 loss 0.0
Epoch 0 step 7400 loss 0.0
Epoch 0 step 7410 loss 0.0
Epoch 0 step 7420 loss 0.0
Epoch 0 step 7430 loss 0.0
Epoch 0 step 7440 loss 0.0
Epoch 0 step 7450 loss 0.0
Epoch 0 step 7460 loss 0.0
Epoch 0 step 7470 loss 0.0
Epoch 0 step 7480 loss 0.0
Epoch 0 step 7490 loss 0.0
Epoch 0 step 7500 loss 0.0
Epoch 0 step 7510 loss 0.0
Epoch 0 step 7520 loss 0.0
Epoch 0 step 7530 loss 0.0
Epoch 0 step 7540 loss 0.0
Epoch 0 step 7550 loss 0.0
Epoch 0 step 7560 loss 0.0
Epoch 0 step 7570 loss 0.0
Epoch 0 step 7580 loss 0.0
Epoch 0 step 7590 loss 0.0
Epoch 0 step 7600 loss 0.0
Epoch 0 step 7610 loss 0.0
Epoch 0 step 7620 loss 0.0
Epoch 0 step 7630 loss 0.0
Epoch 0 step 7640 loss 0.0
Epoch 0 step 7650 loss 0.0
Epoch 0 step 7660 loss 0.0
Epoch 0 step 7670 loss 0.0
Epoch 0 step 7680 loss 0.0
Epoch 0 step 7690 loss 0.0
Epoch 0 step 7700 loss 0.0
Epoch 0 step 7710 loss 0.0
Epoch 0 step 7720 loss 0.0
Epoch 0 step 7730 loss 0.0
Epoch 0 step 7740 loss 0.0
Epoch 0 step 7750 loss 0.0
Epoch 0 step 7760 loss 0.0
Epoch 0 step 7770 loss 0.0
Epoch 0 step 7780 loss 0.0
Epoch 0 step 7790 loss 0.0
Epoch 0 step 7800 loss 0.0
Epoch 0 step 7810 loss 0.0
Epoch 0 step 7820 loss 0.0
Epoch 0 step 7830 loss 0.0
Epoch 0 step 7840 loss 0.0
Epoch 0 step 7850 loss 0.0
Epoch 0 step 7860 loss 0.0
Epoch 0 step 7870 loss 0.0
Epoch 0 step 7880 loss 0.0
Epoch 0 step 7890 loss 0.0
Epoch 0 step 7900 loss 0.0
Epoch 0 step 7910 loss 0.0
Epoch 0 step 7920 loss 0.0
Epoch 0 step 7930 loss 0.0
Epoch 0 step 7940 loss 0.0
Epoch 0 step 7950 loss 0.0
Epoch 0 step 7960 loss 0.0
Epoch 0 step 7970 loss 0.0
Epoch 0 step 7980 loss 0.0
Epoch 0 step 7990 loss 0.0
Epoch 0 step 8000 loss 0.0
Epoch 0 step 8010 loss 0.0
Epoch 0 step 8020 loss 0.0
Epoch 0 step 8030 loss 0.0
Epoch 0 step 8040 loss 0.0
Epoch 0 step 8050 loss 0.0
Epoch 0 step 8060 loss 0.0
Epoch 0 step 8070 loss 0.0
Epoch 0 step 8080 loss 0.0
Epoch 0 step 8090 loss 0.0
Epoch 0 step 8100 loss 0.0
Epoch 0 step 8110 loss 0.0
Epoch 0 step 8120 loss 0.0
Epoch 0 step 8130 loss 0.0
Epoch 0 step 8140 loss 0.0
Epoch 0 step 8150 loss 0.0
Epoch 0 step 8160 loss 0.0
Epoch 0 step 8170 loss 0.0
Epoch 0 step 8180 loss 0.0
Epoch 0 step 8190 loss 0.0
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f357d397ce1,7f357d69513f,7f357d3e06d9&map=
*** SIGABRT received by PID 571729 (TID 573707) on cpu 8 from PID 571729; stack trace: ***
PC: @ 0x7f357d397ce1 (unknown) raise
@ 0x7f2d2afa91a1 1888 (unknown)
@ 0x7f357d695140 2320 (unknown)
@ 0x7f357d3e06da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f357d397ce1,7f2d2afa91a0,7f357d69513f,7f357d3e06d9,0&map=
E0905 20:16:09.336207 573707 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:16:09.336226 573707 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:16:09.336231 573707 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:16:09.336251 573707 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:16:09.336257 573707 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:16:21.307568 573707 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 12: 571729 Aborted (core dumped) python3 test/spmd/test_train_spmd_linear_model.py
+ python3 test/spmd/test_xla_spmd_python_api_interaction.py
./pytorch/xla/torch_xla/runtime.py:239: UserWarning: Replicating tensors already initialized on non-virtual XLA device for SPMD to force SPMD mode. This is one-time overhead to setup, and to minimize such, please set SPMD mode before initializting tensors (i.e., call use_spmd() in the beginning of the program).
warnings.warn(
.............../pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
.......
----------------------------------------------------------------------
Ran 23 tests in 3.347s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f7cc2796ce1,7f7cc2a9413f,7f7cc27df6d9&map=
*** SIGABRT received by PID 573863 (TID 574893) on cpu 141 from PID 573863; stack trace: ***
PC: @ 0x7f7cc2796ce1 (unknown) raise
@ 0x7f77aefa91a1 1888 (unknown)
@ 0x7f7cc2a94140 2320 (unknown)
@ 0x7f7cc27df6da 530559456 (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f7cc2796ce1,7f77aefa91a0,7f7cc2a9413f,7f7cc27df6d9,0&map=
E0905 20:16:30.145166 574893 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:16:30.145183 574893 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:16:30.145187 574893 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:16:30.145206 574893 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:16:30.145212 574893 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:16:35.671237 574893 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 13: 573863 Aborted (core dumped) python3 test/spmd/test_xla_spmd_python_api_interaction.py
+ python3 test/spmd/test_xla_auto_sharding.py
/pytorch/xla/torch_xla/runtime.py:233: UserWarning: XLA_USE_SPMD is being deprecated. Use torch_xla.runtime.use_spmd() without setting XLA_USE_SPMD env-var.
warnings.warn("XLA_USE_SPMD is being deprecated. "
...
----------------------------------------------------------------------
Ran 3 tests in 4.606s
OK
+ python3 test/spmd/test_fsdp_v2.py
/pytorch/xla/torch_xla/runtime.py:233: UserWarning: XLA_USE_SPMD is being deprecated. Use torch_xla.runtime.use_spmd() without setting XLA_USE_SPMD env-var.
warnings.warn("XLA_USE_SPMD is being deprecated. "
..........
----------------------------------------------------------------------
Ran 10 tests in 3.746s
OK
+ XLA_EXPERIMENTAL=nonzero:masked_select:nms
+ python3 test/ds/test_dynamic_shape_models.py -v
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
test_backward_pass_with_dynamic_input (__main__.TestDynamicShapeModels) ... Finished training.
Test passed.
ok
test_forward_pass_dynamic_input_compile_once (__main__.TestDynamicShapeModels) ... ok
test_forward_pass_dynamic_input_correctness (__main__.TestDynamicShapeModels) ... Test passed.
ok
----------------------------------------------------------------------
Ran 3 tests in 1.964s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f5e201f6ce1,7f5e204f413f,7f5e2023f6d9&map=
*** SIGABRT received by PID 578628 (TID 579295) on cpu 46 from PID 578628; stack trace: ***
PC: @ 0x7f5e201f6ce1 (unknown) raise
@ 0x7f5906fa91a1 1888 (unknown)
@ 0x7f5e204f4140 2320 (unknown)
@ 0x7f5e2023f6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f5e201f6ce1,7f5906fa91a0,7f5e204f413f,7f5e2023f6d9,0&map=
E0905 20:17:06.081503 579295 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:17:06.081519 579295 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:17:06.081523 579295 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:17:06.081543 579295 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:17:06.081549 579295 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:17:12.378801 579295 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 16: 578628 Aborted (core dumped) XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shape_models.py -v
+ XLA_EXPERIMENTAL=nonzero:masked_select:nms
+ python3 test/ds/test_dynamic_shapes.py -v
test_SizeEq_should_not_compile_for_identical_symints (__main__.TestDynamicShapes) ... ok
test_abs (__main__.TestDynamicShapes) ... ok
test_add_dyn_with_static_broadcastable (__main__.TestDynamicShapes) ... ok
test_add_dyn_with_static_not_broadcastable (__main__.TestDynamicShapes) ... ok
test_add_two_dynamic_tensors (__main__.TestDynamicShapes) ... ok
test_clone (__main__.TestDynamicShapes) ... ok
test_empty_symint (__main__.TestDynamicShapes) ... ok
test_expand_symint_correctness (__main__.TestDynamicShapes) ... ok
test_fill_ (__main__.TestDynamicShapes) ... ok
test_masked_select_shape (__main__.TestDynamicShapes) ... ok
test_nonzero_cast (__main__.TestDynamicShapes) ... ok
test_nonzero_correctness (__main__.TestDynamicShapes) ... ok
test_nonzero_shape (__main__.TestDynamicShapes) ... ok
test_simple_expand (__main__.TestDynamicShapes) ... ok
test_simple_expand_add_dimension (__main__.TestDynamicShapes) ... ok
test_simple_expand_on_2d_tensor (__main__.TestDynamicShapes) ... ok
test_sizeAdd (__main__.TestDynamicShapes) ... ok
test_sizeGe (__main__.TestDynamicShapes) ... ok
test_sizeGt (__main__.TestDynamicShapes) ... ok
test_sizeLt (__main__.TestDynamicShapes) ... ok
test_sizeMod (__main__.TestDynamicShapes) ... ok
test_sizeNe (__main__.TestDynamicShapes) ... ok
test_sizeSub (__main__.TestDynamicShapes) ... ok
test_sub_dyn_with_static_broadcastable (__main__.TestDynamicShapes) ... ok
test_sub_dyn_with_static_not_broadcastable (__main__.TestDynamicShapes) ... ok
test_sub_two_dynamic_tensors (__main__.TestDynamicShapes) ... ok
test_t_copy (__main__.TestDynamicShapes) ... ok
test_unsqueeze_copy_dynamism (__main__.TestDynamicShapes) ... ok
test_view_copy_symint_with_dyn_input_dyn_input_shape (__main__.TestDynamicShapes) ... ok
test_view_copy_symint_with_dyn_input_static_input_shape (__main__.TestDynamicShapes) ... ok
test_view_copy_symint_with_static_input_dyn_input_shape (__main__.TestDynamicShapes) ... ok
test_view_copy_symint_with_static_input_dyn_input_shape2 (__main__.TestDynamicShapes) ... ok
test_wrap (__main__.TestDynamicShapes) ... ok
test_xlatensor_memoize_symsizes (__main__.TestDynamicShapes) ... ok
----------------------------------------------------------------------
Ran 34 tests in 2.038s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f9ad59a6ce1,7f9ad5ca413f,7f9ad59ef6d9&map=
*** SIGABRT received by PID 580131 (TID 581168) on cpu 135 from PID 580131; stack trace: ***
PC: @ 0x7f9ad59a6ce1 (unknown) raise
@ 0x7f95c2fa91a1 1888 (unknown)
@ 0x7f9ad5ca4140 2320 (unknown)
@ 0x7f9ad59ef6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f9ad59a6ce1,7f95c2fa91a0,7f9ad5ca413f,7f9ad59ef6d9,0&map=
E0905 20:17:23.162706 581168 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:17:23.162719 581168 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:17:23.162724 581168 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:17:23.162744 581168 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:17:23.162750 581168 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:17:29.335428 581168 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 17: 580131 Aborted (core dumped) XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shapes.py -v
+ python3 test/test_autocast.py
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
test_autocast_banned (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_linalg_fp16 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_methods_expect_builtin_promote (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_methods_fp16 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_methods_fp32 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_nn_fp16 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_nn_fp32 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_torch_bf16 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_torch_expect_builtin_promote (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_torch_fp32 (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_torch_need_autocast_promote (__main__.TestAutocastCuda) ... skipped 'CUDA autocast test.'
test_autocast_methods_bf16 (__main__.TestAutocastTPU) ... /pytorch/xla/test/test_autocast.py:283: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
self.assertFalse(self.is_autocast_enabled())
/pytorch/xla/test/test_autocast.py:285: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
self.assertTrue(self.is_autocast_enabled())
/pytorch/xla/test/test_autocast.py:335: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
self.assertFalse(self.is_autocast_enabled())
/pytorch/xla/test/test_autocast.py:346: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
self.assertTrue(self.is_autocast_enabled())
/pytorch/xla/test/test_autocast.py:347: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
self.assertFalse(self.is_autocast_enabled())
ok
test_autocast_methods_expect_builtin_promote (__main__.TestAutocastTPU) ... ok
test_autocast_methods_fp32 (__main__.TestAutocastTPU) ... ok
test_autocast_nn_fp32 (__main__.TestAutocastTPU) ... ok
test_autocast_torch_expect_builtin_promote (__main__.TestAutocastTPU) ... ok
test_autocast_torch_fp32 (__main__.TestAutocastTPU) ... ok
test_autocast_torch_need_autocast_promote (__main__.TestAutocastTPU) ... ok
test_autocast_tpu_check_dtype (__main__.TestAutocastTPU) ... /pytorch/xla/torch_xla/amp/autocast_mode.py:57: UserWarning: In XLA:TPU autocast, but the target dtype is not supported. Disabling autocast.
TPU Autocast only supports dtype of torch.bfloat16 currently.
warnings.warn(error_message)
/pytorch/xla/test/test_autocast.py:484: DeprecationWarning: torch.is_autocast_xla_enabled() is deprecated. Please use torch.is_autocast_enabled('xla') instead. (Triggered internally at /pytorch/torch/csrc/autograd/init.cpp:715.)
assert not torch.is_autocast_xla_enabled()
ok
----------------------------------------------------------------------
Ran 19 tests in 9.558s
OK (skipped=11)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f80d0910ce1,7f80d0c0e13f,7f80d09596d9&map=
*** SIGABRT received by PID 581835 (TID 582868) on cpu 18 from PID 581835; stack trace: ***
PC: @ 0x7f80d0910ce1 (unknown) raise
@ 0x7f7bbafa91a1 1888 (unknown)
@ 0x7f80d0c0e140 2320 (unknown)
@ 0x7f80d09596da 33616352 (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f80d0910ce1,7f7bbafa91a0,7f80d0c0e13f,7f80d09596d9,0&map=
E0905 20:17:47.768658 582868 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:17:47.768670 582868 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:17:47.768675 582868 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:17:47.768692 582868 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:17:47.768698 582868 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:17:56.531782 582868 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 18: 581835 Aborted (core dumped) python3 test/test_autocast.py
+ python3 test/test_fp8.py
......
----------------------------------------------------------------------
Ran 6 tests in 0.132s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f1889412ce1,7f188971013f,7f188945b6d9&map=
*** SIGABRT received by PID 583375 (TID 584403) on cpu 206 from PID 583375; stack trace: ***
PC: @ 0x7f1889412ce1 (unknown) raise
@ 0x7f1376fa91a1 1888 (unknown)
@ 0x7f1889710140 2320 (unknown)
@ 0x7f188945b6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f1889412ce1,7f1376fa91a0,7f188971013f,7f188945b6d9,0&map=
E0905 20:18:05.644312 584403 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:18:05.644329 584403 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:18:05.644334 584403 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:18:05.644355 584403 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:18:05.644362 584403 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:18:11.028746 584403 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 19: 583375 Aborted (core dumped) python3 test/test_fp8.py
+ python3 test/test_grad_checkpoint.py
/pytorch/xla/torch_xla/utils/checkpoint.py:183: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs), \
/pytorch/xla/torch_xla/utils/checkpoint.py:184: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fc92048ace1,7fc92078813f,7fc9204d36d9&map=
*** SIGABRT received by PID 584819 (TID 586336) on cpu 136 from PID 584819; stack trace: ***
PC: @ 0x7fc92048ace1 (unknown) raise
@ 0x7fc40afa91a1 1888 (unknown)
@ 0x7fc920788140 2320 (unknown)
@ 0x7fc9204d36da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fc92048ace1,7fc40afa91a0,7fc92078813f,7fc9204d36d9,0&map=
E0905 20:18:23.610399 586336 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:18:23.610412 586336 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:18:23.610421 586336 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:18:23.610438 586336 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:18:23.610444 586336 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:18:38.153823 586336 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 20: 584819 Aborted (core dumped) python3 test/test_grad_checkpoint.py
+ python3 test/dynamo/test_dynamo.py
......s.s.s..ss.s......s.W0905 20:20:08.567000 586505 torch/_logging/_internal.py:1057] [2/0] Profiler function <class 'torch.autograd.profiler.record_function'> will be ignored
..
----------------------------------------------------------------------
Ran 27 tests in 87.024s
OK (skipped=7)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fd287053ce1,7fd28735113f,7fd28709c6d9&map=
*** SIGABRT received by PID 586505 (TID 588761) on cpu 41 from PID 586505; stack trace: ***
PC: @ 0x7fd287053ce1 (unknown) raise
@ 0x7fcd6afa91a1 1888 (unknown)
@ 0x7fd287351140 2320 (unknown)
@ 0x7fd28709c6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fd287053ce1,7fcd6afa91a0,7fd28735113f,7fd28709c6d9,0&map=
E0905 20:20:14.204855 588761 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:20:14.204872 588761 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:20:14.204882 588761 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:20:14.204902 588761 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:20:14.204908 588761 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:20:54.890056 588761 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 21: 586505 Aborted (core dumped) python3 test/dynamo/test_dynamo.py
+ python3 test/dynamo/test_dynamo_dynamic_shape.py
.......s
----------------------------------------------------------------------
Ran 8 tests in 32.966s
OK (skipped=1)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ff76439ece1,7ff76469c13f,7ff7643e76d9&map=
*** SIGABRT received by PID 589087 (TID 590958) on cpu 22 from PID 589087; stack trace: ***
PC: @ 0x7ff76439ece1 (unknown) raise
@ 0x7ff24efa91a1 1888 (unknown)
@ 0x7ff76469c140 2320 (unknown)
@ 0x7ff7643e76da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ff76439ece1,7ff24efa91a0,7ff76469c13f,7ff7643e76d9,0&map=
E0905 20:21:34.103527 590958 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:21:34.103541 590958 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:21:34.103547 590958 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:21:34.103568 590958 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:21:34.103574 590958 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:21:55.436557 590958 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 22: 589087 Aborted (core dumped) python3 test/dynamo/test_dynamo_dynamic_shape.py
+ python3 test/spmd/test_spmd_debugging.py
/pytorch/xla/torch_xla/runtime.py:233: UserWarning: XLA_USE_SPMD is being deprecated. Use torch_xla.runtime.use_spmd() without setting XLA_USE_SPMD env-var.
warnings.warn("XLA_USE_SPMD is being deprecated. "
s
TPU 0 TPU 4 TPU 8 TPU 12 TPU 2 TPU 6 TPU 10 TPU 14
TPU 1 TPU 5 TPU 9 TPU 13 TPU 3 TPU 7 TPU 11 TPU 15
.s
TPU 0 TPU 1 TPU 2 TPU 3
TPU 4 TPU 5 TPU 6 TPU 7
.s
TPU [0, 1]
TPU [4, 5]
TPU [8, 9]
TPU [12, 13]
TPU [2, 3]
TPU [6, 7]
TPU [10, 11]
TPU [14, 15]
.sss
TPU [0, 1]
TPU [2, 3]
TPU [4, 5]
TPU [6, 7]
.s
TPU [0, 1, 2, 3]
.
----------------------------------------------------------------------
Ran 12 tests in 0.075s
OK (skipped=7)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fde24a1ace1,7fde24d1813f,7fde24a636d9&map=
*** SIGABRT received by PID 591144 (TID 592242) on cpu 126 from PID 591144; stack trace: ***
PC: @ 0x7fde24a1ace1 (unknown) raise
@ 0x7fd90efa91a1 1888 (unknown)
@ 0x7fde24d18140 2320 (unknown)
@ 0x7fde24a636da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fde24a1ace1,7fd90efa91a0,7fde24d1813f,7fde24a636d9,0&map=
E0905 20:22:04.320640 592242 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:04.320655 592242 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:04.320660 592242 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:04.320678 592242 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:04.320684 592242 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:22:09.833324 592242 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 23: 591144 Aborted (core dumped) python3 test/spmd/test_spmd_debugging.py
+ XLA_PARAMETER_WRAPPING_THREADSHOLD=1
+ python test/spmd/test_spmd_parameter_wrapping.py
.
----------------------------------------------------------------------
Ran 1 test in 3.567s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f9069ed1ce1,7f906a1cf13f,7f9069f1a6d9&map=
*** SIGABRT received by PID 592334 (TID 592334) on cpu 189 from PID 592334; stack trace: ***
PC: @ 0x7f9069ed1ce1 (unknown) raise
@ 0x7f8b52fa91a1 1888 (unknown)
@ 0x7f906a1cf140 981709696 (unknown)
@ 0x7f9069f1a6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f9069ed1ce1,7f8b52fa91a0,7f906a1cf13f,7f9069f1a6d9,0&map=
E0905 20:22:19.068211 592334 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:19.068228 592334 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:19.068233 592334 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:19.068251 592334 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:19.068258 592334 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:22:24.774020 592334 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 24: 592334 Aborted (core dumped) XLA_PARAMETER_WRAPPING_THREADSHOLD=1 python test/spmd/test_spmd_parameter_wrapping.py
+ python3 test/pjrt/test_dtypes.py
Running tests under Python 3.10.14: /root/miniconda3/envs/torch310/bin/python3
[ RUN ] TestDtypes.test_bool_round_trip
[ OK ] TestDtypes.test_bool_round_trip
[ RUN ] TestDtypes.test_float_round_trip0 (torch.float16)
[ OK ] TestDtypes.test_float_round_trip0 (torch.float16)
[ RUN ] TestDtypes.test_float_round_trip1 (torch.float32)
[ OK ] TestDtypes.test_float_round_trip1 (torch.float32)
[ RUN ] TestDtypes.test_float_round_trip2 (torch.float64)
[ OK ] TestDtypes.test_float_round_trip2 (torch.float64)
[ RUN ] TestDtypes.test_float_round_trip3 (torch.bfloat16)
[ OK ] TestDtypes.test_float_round_trip3 (torch.bfloat16)
[ RUN ] TestDtypes.test_float_round_trip4 (torch.complex64)
[ OK ] TestDtypes.test_float_round_trip4 (torch.complex64)
[ RUN ] TestDtypes.test_int_round_trip0 (torch.uint8)
[ OK ] TestDtypes.test_int_round_trip0 (torch.uint8)
[ RUN ] TestDtypes.test_int_round_trip1 (torch.int8)
[ OK ] TestDtypes.test_int_round_trip1 (torch.int8)
[ RUN ] TestDtypes.test_int_round_trip2 (torch.int16)
[ OK ] TestDtypes.test_int_round_trip2 (torch.int16)
[ RUN ] TestDtypes.test_int_round_trip3 (torch.int32)
[ OK ] TestDtypes.test_int_round_trip3 (torch.int32)
[ RUN ] TestDtypes.test_int_round_trip4 (torch.int64)
[ OK ] TestDtypes.test_int_round_trip4 (torch.int64)
----------------------------------------------------------------------
Ran 11 tests in 3.309s
OK
free(): corrupted unsorted chunks
test/tpu/run_tests.sh: line 25: 593797 Aborted (core dumped) python3 test/pjrt/test_dtypes.py
+ python3 test/pjrt/test_dynamic_plugin_tpu.py
Running tests under Python 3.10.14: /root/miniconda3/envs/torch310/bin/python3
[ RUN ] TestDynamicTpuPlugin.test_single_process
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
[ OK ] TestDynamicTpuPlugin.test_single_process
[ RUN ] TestDynamicTpuPlugin.test_spawn
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
/pytorch/xla/torch_xla/core/xla_model.py:99: UserWarning: `devkind` argument is deprecated and will be removed in a future release.
warnings.warn("`devkind` argument is deprecated and will be removed in a "
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f8656095ce1,7f865639313f,7f86560de6d9&map=
*** SIGABRT received by PID 596291 (TID 596291) on cpu 58 from PID 596291; stack trace: ***
PC: @ 0x7f8656095ce1 (unknown) raise
@ 0x7f813efa91a1 1888 (unknown)
@ 0x7f8656393140 1854042976 (unknown)
@ 0x7f86560de6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f8656095ce1,7f813efa91a0,7f865639313f,7f86560de6d9,0&map=
E0905 20:22:51.326703 596291 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:51.326717 596291 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:51.326722 596291 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:51.326740 596291 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:51.326746 596291 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f6ae21fcce1,7f6ae24fa13f,7f6ae22456d9&map=
*** SIGABRT received by PID 596289 (TID 596289) on cpu 7 from PID 596289; stack trace: ***
PC: @ 0x7f6ae21fcce1 (unknown) raise
@ 0x7f65cafa91a1 1888 (unknown)
@ 0x7f6ae24fa140 (unknown) (unknown)
@ 0x7f6ae22456da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fd22e331ce1,7fd22e62f13f,7fd22e37a6d9&map=
*** SIGABRT received by PID 596290 (TID 596290) on cpu 129 from PID 596290; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f6ae21fcce1,7f65cafa91a0,7f6ae24fa13f,7f6ae22456d9,0&map=
E0905 20:22:51.362059 596289 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:51.362070 596289 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:51.362076 596289 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:51.362096 596289 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:51.362101 596289 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fd22e331ce1 (unknown) raise
@ 0x7fcd16fa91a1 1888 (unknown)
@ 0x7fd22e62f140 1545310384 (unknown)
@ 0x7fd22e37a6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fd22e331ce1,7fcd16fa91a0,7fd22e62f13f,7fd22e37a6d9,0&map=
E0905 20:22:51.365104 596290 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:51.365114 596290 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:51.365118 596290 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:51.365133 596290 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:51.365138 596290 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f652dc90ce1,7f652df8e13f,7f652dcd96d9&map=
*** SIGABRT received by PID 596286 (TID 596286) on cpu 16 from PID 596286; stack trace: ***
PC: @ 0x7f652dc90ce1 (unknown) raise
@ 0x7f6016fa91a1 1888 (unknown)
@ 0x7f652df8e140 2082578608 (unknown)
@ 0x7f652dcd96da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f652dc90ce1,7f6016fa91a0,7f652df8e13f,7f652dcd96d9,0&map=
E0905 20:22:51.378228 596286 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:22:51.378245 596286 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:22:51.378251 596286 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:22:51.378270 596286 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:22:51.378287 596286 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:22:57.391832 596290 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:22:57.393400 596289 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:22:57.394805 596286 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:22:57.394991 596291 process_state.cc:805] RAW: Raising signal 6 with default behavior
[ OK ] TestDynamicTpuPlugin.test_spawn
----------------------------------------------------------------------
Ran 2 tests in 20.302s
OK
+ python3 test/test_while_loop.py
s...
----------------------------------------------------------------------
Ran 4 tests in 4.731s
OK (skipped=1)
+ python3 test/test_scan.py
....
----------------------------------------------------------------------
Ran 4 tests in 3.516s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f63705a2ce1,7f63708a013f,7f63705eb6d9&map=
*** SIGABRT received by PID 600990 (TID 600990) on cpu 174 from PID 600990; stack trace: ***
PC: @ 0x7f63705a2ce1 (unknown) raise
@ 0x7f5e5afa91a1 1888 (unknown)
@ 0x7f63708a0140 1492842576 (unknown)
@ 0x7f63705eb6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f63705a2ce1,7f5e5afa91a0,7f63708a013f,7f63705eb6d9,0&map=
E0905 20:23:18.153425 600990 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:23:18.153436 600990 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:23:18.153442 600990 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:23:18.153461 600990 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:23:18.153466 600990 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:23:23.689609 600990 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 28: 600990 Aborted (core dumped) python3 test/test_scan.py
+ python3 test/test_input_output_aliases.py
......
----------------------------------------------------------------------
Ran 6 tests in 6.797s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f5562750ce1,7f5562a4e13f,7f55627996d9&map=
*** SIGABRT received by PID 602443 (TID 603799) on cpu 166 from PID 602443; stack trace: ***
PC: @ 0x7f5562750ce1 (unknown) raise
@ 0x7f504efa91a1 1888 (unknown)
@ 0x7f5562a4e140 2320 (unknown)
@ 0x7f55627996da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f5562750ce1,7f504efa91a0,7f5562a4e13f,7f55627996d9,0&map=
E0905 20:23:35.987562 603799 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:23:35.987575 603799 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:23:35.987580 603799 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:23:35.987598 603799 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:23:35.987604 603799 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:23:43.071171 603799 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 31: 602443 Aborted (core dumped) python3 test/test_input_output_aliases.py
+ python3 test/test_gmm.py
............
----------------------------------------------------------------------
Ran 12 tests in 27.641s
OK
+ python3 test/eager/test_eager_spmd.py
..
----------------------------------------------------------------------
Ran 2 tests in 3.618s
OK
+ python3 test/torch_distributed/test_torch_distributed_all_gather_xla_backend.py
[W905 20:24:41.543918223 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fa7447adce1,7fa744aab13f,7fa7447f66d9&map=
*** SIGABRT received by PID 607982 (TID 611029) on cpu 168 from PID 607982; stack trace: ***
PC: @ 0x7fa7447adce1 (unknown) raise
@ 0x7fa22efa91a1 1888 (unknown)
@ 0x7fa744aab140 2320 (unknown)
@ 0x7fa7447f66da (unknown) (unknown)
free(): corrupted unsorted chunks
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fdd32038ce1,7fdd3233613f,7fdd320816d9https://symbolize.stripped_domain/r/?trace=&map=7fc3660d4ce1,
7fc3663d213f,7fc36611d6d9&map=
*** SIGABRT received by PID 607980 (TID 607980) on cpu 44 from PID 607980; stack trace: ***
*** SIGABRT received by PID 607981 (TID 607981) on cpu 16 from PID 607981; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fa7447adce1,7fa22efa91a0,7fa744aab13f,7fa7447f66d9,0&map=
E0905 20:24:43.520062 611029 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:24:43.520079 611029 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:24:43.520084 611029 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:24:43.520106 611029 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:24:43.520112 611029 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fc3660d4ce1 (unknown) raise
@ 0x7fbe4efa91a1 1888 (unknown)
@ 0x7fc3663d2140 569070048 (unknown)
@ 0x7fc36611d6da (unknown) (unknown)
PC: @ 0x7fdd32038ce1 (unknown) raise
@ 0x7fd81afa91a1 1888 (unknown)
@ 0x7fdd32336140 84930416 (unknown)
@ 0x7fdd320816da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fc3660d4ce1,7fbe4efa91a0,7fc3663d213f,7fc36611d6d9,0&map=
E0905 20:24:43.524300 607981 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:24:43.524315 607981 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:24:43.524320 607981 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:24:43.524340 607981 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:24:43.524349 607981 coredump_hook.cc:472] RAW: Dumping core locally.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fdd32038ce1,7fd81afa91a0,7fdd3233613f,7fdd320816d9,0&map=
E0905 20:24:43.524488 607980 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:24:43.524504 607980 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:24:43.524507 607980 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:24:43.524526 607980 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:24:43.524534 607980 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:24:50.102664 607981 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:24:50.328473 607980 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:24:50.337744 611029 process_state.cc:805] RAW: Raising signal 6 with default behavior
+ python3 test/torch_distributed/test_torch_distributed_all_reduce_xla_backend.py
[W905 20:25:02.397202020 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
[W905 20:25:02.425694836 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f583b4bdce1,7f583b7bb13f,7f583b5066d9&map=
*** SIGABRT received by PID 612396 (TID 612396) on cpu 179 from PID 612396; stack trace: ***
PC: @ 0x7f583b4bdce1 (unknown) raise
@ 0x7f5326fa91a1 1888 (unknown)
@ 0x7f583b7bb140 (unknown) (unknown)
@ 0x7f583b5066da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f583b4bdce1,7f5326fa91a0,7f583b7bb13f,7f583b5066d9,0&map=
E0905 20:25:04.718246 612396 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:04.718260 612396 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:04.718264 612396 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:04.718287 612396 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:04.718292 612396 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f0b8b69cce1,7f0b8b99a13f,7f0b8b6e56d9&map=
*** SIGABRT received by PID 612392 (TID 612392) on cpu 138 from PID 612392; stack trace: ***
PC: @ 0x7f0b8b69cce1 (unknown) raise
@ 0x7f0676fa91a1 1888 (unknown)
@ 0x7f0b8b99a140 (unknown) (unknown)
@ 0x7f0b8b6e56da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f0b8b69cce1,7f0676fa91a0,7f0b8b99a13f,7f0b8b6e56d9,0&map=
E0905 20:25:04.758038 612392 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:04.758054 612392 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:04.758057 612392 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:04.758073 612392 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:04.758077 612392 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f3be1b27ce1,7f3be1e2513f,7f3be1b706d9&map=
*** SIGABRT received by PID 612395 (TID 612395) on cpu 175 from PID 612395; stack trace: ***
PC: @ 0x7f3be1b27ce1 (unknown) raise
@ 0x7f36cafa91a1 1888 (unknown)
@ 0x7f3be1e25140 (unknown) (unknown)
@ 0x7f3be1b706da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fef2d2c9ce1,7fef2d5c713f,7fef2d3126d9&map=
*** SIGABRT received by PID 612397 (TID 612397) on cpu 168 from PID 612397; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f3be1b27ce1,7f36cafa91a0,7f3be1e2513f,7f3be1b706d9,0&map=
E0905 20:25:04.779049 612395 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:04.779060 612395 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:04.779066 612395 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:04.779084 612395 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:04.779090 612395 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7fef2d2c9ce1 (unknown) raise
@ 0x7fea1afa91a1 1888 (unknown)
@ 0x7fef2d5c7140 (unknown) (unknown)
@ 0x7fef2d3126da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fef2d2c9ce1,7fea1afa91a0,7fef2d5c713f,7fef2d3126d9,0&map=
E0905 20:25:04.782359 612397 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:04.782370 612397 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:04.782375 612397 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:04.782396 612397 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:04.782401 612397 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:25:12.497701 612392 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:12.499824 612396 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:12.744406 612395 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:12.745863 612397 process_state.cc:805] RAW: Raising signal 6 with default behavior
+ python3 test/torch_distributed/test_torch_distributed_multi_all_reduce_xla_backend.py
[W905 20:25:24.637096282 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f4e026e4ce1,7f4e029e213f,7f4e0272d6d9&map=
*** SIGABRT received by PID 616802 (TID 616802) on cpu 10 from PID 616802; stack trace: ***
PC: @ 0x7f4e026e4ce1 (unknown) raise
@ 0x7f48eafa91a1 1888 (unknown)
@ 0x7f4e029e2140 968598080 (unknown)
@ 0x7f4e0272d6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f4e026e4ce1,7f48eafa91a0,7f4e029e213f,7f4e0272d6d9,0&map=
E0905 20:25:26.845696 616802 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:26.845708 616802 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:26.845713 616802 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:26.845733 616802 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:26.845739 616802 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f67cf31fce1,7f67cf61d13f,7f67cf3686d9&map=
*** SIGABRT received by PID 616806 (TID 616806) on cpu 169 from PID 616806; stack trace: ***
PC: @ 0x7f67cf31fce1 (unknown) raise
@ 0x7f62bafa91a1 1888 (unknown)
@ 0x7f67cf61d140 (unknown) (unknown)
@ 0x7f67cf3686da (unknown) (unknown)
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f750c767ce1,7f750ca6513f,7f750c7b06d9&map=
*** SIGABRT received by PID 616807 (TID 616807) on cpu 13 from PID 616807; stack trace: ***
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f67cf31fce1,7f62bafa91a0,7f67cf61d13f,7f67cf3686d9,0&map=
E0905 20:25:26.865477 616806 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:26.865487 616806 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:26.865493 616806 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:26.865510 616806 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:26.865517 616806 coredump_hook.cc:472] RAW: Dumping core locally.
PC: @ 0x7f750c767ce1 (unknown) raise
@ 0x7f6ff6fa91a1 1888 (unknown)
@ 0x7f750ca65140 (unknown) (unknown)
@ 0x7f750c7b06da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f750c767ce1,7f6ff6fa91a0,7f750ca6513f,7f750c7b06d9,0&map=
E0905 20:25:26.868245 616807 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:26.868254 616807 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:26.868258 616807 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:26.868277 616807 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:26.868282 616807 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f17aadc2ce1,7f17ab0c013f,7f17aae0b6d9&map=
*** SIGABRT received by PID 616805 (TID 616805) on cpu 19 from PID 616805; stack trace: ***
PC: @ 0x7f17aadc2ce1 (unknown) raise
@ 0x7f1292fa91a1 1888 (unknown)
@ 0x7f17ab0c0140 (unknown) (unknown)
@ 0x7f17aae0b6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f17aadc2ce1,7f1292fa91a0,7f17ab0c013f,7f17aae0b6d9,0&map=
E0905 20:25:26.875982 616805 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:26.876000 616805 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:26.876005 616805 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:26.876025 616805 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:26.876031 616805 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:25:35.246334 616805 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:35.247887 616807 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:36.993890 616802 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:36.994871 616806 process_state.cc:805] RAW: Raising signal 6 with default behavior
+ python3 test/torch_distributed/test_torch_distributed_reduce_scatter_xla_backend.py
[W905 20:25:48.879264991 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
[W905 20:25:48.893023749 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
[W905 20:25:48.896316908 socket.cpp:752] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f8d49f45ce1,7f8d4a24313f,7f8d49f8e6d9&map=
*** SIGABRT received by PID 621306 (TID 621306) on cpu 131 from PID 621306; stack trace: ***
PC: @ 0x7f8d49f45ce1 (unknown) raise
@ 0x7f8832fa91a1 1888 (unknown)
@ 0x7f8d4a243140 74410320 (unknown)
@ 0x7f8d49f8e6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f8d49f45ce1,7f8832fa91a0,7f8d4a24313f,7f8d49f8e6d9,0&map=
E0905 20:25:51.067471 621306 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:51.067486 621306 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:51.067491 621306 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:51.067515 621306 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:51.067520 621306 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7ffb7d565ce1,7ffb7d86313f,7ffb7d5ae6d9&map=
*** SIGABRT received by PID 621301 (TID 624359) on cpu 175 from PID 621301; stack trace: ***
PC: @ 0x7ffb7d565ce1 (unknown) raise
@ 0x7ff66afa91a1 1888 (unknown)
@ 0x7ffb7d863140 2320 (unknown)
@ 0x7ffb7d5ae6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7ffb7d565ce1,7ff66afa91a0,7ffb7d86313f,7ffb7d5ae6d9,0&map=
E0905 20:25:51.079362 624359 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:51.079374 624359 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:51.079378 624359 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:51.079399 624359 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:51.079404 624359 coredump_hook.cc:472] RAW: Dumping core locally.
free(): corrupted unsorted chunks
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f7332708ce1,7f7332a0613f,7f73327516d9&map=
*** SIGABRT received by PID 621305 (TID 621305) on cpu 32 from PID 621305; stack trace: ***
https://symbolize.stripped_domain/r/?trace=7fb0c44b5ce1,7fb0c47b313f,7fb0c44fe6d9&map=
*** SIGABRT received by PID 621304 (TID 621304) on cpu 52 from PID 621304; stack trace: ***
PC: @ 0x7f7332708ce1 (unknown) raise
@ 0x7f6e1efa91a1 1888 (unknown)
PC: @ 0x7fb0c44b5ce1 (unknown) raise
@ 0x7f7332a06140 24689136 (unknown)
@ 0x7f73327516da (unknown) (unknown)
@ 0x7fabaefa91a1 1888 (unknown)
@ 0x7fb0c47b3140 1768588016 (unknown)
@ 0x7fb0c44fe6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f7332708ce1,7f6e1efa91a0,7f7332a0613f,7f73327516d9,0&map=
E0905 20:25:51.124426 621305 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:51.124439 621305 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:51.124444 621305 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=E0905 20:25:51.124461 621305 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
7fb0c44b5ce1,E0905 20:25:51.124468 621305 coredump_hook.cc:472] RAW: Dumping core locally.
7fabaefa91a0,7fb0c47b313f,7fb0c44fe6d9,0&map=
E0905 20:25:51.124493 621304 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:25:51.124504 621304 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:25:51.124508 621304 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:25:51.124525 621304 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:25:51.124531 621304 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:25:58.007730 624359 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:58.009071 621305 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:58.011528 621306 process_state.cc:805] RAW: Raising signal 6 with default behavior
E0905 20:25:59.827668 621304 process_state.cc:805] RAW: Raising signal 6 with default behavior
+ python3 test/quantized_ops/test_dot_general.py
....
----------------------------------------------------------------------
Ran 4 tests in 0.160s
OK
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7fc711ed9ce1,7fc7121d713f,7fc711f226d9&map=
*** SIGABRT received by PID 625611 (TID 626980) on cpu 171 from PID 625611; stack trace: ***
PC: @ 0x7fc711ed9ce1 (unknown) raise
@ 0x7fc1fafa91a1 1888 (unknown)
@ 0x7fc7121d7140 2320 (unknown)
@ 0x7fc711f226da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7fc711ed9ce1,7fc1fafa91a0,7fc7121d713f,7fc711f226d9,0&map=
E0905 20:26:11.134821 626980 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:26:11.134837 626980 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:26:11.134843 626980 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:26:11.134867 626980 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:26:11.134873 626980 coredump_hook.cc:472] RAW: Dumping core locally.
E0905 20:26:16.555061 626980 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 38: 625611 Aborted (core dumped) python3 test/quantized_ops/test_dot_general.py
+ python3 examples/data_parallel/train_resnet_spmd_data_parallel.py
Epoch 1 train begin 8:26PM UTC on Sep 05, 2024
epoch: 1, step: 0, loss: 6.888794422149658, rate: 19.639100615806406
epoch: 1, step: 10, loss: 6.8788065910339355, rate: 120.74183048396439
epoch: 1, step: 20, loss: 6.8688130378723145, rate: 4119.416236337881
epoch: 1, step: 30, loss: 6.858831405639648, rate: 5719.89857862935
epoch: 1, step: 40, loss: 6.848846912384033, rate: 6263.918316915789
epoch: 1, step: 50, loss: 6.838860034942627, rate: 6578.935260438671
epoch: 1, step: 60, loss: 6.828869819641113, rate: 6704.828720407788
epoch: 1, step: 70, loss: 6.818877696990967, rate: 6669.649127750544
epoch: 1, step: 80, loss: 6.808884143829346, rate: 6552.767186883248
epoch: 1, step: 90, loss: 6.7988972663879395, rate: 6693.223515473259
epoch: 1, step: 100, loss: 6.7889180183410645, rate: 6750.332856742464
epoch: 1, step: 110, loss: 6.778954029083252, rate: 6775.349813354032
epoch: 1, step: 120, loss: 6.7689738273620605, rate: 6738.289319082537
epoch: 1, step: 130, loss: 6.758986949920654, rate: 6760.801271464079
epoch: 1, step: 140, loss: 6.748992443084717, rate: 6772.727700990708
epoch: 1, step: 150, loss: 6.7389912605285645, rate: 6781.156244048092
epoch: 1, step: 160, loss: 6.7289958000183105, rate: 6728.416064925854
epoch: 1, step: 170, loss: 6.718994140625, rate: 6761.406999703549
epoch: 1, step: 180, loss: 6.708993911743164, rate: 6772.050427017025
epoch: 1, step: 190, loss: 6.698999404907227, rate: 6784.201827749102
epoch: 1, step: 200, loss: 6.6890106201171875, rate: 6645.418376469767
epoch: 1, step: 210, loss: 6.67902946472168, rate: 6723.224499385275
epoch: 1, step: 220, loss: 6.669054985046387, rate: 6760.907167822907
epoch: 1, step: 230, loss: 6.659095287322998, rate: 6779.040847477586
epoch: 1, step: 240, loss: 6.649146556854248, rate: 6461.093456239546
epoch: 1, step: 250, loss: 6.639055252075195, rate: 6654.603951974737
epoch: 1, step: 260, loss: 6.629129409790039, rate: 6701.806303492882
epoch: 1, step: 270, loss: 6.619180679321289, rate: 6549.166760884047
epoch: 1, step: 280, loss: 6.609229564666748, rate: 6575.344930427953
epoch: 1, step: 290, loss: 6.599252700805664, rate: 6693.196710226297
Epoch 1 train end 8:27PM UTC on Sep 05, 2024
free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=7f2ba0762ce1,7f2ba0a6013f,7f2ba07ab6d9&map=
*** SIGABRT received by PID 627075 (TID 629631) on cpu 82 from PID 627075; stack trace: ***
PC: @ 0x7f2ba0762ce1 (unknown) raise
@ 0x7f2366fa91a1 1888 (unknown)
@ 0x7f2ba0a60140 2320 (unknown)
@ 0x7f2ba07ab6da (unknown) (unknown)
@ 0x1 (unknown) (unknown)
https://symbolize.stripped_domain/r/?trace=7f2ba0762ce1,7f2366fa91a0,7f2ba0a6013f,7f2ba07ab6d9,0&map=
E0905 20:27:41.467463 629631 coredump_hook.cc:316] RAW: Remote crash data gathering hook invoked.
E0905 20:27:41.467482 629631 client.cc:269] RAW: Coroner client retries enabled, will retry for up to 30 sec.
E0905 20:27:41.467487 629631 coredump_hook.cc:411] RAW: Sending fingerprint to remote end.
E0905 20:27:41.467508 629631 coredump_hook.cc:420] RAW: Cannot send fingerprint to Coroner: [NOT_FOUND] stat failed on crash reporting socket /var/google/services/logmanagerd/remote_coredump.socket (Is the listener running?): No such file or directory
E0905 20:27:41.467514 629631 coredump_hook.cc:472] RAW: Dumping core locally.
base/elfcore.c:2078 Failed to write mapping 2480 at 0x7f01c8000000 of size 58597376: No space left on device(28)
E0905 20:29:27.997053 629631 process_state.cc:805] RAW: Raising signal 6 with default behavior
test/tpu/run_tests.sh: line 41: 627075 Aborted (core dumped) python3 examples/data_parallel/train_resnet_spmd_data_parallel.py
+ python3 examples/fsdp/train_decoder_only_fsdp_v2.py
Traceback (most recent call last):
File "/pytorch/xla/examples/fsdp/train_decoder_only_fsdp_v2.py", line 6, in <module>
from train_decoder_only_base import TrainDecoderOnlyBase
File "/pytorch/xla/examples/train_decoder_only_base.py", line 3, in <module>
from torch_xla import runtime as xr
File "/pytorch/xla/torch_xla/__init__.py", line 212, in <module>
torch._dynamo.config.automatic_dynamic_shapes = False
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/__init__.py", line 2564, in __getattr__
return importlib.import_module(f".{name}", __name__)
File "/root/miniconda3/envs/torch310/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/__init__.py", line 3, in <module>
from . import convert_frame, eval_frame, resume_execution
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 52, in <module>
from . import config, exc, trace_rules
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/trace_rules.py", line 46, in <module>
from .variables import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/__init__.py", line 2, in <module>
from .builtin import BuiltinVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/builtin.py", line 47, in <module>
from .ctx_manager import EventVariable, StreamVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/ctx_manager.py", line 22, in <module>
from .functions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/functions.py", line 31, in <module>
from torch.distributed._composable.fsdp import _fsdp_param_group
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/__init__.py", line 3, in <module>
from .fully_shard import fully_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/fully_shard.py", line 10, in <module>
from torch.distributed.fsdp._common_utils import _FSDPState
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/__init__.py", line 1, in <module>
from ._flat_param import FlatParameter as FlatParameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_flat_param.py", line 47, in <module>
from ._fsdp_extensions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_fsdp_extensions.py", line 6, in <module>
from torch.distributed._shard.sharded_tensor.api import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/__init__.py", line 1, in <module>
from .api import _shard_tensor, load_with_process_group, shard_module, shard_parameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/api.py", line 9, in <module>
from torch.distributed._shard.sharded_tensor import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/__init__.py", line 8, in <module>
from .api import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/api.py", line 31, in <module>
from .reshard import reshard_local_shard, reshuffle_local_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/reshard.py", line 14, in <module>
from torch.distributed.nn.functional import all_to_all, all_to_all_single
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/__init__.py", line 7, in <module>
from .api.remote_module import RemoteModule
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/api/remote_module.py", line 26, in <module>
from torch.distributed.nn.jit import instantiator
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/jit/instantiator.py", line 20, in <module>
_TEMP_DIR = tempfile.TemporaryDirectory()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 835, in __init__
self.name = mkdtemp(suffix, prefix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 373, in mkdtemp
prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 126, in _sanitize_params
dir = gettempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 315, in gettempdir
return _os.fsdecode(_gettempdir())
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 308, in _gettempdir
tempdir = _get_default_tempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 223, in _get_default_tempdir
raise FileNotFoundError(_errno.ENOENT,
FileNotFoundError: [Errno 2] No usable temporary directory found in ['/tmp', '/var/tmp', '/usr/tmp', '/pytorch/xla']
+ python3 examples/train_resnet_amp.py
Traceback (most recent call last):
File "/pytorch/xla/examples/train_resnet_amp.py", line 1, in <module>
from train_resnet_base import TrainResNetBase
File "/pytorch/xla/examples/train_resnet_base.py", line 1, in <module>
from torch_xla import runtime as xr
File "/pytorch/xla/torch_xla/__init__.py", line 212, in <module>
torch._dynamo.config.automatic_dynamic_shapes = False
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/__init__.py", line 2564, in __getattr__
return importlib.import_module(f".{name}", __name__)
File "/root/miniconda3/envs/torch310/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/__init__.py", line 3, in <module>
from . import convert_frame, eval_frame, resume_execution
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 52, in <module>
from . import config, exc, trace_rules
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/trace_rules.py", line 46, in <module>
from .variables import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/__init__.py", line 2, in <module>
from .builtin import BuiltinVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/builtin.py", line 47, in <module>
from .ctx_manager import EventVariable, StreamVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/ctx_manager.py", line 22, in <module>
from .functions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/functions.py", line 31, in <module>
from torch.distributed._composable.fsdp import _fsdp_param_group
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/__init__.py", line 3, in <module>
from .fully_shard import fully_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/fully_shard.py", line 10, in <module>
from torch.distributed.fsdp._common_utils import _FSDPState
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/__init__.py", line 1, in <module>
from ._flat_param import FlatParameter as FlatParameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_flat_param.py", line 47, in <module>
from ._fsdp_extensions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_fsdp_extensions.py", line 6, in <module>
from torch.distributed._shard.sharded_tensor.api import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/__init__.py", line 1, in <module>
from .api import _shard_tensor, load_with_process_group, shard_module, shard_parameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/api.py", line 9, in <module>
from torch.distributed._shard.sharded_tensor import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/__init__.py", line 8, in <module>
from .api import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/api.py", line 31, in <module>
from .reshard import reshard_local_shard, reshuffle_local_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/reshard.py", line 14, in <module>
from torch.distributed.nn.functional import all_to_all, all_to_all_single
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/__init__.py", line 7, in <module>
from .api.remote_module import RemoteModule
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/api/remote_module.py", line 26, in <module>
from torch.distributed.nn.jit import instantiator
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/jit/instantiator.py", line 20, in <module>
_TEMP_DIR = tempfile.TemporaryDirectory()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 835, in __init__
self.name = mkdtemp(suffix, prefix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 373, in mkdtemp
prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 126, in _sanitize_params
dir = gettempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 315, in gettempdir
return _os.fsdecode(_gettempdir())
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 308, in _gettempdir
tempdir = _get_default_tempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 223, in _get_default_tempdir
raise FileNotFoundError(_errno.ENOENT,
FileNotFoundError: [Errno 2] No usable temporary directory found in ['/tmp', '/var/tmp', '/usr/tmp', '/pytorch/xla']
++ python -c 'import sys; sys.path.remove('\'''\''); import torch_xla; print(torch_xla._internal.tpu.version())'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/pytorch/xla/torch_xla/__init__.py", line 212, in <module>
torch._dynamo.config.automatic_dynamic_shapes = False
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/__init__.py", line 2564, in __getattr__
return importlib.import_module(f".{name}", __name__)
File "/root/miniconda3/envs/torch310/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/__init__.py", line 3, in <module>
from . import convert_frame, eval_frame, resume_execution
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 52, in <module>
from . import config, exc, trace_rules
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/trace_rules.py", line 46, in <module>
from .variables import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/__init__.py", line 2, in <module>
from .builtin import BuiltinVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/builtin.py", line 47, in <module>
from .ctx_manager import EventVariable, StreamVariable
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/ctx_manager.py", line 22, in <module>
from .functions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/_dynamo/variables/functions.py", line 31, in <module>
from torch.distributed._composable.fsdp import _fsdp_param_group
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/__init__.py", line 3, in <module>
from .fully_shard import fully_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_composable/fully_shard.py", line 10, in <module>
from torch.distributed.fsdp._common_utils import _FSDPState
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/__init__.py", line 1, in <module>
from ._flat_param import FlatParameter as FlatParameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_flat_param.py", line 47, in <module>
from ._fsdp_extensions import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/fsdp/_fsdp_extensions.py", line 6, in <module>
from torch.distributed._shard.sharded_tensor.api import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/__init__.py", line 1, in <module>
from .api import _shard_tensor, load_with_process_group, shard_module, shard_parameter
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/api.py", line 9, in <module>
from torch.distributed._shard.sharded_tensor import ShardedTensor
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/__init__.py", line 8, in <module>
from .api import (
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/api.py", line 31, in <module>
from .reshard import reshard_local_shard, reshuffle_local_shard
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/_shard/sharded_tensor/reshard.py", line 14, in <module>
from torch.distributed.nn.functional import all_to_all, all_to_all_single
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/__init__.py", line 7, in <module>
from .api.remote_module import RemoteModule
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/api/remote_module.py", line 26, in <module>
from torch.distributed.nn.jit import instantiator
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/torch/distributed/nn/jit/instantiator.py", line 20, in <module>
_TEMP_DIR = tempfile.TemporaryDirectory()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 835, in __init__
self.name = mkdtemp(suffix, prefix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 373, in mkdtemp
prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir)
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 126, in _sanitize_params
dir = gettempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 315, in gettempdir
return _os.fsdecode(_gettempdir())
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 308, in _gettempdir
tempdir = _get_default_tempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 223, in _get_default_tempdir
raise FileNotFoundError(_errno.ENOENT,
FileNotFoundError: [Errno 2] No usable temporary directory found in ['/tmp', '/var/tmp', '/usr/tmp', '/pytorch/xla']
+ TPU_VERSION=
+ [[ -n '' ]]
+ python3 test/tpu/tpu_info/test_cli.py
Traceback (most recent call last):
File "/pytorch/xla/test/tpu/tpu_info/test_cli.py", line 2, in <module>
from absl.testing import absltest, parameterized
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/absl_py-2.1.0-py3.10.egg/absl/testing/absltest.py", line 212, in <module>
get_default_test_tmpdir(),
File "/root/miniconda3/envs/torch310/lib/python3.10/site-packages/absl_py-2.1.0-py3.10.egg/absl/testing/absltest.py", line 150, in get_default_test_tmpdir
tmpdir = os.path.join(tempfile.gettempdir(), 'absl_testing')
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 315, in gettempdir
return _os.fsdecode(_gettempdir())
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 308, in _gettempdir
tempdir = _get_default_tempdir()
File "/root/miniconda3/envs/torch310/lib/python3.10/tempfile.py", line 223, in _get_default_tempdir
raise FileNotFoundError(_errno.ENOENT,
FileNotFoundError: [Errno 2] No usable temporary directory found in ['/tmp', '/var/tmp', '/usr/tmp', '/pytorch/xla']
(torch310) root@6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment