Last active
January 14, 2022 21:41
-
-
Save davidberard98/e7065260fb3b7549879ec17904d0cd84 to your computer and use it in GitHub Desktop.
nvfuser opinfo test failures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
srun: job 15710 queued and waiting for resources | |
srun: job 15710 has been allocated resources | |
srun: error: ioctl(TIOCGWINSZ): Inappropriate ioctl for device | |
srun: error: Not using a pseudo-terminal, disregarding --pty option | |
monkeytype is not installed. Skipping tests for Profile-Directed Typing | |
test_nvfuser_correctness_H_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_H_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_T_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_T_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___getitem___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness___getitem___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness___radd___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___radd___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rdiv___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness___rdiv___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rmatmul___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rmatmul___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rmod___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rmul___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rmul___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rpow___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/_tensor.py:627: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
return torch.tensor(other, dtype=dtype, device=self.device) ** self | |
ERROR | |
test_nvfuser_correctness___rpow___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness___rsub___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness___rsub___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness__masked_amax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/_masked/__init__.py:331: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
return torch.tensor(-torch.inf, dtype=dtype, device=device) | |
ERROR | |
test_nvfuser_correctness__masked_amin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/_masked/__init__.py:336: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
return torch.tensor(torch.inf, dtype=dtype, device=device) | |
ERROR | |
test_nvfuser_correctness__masked_log_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/_masked/__init__.py:386: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
elif mask.shape != input.shape: | |
ERROR | |
test_nvfuser_correctness__masked_mean_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/_masked/__init__.py:351: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
return torch.tensor(0, dtype=dtype, device=device) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/_masked/__init__.py:350: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
return torch.tensor(torch.inf, dtype=dtype, device=device) | |
ERROR | |
test_nvfuser_correctness__masked_normalize_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness__masked_prod_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_prod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_softmin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_sum_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_sum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_var_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness__masked_var_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_abs_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_abs_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_acos_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_acos_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_acosh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_acosh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_add_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_add_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addbmm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addbmm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addcdiv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addcdiv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addcmul_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addcmul_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmm_decomposed_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmm_decomposed_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addmv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addr_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_addr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_all_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_all_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_allclose_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_allclose_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_amax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_amin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_aminmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_angle_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_angle_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_any_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_any_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_argmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_argmin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_argsort_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_argwhere_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_argwhere_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_as_strided_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_as_strided_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_asin_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_asin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_asinh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_asinh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atan2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atan_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atan_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atanh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atanh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_1d_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_2d_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_3d_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_atleast_3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_baddbmm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_baddbmm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bernoulli_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bfloat16_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py:3224: UserWarning: Casting complex values to real discards the imaginary part (Triggered internally at ../../aten/src/ATen/native/Copy.cpp:237.) | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
ok | |
test_nvfuser_correctness_bfloat16_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bfloat16_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bfloat16_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bitwise_left_shift_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bitwise_right_shift_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_block_diag_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_block_diag_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bmm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bmm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bool_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bool_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bool_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bool_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_broadcast_tensors_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_broadcast_tensors_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_broadcast_to_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_broadcast_to_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_bucketize_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_byte_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_byte_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_byte_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_byte_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cartesian_prod_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cartesian_prod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cat_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cat_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cdist_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ceil_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_char_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_char_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_char_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_char_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cholesky_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_cholesky_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_cholesky_inverse_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_cholesky_inverse_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_cholesky_solve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_cholesky_solve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_chunk_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_chunk_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_clamp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_clamp_scalar_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_clone_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_clone_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_column_stack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_column_stack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_combinations_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py:3224: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../../aten/src/ATen/native/TensorShape.cpp:2167.) | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
ok | |
test_nvfuser_correctness_combinations_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_complex_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_conj_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_conj_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_conj_physical_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_conj_physical_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_contiguous_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_contiguous_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_copysign_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_corrcoef_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_corrcoef_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cos_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cos_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cosh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cosh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_count_nonzero_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_count_nonzero_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cov_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_cov_cuda_complex64! Caching allocator allocated memory was 694784 and is now reported as 698880 on device 0. CUDA driver allocated memory was 1732247552 and is now 1732247552. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_cov_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py:3224: UserWarning: cov(): degrees of freedom is <= 0 (Triggered internally at ../../aten/src/ATen/native/Correlation.cpp:99.) | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py:354: UserWarning: cov(): degrees of freedom is <= 0 (Triggered internally at ../../aten/src/ATen/native/Correlation.cpp:99.) | |
return fn(*(args[i] if s == 's' else next(tensors) for i, s in enumerate(source)), **kwargs) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:415: UserWarning: cov(): degrees of freedom is <= 0 (Triggered internally at ../../aten/src/ATen/native/Correlation.cpp:99.) | |
return callable(*args, **kwargs) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_cov_cuda_float32! Caching allocator allocated memory was 698880 and is now reported as 702976 on device 0. CUDA driver allocated memory was 1732247552 and is now 1732247552. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_cross_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cross_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cummax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cummin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumprod_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumprod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumsum_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumsum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumulative_trapezoid_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_cumulative_trapezoid_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_deg2rad_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diag_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diag_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diag_embed_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diag_embed_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diagflat_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diagflat_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diagonal_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diagonal_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diagonal_scatter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diff_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_diff_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_digamma_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dist_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dist_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_floor_rounding_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_floor_rounding_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_no_rounding_mode_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_no_rounding_mode_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_trunc_rounding_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_div_trunc_rounding_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dot_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dot_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_double_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_double_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_double_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_double_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dsplit_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dsplit_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dstack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_dstack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_eig_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_eig_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_einsum_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_einsum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_empty_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... FAIL | |
test_nvfuser_correctness_empty_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... FAIL | |
test_nvfuser_correctness_eq_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_eq_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_erf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_erfc_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_erfinv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_exp2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_exp_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_exp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_expand_as_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_expand_as_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_expand_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_expand_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_expm1_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fft2_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fftn_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fftshift_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_fftshift_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfft2_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfftn_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_hfftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifft2_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifftn_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifftshift_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ifftshift_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ihfft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ihfft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_ihfftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfft2_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfftn_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_irfftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_rfft2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_rfft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fft_rfftn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fill__cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fill__cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flatten_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flatten_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flip_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flip_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fliplr_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fliplr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flipud_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_flipud_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_power_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_float_power_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_floor_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_floor_divide_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py:3224: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. | |
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at ../../aten/src/ATen/native/BinaryOps.cpp:607.) | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
ok | |
test_nvfuser_correctness_fmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fmin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fmod_autodiffed_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_fmod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_frac_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_frexp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_full_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_full_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_gather_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_gather_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ge_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_geqrf_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_geqrf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_gradient_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_gradient_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_gt_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_half_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_half_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_half_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_half_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_heaviside_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_histc_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_hsplit_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_hsplit_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_hstack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_hstack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_hypot_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_i0_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_igamma_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_igamma_grad_other_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_igammac_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_igammac_grad_other_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_imag_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_add_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_add_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_copy_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_copy_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_fill_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_fill_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_put_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_put_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_select_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_index_select_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_inner_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_inner_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_int_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_int_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_int_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_int_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_inverse_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_inverse_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isclose_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isclose_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isfinite_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isfinite_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isinf_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isinf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isnan_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isnan_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isneginf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isposinf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isreal_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_isreal_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_istft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_istft_cuda_complex64! Caching allocator allocated memory was 702976 and is now reported as 703488 on device 0. CUDA driver allocated memory was 2176843776 and is now 2176843776. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_istft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/functional.py:770: UserWarning: istft will require a complex-valued input tensor in a future PyTorch release. Matching the output from stft with return_complex=True. (Triggered internally at ../../aten/src/ATen/native/SpectralOps.cpp:950.) | |
return _VF.istft(input, n_fft, hop_length, win_length, window, center, # type: ignore[attr-defined] | |
ERROR | |
test_nvfuser_correctness_kron_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_kron_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_kthvalue_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ldexp_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ldexp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_le_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lerp_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lerp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lgamma_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cholesky_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cholesky_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cholesky_ex_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cholesky_ex_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cond_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cond_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cross_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_cross_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_det_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_det_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eig_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eig_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigvals_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigvals_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigvalsh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_eigvalsh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_householder_product_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_householder_product_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_inv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_inv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_inv_ex_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_inv_ex_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_lstsq_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_lstsq_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_lstsq_grad_oriented_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_lu_factor_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_lu_factor_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_lu_factor_ex_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_lu_factor_ex_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_matrix_norm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_matrix_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_matrix_power_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_matrix_power_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_matrix_rank_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_matrix_rank_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_matrix_rank_hermitian_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_matrix_rank_hermitian_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_multi_dot_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_multi_dot_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_norm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_pinv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_linalg_pinv_cuda_complex64! Caching allocator allocated memory was 704000 and is now reported as 708096 on device 0. CUDA driver allocated memory was 2178940928 and is now 2178940928. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_linalg_pinv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_linalg_pinv_hermitian_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_pinv_hermitian_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_pinv_singular_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'test is slow; run with PYTORCH_TEST_WITH_SLOW to enable test' | |
test_nvfuser_correctness_linalg_pinv_singular_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'test is slow; run with PYTORCH_TEST_WITH_SLOW to enable test' | |
test_nvfuser_correctness_linalg_qr_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_qr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_slogdet_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_slogdet_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_solve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_solve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_solve_triangular_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_solve_triangular_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_svd_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_svd_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_svdvals_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_svdvals_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_tensorinv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_tensorinv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_linalg_tensorsolve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_linalg_tensorsolve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_linalg_vector_norm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_linalg_vector_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_log10_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log10_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log1p_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log2_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log_softmax_dtype_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_log_softmax_dtype_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logaddexp2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logaddexp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logcumsumexp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logdet_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_logical_and_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_and_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_not_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_not_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_or_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_or_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_xor_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logical_xor_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logit_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_logsumexp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_long_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_long_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_long_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_long_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lt_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lu_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lu_solve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_lu_solve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_lu_unpack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_lu_unpack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mH_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mH_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mT_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mT_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_fill_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_fill_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_scatter_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_scatter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_select_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_masked_select_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_matmul_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_matmul_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_matrix_exp_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_matrix_exp_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_max_binary_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_max_reduction_no_dim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_max_reduction_with_dim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_maximum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mean_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_median_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_meshgrid_list_of_tensors_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_meshgrid_list_of_tensors_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_meshgrid_variadic_tensors_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_meshgrid_variadic_tensors_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_min_binary_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_min_reduction_no_dim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_min_reduction_with_dim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_minimum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mode_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_movedim_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_movedim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_msort_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mul_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mul_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_multinomial_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mv_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mvlgamma_mvlgamma_p_1_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mvlgamma_mvlgamma_p_3_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_mvlgamma_mvlgamma_p_5_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nan_to_num_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nanmean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nanmedian_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nanquantile_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nansum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_narrow_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_narrow_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ne_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ne_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_neg_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_neg_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_empty_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... FAIL | |
test_nvfuser_correctness_new_empty_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... FAIL | |
test_nvfuser_correctness_new_full_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_full_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_ones_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_ones_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_zeros_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_new_zeros_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nextafter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_avg_pool1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_avg_pool2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_avg_pool3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_max_pool1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_max_pool2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_adaptive_max_pool3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_avg_pool1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_avg_pool2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_avg_pool3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_batch_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2363: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if size_prods == 1: | |
ok | |
test_nvfuser_correctness_nn_functional_batch_norm_without_cudnn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_bilinear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_celu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_conv1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_conv2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py:3224: UserWarning: Using padding='same' with even kernel lengths and odd dilation may require a zero-padded copy of the input be created (Triggered internally at ../../aten/src/ATen/native/Convolution.cpp:736.) | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
ok | |
test_nvfuser_correctness_nn_functional_conv_transpose1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_conv_transpose2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_conv_transpose3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_cosine_embedding_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_cosine_similarity_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_cross_entropy_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_nn_functional_cross_entropy_cuda_float32! Caching allocator allocated memory was 712192 and is now reported as 713728 on device 0. CUDA driver allocated memory was 2692743168 and is now 2692743168. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_nn_functional_ctc_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_dropout2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_dropout_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_elu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_embedding_bag_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_embedding_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_feature_alpha_dropout_with_train_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_feature_alpha_dropout_without_train_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_feature_alpha_dropout_without_train_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_fractional_max_pool2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_fractional_max_pool3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2756: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if var.size() != input.size(): | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2780: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if torch.any(var < 0): | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2768: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
elif input.size()[:-1] == var.size()[:-1] and var.size(-1) == 1: # Heteroscedastic case | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2762: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if input.size()[:-1] == var.size(): | |
ok | |
test_nvfuser_correctness_nn_functional_gelu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_glu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_grid_sample_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_group_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2475: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). | |
_verify_batch_size([input.size(0) * input.size(1) // num_groups, num_groups] + list(input.size()[2:])) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_nn_functional_group_norm_cuda_float32! Caching allocator allocated memory was 713728 and is now reported as 717824 on device 0. CUDA driver allocated memory was 2692743168 and is now 2692743168. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_nn_functional_hardshrink_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_hardsigmoid_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_hardswish_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_hardtanh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_hinge_embedding_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_huber_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3170: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if not (target.size() == input.size()): | |
ok | |
test_nvfuser_correctness_nn_functional_instance_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2408: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if size_prods == 1: | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_nn_functional_instance_norm_cuda_float32! Caching allocator allocated memory was 717824 and is now reported as 732672 on device 0. CUDA driver allocated memory was 2692743168 and is now 2692743168. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_nn_functional_interpolate_area_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3826: UserWarning: The default behavior for interpolate/upsample with float scale_factor changed in 1.6.0 to align with other frameworks/libraries, and now uses scale_factor directly, instead of relying on the computed output size. If you wish to restore the old behavior, please set recompute_scale_factor=True. See the documentation of nn.Upsample for details. | |
warnings.warn( | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3848: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. | |
(torch.floor((input.size(i + 2).float() * torch.tensor(scale_factors[i], dtype=torch.float32)).float())) | |
ok | |
test_nvfuser_correctness_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3771: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details. | |
warnings.warn( | |
ok | |
test_nvfuser_correctness_nn_functional_interpolate_bilinear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3771: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details. | |
warnings.warn( | |
ok | |
test_nvfuser_correctness_nn_functional_interpolate_linear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3771: UserWarning: Default upsampling behavior when mode=linear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details. | |
warnings.warn( | |
ok | |
test_nvfuser_correctness_nn_functional_interpolate_nearest_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3771: UserWarning: Default upsampling behavior when mode=trilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details. | |
warnings.warn( | |
ok | |
test_nvfuser_correctness_nn_functional_kl_div_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:2863: UserWarning: reduction: 'mean' divides the total loss by both the batch size and the support size.'batchmean' divides only by the batch size, and aligns with the KL div math definition.'mean' will be changed to behave the same as 'batchmean' in the next major release. | |
warnings.warn( | |
ok | |
test_nvfuser_correctness_nn_functional_layer_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_leaky_relu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_linear_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_linear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_local_response_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_logsigmoid_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_max_pool1d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:682: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool1d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:651: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool1d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
ok | |
test_nvfuser_correctness_nn_functional_max_pool2d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:780: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool2d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:749: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool2d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
ok | |
test_nvfuser_correctness_nn_functional_max_pool3d_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:878: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool3d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:847: UserWarning: Note that order of the arguments: ceil_mode and return_indices will changeto match the args list in nn.MaxPool3d in a future release. | |
warnings.warn("Note that order of the arguments: ceil_mode and return_indices will change" | |
ok | |
test_nvfuser_correctness_nn_functional_mish_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_mse_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3228: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
if not (target.size() == input.size()): | |
ok | |
test_nvfuser_correctness_nn_functional_nll_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_nn_functional_normalize_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_normalize_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_circular_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:4761: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
assert padding[-(idx * 2 + 1)] <= size, "Padding value causes wrapping around more than once." | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:4762: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
assert padding[-(idx * 2 + 2)] <= size, "Padding value causes wrapping around more than once." | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:4764: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! | |
assert ( | |
ok | |
test_nvfuser_correctness_nn_functional_pad_circular_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_constant_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_constant_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_reflect_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_reflect_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_replicate_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pad_replicate_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pairwise_distance_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pairwise_distance_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pixel_shuffle_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pixel_shuffle_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pixel_unshuffle_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_pixel_unshuffle_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_poisson_nll_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_prelu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_nn_functional_relu6_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_relu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_rrelu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_selu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_silu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softmin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softmin_with_dtype_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softmin_with_dtype_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softplus_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softshrink_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_softsign_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_nn_functional_softsign_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_tanhshrink_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_nn_functional_tanhshrink_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_threshold_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_unfold_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_unfold_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nn_functional_upsample_bilinear_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:4023: UserWarning: nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead. | |
warnings.warn("nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.") | |
ok | |
test_nvfuser_correctness_nn_functional_upsample_nearest_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/nn/functional.py:3968: UserWarning: nn.functional.upsample_nearest is deprecated. Use nn.functional.interpolate instead. | |
warnings.warn("nn.functional.upsample_nearest is deprecated. Use nn.functional.interpolate instead.") | |
ok | |
test_nvfuser_correctness_nonzero_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_nonzero_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_fro_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_fro_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_inf_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_inf_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_norm_nuc_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_norm_nuc_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_normal_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_normal_number_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ones_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ones_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ormqr_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ormqr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_outer_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_outer_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_pca_lowrank_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_permute_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_permute_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_pinverse_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_pinverse_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polar_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polygamma_polygamma_n_0_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polygamma_polygamma_n_1_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polygamma_polygamma_n_2_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polygamma_polygamma_n_3_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_polygamma_polygamma_n_4_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_positive_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_positive_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_pow_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_pow_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_prod_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_prod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_put_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_put_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_qr_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_qr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_quantile_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rad2deg_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rand_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rand_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_randint_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_randn_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_randn_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ravel_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_ravel_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_real_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_reciprocal_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_reciprocal_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_remainder_autodiffed_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_remainder_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_renorm_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_renorm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_repeat_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_repeat_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_repeat_interleave_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_repeat_interleave_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_reshape_as_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_reshape_as_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_reshape_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_reshape_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resize__cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resize__cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resize_as__cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resize_as__cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resolve_conj_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resolve_conj_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resolve_neg_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_resolve_neg_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_roll_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_roll_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rot90_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rot90_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_round_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsqrt_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsqrt_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsub_rsub_scalar_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsub_rsub_scalar_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsub_rsub_tensor_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_rsub_rsub_tensor_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_scatter_add_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_scatter_add_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_scatter_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_scatter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_searchsorted_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ERROR | |
test_nvfuser_correctness_select_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_select_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_select_scatter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sgn_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sgn_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_short_channels_last_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_short_channels_last_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_short_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_short_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sigmoid_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sigmoid_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sign_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_signbit_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sin_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sinc_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sinc_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sinh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sinh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_slice_scatter_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_softmax_with_dtype_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_softmax_with_dtype_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_solve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_solve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_sort_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_entr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_erfcx_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_i0e_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_i1_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_i1e_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_ndtr_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_ndtri_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_polygamma_special_polygamma_n_0_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_xlog1py_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_zeta_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_special_zeta_grad_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_list_args_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_list_args_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_with_sizes_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_split_with_sizes_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sqrt_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sqrt_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_square_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_square_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_squeeze_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_squeeze_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_stack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_stack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_std_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_std_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_std_mean_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_std_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_stft_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:1304: UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_stft_cuda_complex64! Caching allocator allocated memory was 834560 and is now reported as 835072 on device 0. CUDA driver allocated memory was 2703228928 and is now 2703228928. | |
warnings.warn(msg) | |
ok | |
test_nvfuser_correctness_stft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... /data/home/dberard/.local/lib/python3.9/site-packages/torch/functional.py:695: UserWarning: stft will soon require the return_complex parameter be given for real inputs, and will further require that return_complex=True in a future PyTorch release. (Triggered internally at ../../aten/src/ATen/native/SpectralOps.cpp:798.) | |
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] | |
ERROR | |
test_nvfuser_correctness_sub_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sub_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sum_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sum_to_size_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_sum_to_size_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_svd_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_svd_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_svd_lowrank_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_symeig_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_symeig_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_t_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_t_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_take_along_dim_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_take_along_dim_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_take_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_take_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tan_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tan_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tanh_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tanh_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tensor_split_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tensor_split_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tensordot_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tensordot_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tile_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tile_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_to_sparse_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... [W pybind_utils.cpp:39] Warning: Using sparse tensors in TorchScript is experimental. Many optimization pathways have not been thoroughly tested with sparse tensors. Please include the fact that the network is running sparse tensors in any bug reports submitted. (function operator()) | |
/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py:415: UserWarning: Using sparse tensors in TorchScript is experimental. Many optimization pathways have not been thoroughly tested with sparse tensors. Please include the fact that the network is running sparse tensors in any bug reports submitted. (Triggered internally at ../../../torch/csrc/jit/python/pybind_utils.h:691.) | |
return callable(*args, **kwargs) | |
ok | |
test_nvfuser_correctness_to_sparse_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_topk_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trace_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trace_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_transpose_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_transpose_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trapezoid_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trapezoid_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trapz_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trapz_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_triangular_solve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_triangular_solve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... skipped 'no MAGMA library detected' | |
test_nvfuser_correctness_tril_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_tril_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_triu_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_triu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_true_divide_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_true_divide_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_trunc_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unfold_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unfold_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unique_consecutive_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unique_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unsqueeze_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_unsqueeze_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_var_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_var_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_var_mean_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_var_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vdot_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vdot_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_as_complex_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_as_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_as_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_as_real_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_view_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vsplit_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vsplit_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vstack_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_vstack_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_where_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_where_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_xlogy_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_zero__cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_zero__cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_zeros_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
test_nvfuser_correctness_zeros_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) ... ok | |
====================================================================== | |
ERROR: test_nvfuser_correctness___getitem___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness___getitem___cuda_complex64! Caching allocator allocated memory was 512 and is now reported as 35328 on device 0. CUDA driver allocated memory was 1417674752 and is now 1419771904. | |
====================================================================== | |
ERROR: test_nvfuser_correctness___getitem___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness___getitem___cuda_float32! Caching allocator allocated memory was 35328 and is now reported as 70144 on device 0. CUDA driver allocated memory was 1419771904 and is now 1421869056. | |
====================================================================== | |
ERROR: test_nvfuser_correctness___rdiv___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 81, in _script_method_graph_for | |
dbs = parent.get_debug_state() | |
RuntimeError: optimized_plan_INTERNAL ASSERT FAILED at "../../torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp":625, please report a bug to PyTorch. | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 369, in traced_fn | |
traced_fn.last_graph = traced.graph_for(*inputs_tensors) # type: ignore[attr-defined] | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 77, in _graph_for | |
return _script_method_graph_for(self, self, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 106, in _script_method_graph_for | |
self(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 418, in prof_func_call | |
return prof_callable(func_call, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 415, in prof_callable | |
return callable(*args, **kwargs) | |
RuntimeError: The following operation failed in the TorchScript interpreter. | |
Traceback of TorchScript (most recent call last): | |
RuntimeError: scalar_typeINTERNAL ASSERT FAILED at "../../torch/csrc/jit/codegen/fuser/compiler.cpp":265, please report a bug to PyTorch. | |
====================================================================== | |
ERROR: test_nvfuser_correctness___rpow___cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness___rpow___cuda_complex64! Caching allocator allocated memory was 70144 and is now reported as 74240 on device 0. CUDA driver allocated memory was 1700790272 and is now 1702887424. | |
====================================================================== | |
ERROR: test_nvfuser_correctness___rpow___cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness___rpow___cuda_float32! Caching allocator allocated memory was 74240 and is now reported as 78336 on device 0. CUDA driver allocated memory was 1702887424 and is now 1704984576. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_amax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_amax_cuda_float32! Caching allocator allocated memory was 78336 and is now reported as 106496 on device 0. CUDA driver allocated memory was 1704984576 and is now 1707081728. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_amin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_amin_cuda_float32! Caching allocator allocated memory was 106496 and is now reported as 134656 on device 0. CUDA driver allocated memory was 1707081728 and is now 1709178880. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_log_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_log_softmax_cuda_float32! Caching allocator allocated memory was 134656 and is now reported as 145408 on device 0. CUDA driver allocated memory was 1709178880 and is now 1711276032. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_mean_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_mean_cuda_complex64! Caching allocator allocated memory was 145408 and is now reported as 213504 on device 0. CUDA driver allocated memory was 1711276032 and is now 1713373184. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_mean_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_mean_cuda_float32! Caching allocator allocated memory was 213504 and is now reported as 281600 on device 0. CUDA driver allocated memory was 1713373184 and is now 1715470336. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_norm_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_norm_cuda_float32! Caching allocator allocated memory was 281600 and is now reported as 422400 on device 0. CUDA driver allocated memory was 1715470336 and is now 1717567488. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_prod_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_prod_cuda_complex64! Caching allocator allocated memory was 422400 and is now reported as 450560 on device 0. CUDA driver allocated memory was 1717567488 and is now 1719664640. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_prod_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_prod_cuda_float32! Caching allocator allocated memory was 450560 and is now reported as 478720 on device 0. CUDA driver allocated memory was 1719664640 and is now 1721761792. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_softmax_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_softmax_cuda_float32! Caching allocator allocated memory was 478720 and is now reported as 489472 on device 0. CUDA driver allocated memory was 1721761792 and is now 1723858944. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_softmin_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_softmin_cuda_float32! Caching allocator allocated memory was 489472 and is now reported as 500224 on device 0. CUDA driver allocated memory was 1723858944 and is now 1725956096. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_sum_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_sum_cuda_complex64! Caching allocator allocated memory was 500224 and is now reported as 528384 on device 0. CUDA driver allocated memory was 1725956096 and is now 1728053248. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_sum_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_sum_cuda_float32! Caching allocator allocated memory was 528384 and is now reported as 556544 on device 0. CUDA driver allocated memory was 1728053248 and is now 1730150400. | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_var_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 81, in _script_method_graph_for | |
dbs = parent.get_debug_state() | |
RuntimeError: optimized_plan_INTERNAL ASSERT FAILED at "../../torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp":625, please report a bug to PyTorch. | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 369, in traced_fn | |
traced_fn.last_graph = traced.graph_for(*inputs_tensors) # type: ignore[attr-defined] | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 77, in _graph_for | |
return _script_method_graph_for(self, self, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 106, in _script_method_graph_for | |
self(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 418, in prof_func_call | |
return prof_callable(func_call, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 415, in prof_callable | |
return callable(*args, **kwargs) | |
RuntimeError: The following operation failed in the TorchScript interpreter. | |
Traceback of TorchScript (most recent call last): | |
RuntimeError: default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(34): error: name followed by "::" must be a class or namespace name | |
default_program(34): error: expected an identifier | |
default_program(34): error: expected a ";" | |
default_program(36): error: name followed by "::" must be a class or namespace name | |
default_program(36): error: expected an identifier | |
default_program(36): error: expected a ";" | |
default_program(40): error: name followed by "::" must be a class or namespace name | |
default_program(40): error: expected an identifier | |
default_program(40): error: expected a ";" | |
default_program(58): error: identifier "t0_buf" is undefined | |
default_program(58): error: identifier "t0" is undefined | |
default_program(59): error: identifier "t1_buf" is undefined | |
default_program(59): error: identifier "t1" is undefined | |
default_program(65): error: name followed by "::" must be a class or namespace name | |
default_program(65): error: expected an identifier | |
default_program(65): error: expected a ";" | |
default_program(66): error: name followed by "::" must be a class or namespace name | |
default_program(66): error: expected an identifier | |
default_program(66): error: expected a ";" | |
default_program(69): error: name followed by "::" must be a class or namespace name | |
default_program(69): error: expected an identifier | |
default_program(69): error: expected a ";" | |
default_program(70): error: name followed by "::" must be a class or namespace name | |
default_program(70): error: expected an identifier | |
default_program(70): error: expected a ";" | |
default_program(71): error: identifier "t3_buf" is undefined | |
default_program(71): error: identifier "n5" is undefined | |
default_program(67): warning: variable "n2" was declared but never referenced | |
default_program(68): warning: variable "n3" was declared but never referenced | |
default_program(75): error: identifier "t3" is undefined | |
default_program(75): error: identifier "t3_buf" is undefined | |
default_program(49): warning: variable "t0_linearIndex" was declared but never referenced | |
default_program(51): warning: variable "t1_linearIndex" was declared but never referenced | |
default_program(53): warning: variable "t2_linearIndex" was declared but never referenced | |
default_program(55): warning: variable "t3_linearIndex" was declared but never referenced | |
default_program(93): error: name followed by "::" must be a class or namespace name | |
default_program(93): error: expected an identifier | |
default_program(93): error: expected a ";" | |
default_program(94): error: name followed by "::" must be a class or namespace name | |
default_program(94): error: expected an identifier | |
default_program(94): error: expected a ";" | |
default_program(97): error: name followed by "::" must be a class or namespace name | |
default_program(97): error: expected an identifier | |
default_program(97): error: expected a ";" | |
default_program(98): error: name followed by "::" must be a class or namespace name | |
default_program(98): error: expected an identifier | |
default_program(98): error: expected a ";" | |
default_program(99): error: identifier "t3" is undefined | |
default_program(99): error: identifier "n5" is undefined | |
default_program(83): warning: variable "t0_offset" was declared but never referenced | |
default_program(84): warning: variable "t0_linearIndex" was declared but never referenced | |
default_program(85): warning: variable "t1_offset" was declared but never referenced | |
default_program(86): warning: variable "t1_linearIndex" was declared but never referenced | |
default_program(88): warning: variable "t2_linearIndex" was declared but never referenced | |
default_program(90): warning: variable "t3_linearIndex" was declared but never referenced | |
default_program(96): warning: variable "n3" was declared but never referenced | |
58 errors detected in the compilation of "default_program". | |
====================================================================== | |
ERROR: test_nvfuser_correctness__masked_var_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness__masked_var_cuda_float32! Caching allocator allocated memory was 556544 and is now reported as 694784 on device 0. CUDA driver allocated memory was 1730150400 and is now 1732247552. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_allclose_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 365, in traced_fn | |
traced = torch.jit.trace(fn_tensors, inputs_tensors, check_trace=False) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_trace.py", line 786, in trace | |
traced = torch._C._create_function_from_trace( | |
RuntimeError: Only tensors, lists, tuples of tensors, or dictionary of tensors can be output from traced functions | |
====================================================================== | |
ERROR: test_nvfuser_correctness_allclose_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 365, in traced_fn | |
traced = torch.jit.trace(fn_tensors, inputs_tensors, check_trace=False) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_trace.py", line 786, in trace | |
traced = torch._C._create_function_from_trace( | |
RuntimeError: Only tensors, lists, tuples of tensors, or dictionary of tensors can be output from traced functions | |
====================================================================== | |
ERROR: test_nvfuser_correctness_gradient_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 365, in traced_fn | |
traced = torch.jit.trace(fn_tensors, inputs_tensors, check_trace=False) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_trace.py", line 786, in trace | |
traced = torch._C._create_function_from_trace( | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 354, in new_fn | |
return fn(*(args[i] if s == 's' else next(tensors) for i, s in enumerate(source)), **kwargs) | |
RuntimeError: Tracing a list of arbitrary type is currently not supported! | |
====================================================================== | |
ERROR: test_nvfuser_correctness_gradient_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 365, in traced_fn | |
traced = torch.jit.trace(fn_tensors, inputs_tensors, check_trace=False) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_trace.py", line 786, in trace | |
traced = torch._C._create_function_from_trace( | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 354, in new_fn | |
return fn(*(args[i] if s == 's' else next(tensors) for i, s in enumerate(source)), **kwargs) | |
RuntimeError: Tracing a list of arbitrary type is currently not supported! | |
====================================================================== | |
ERROR: test_nvfuser_correctness_istft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_istft_cuda_float32! Caching allocator allocated memory was 703488 and is now reported as 704000 on device 0. CUDA driver allocated memory was 2176843776 and is now 2178940928. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_linalg_pinv_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_linalg_pinv_cuda_float32! Caching allocator allocated memory was 708096 and is now reported as 712192 on device 0. CUDA driver allocated memory was 2178940928 and is now 2181038080. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_linalg_tensorsolve_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 788, in dep_fn | |
return fn(slf, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3224, in test_nvfuser_correctness | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
RuntimeError: Calling torch.lu_solve on a CUDA tensor requires compiling PyTorch with MAGMA. Please rebuild with MAGMA. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_linalg_tensorsolve_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 788, in dep_fn | |
return fn(slf, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3224, in test_nvfuser_correctness | |
ref = variant(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
RuntimeError: Calling torch.lu_solve on a CUDA tensor requires compiling PyTorch with MAGMA. Please rebuild with MAGMA. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_nn_functional_nll_loss_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_nn_functional_nll_loss_cuda_float32! Caching allocator allocated memory was 732672 and is now reported as 746496 on device 0. CUDA driver allocated memory was 2692743168 and is now 2694840320. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_nn_functional_prelu_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_nn_functional_prelu_cuda_float32! Caching allocator allocated memory was 746496 and is now reported as 755712 on device 0. CUDA driver allocated memory was 2694840320 and is now 2696937472. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_nn_functional_softsign_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 81, in _script_method_graph_for | |
dbs = parent.get_debug_state() | |
RuntimeError: optimized_plan_INTERNAL ASSERT FAILED at "../../torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp":625, please report a bug to PyTorch. | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 369, in traced_fn | |
traced_fn.last_graph = traced.graph_for(*inputs_tensors) # type: ignore[attr-defined] | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 77, in _graph_for | |
return _script_method_graph_for(self, self, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 106, in _script_method_graph_for | |
self(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 418, in prof_func_call | |
return prof_callable(func_call, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 415, in prof_callable | |
return callable(*args, **kwargs) | |
RuntimeError: The following operation failed in the TorchScript interpreter. | |
Traceback of TorchScript (most recent call last): | |
RuntimeError: default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(34): error: name followed by "::" must be a class or namespace name | |
default_program(34): error: expected an identifier | |
default_program(34): error: expected a ";" | |
default_program(36): error: name followed by "::" must be a class or namespace name | |
default_program(36): error: expected an identifier | |
default_program(36): error: expected a ";" | |
default_program(58): error: identifier "t0_buf" is undefined | |
default_program(58): error: identifier "t0" is undefined | |
default_program(63): error: name followed by "::" must be a class or namespace name | |
default_program(63): error: expected an identifier | |
default_program(63): error: expected a ";" | |
default_program(65): error: identifier "n0" is undefined | |
default_program(67): error: name followed by "::" must be a class or namespace name | |
default_program(67): error: expected an identifier | |
default_program(67): error: expected a ";" | |
default_program(68): error: identifier "t1_buf" is undefined | |
default_program(68): error: identifier "n4" is undefined | |
default_program(72): error: identifier "t1" is undefined | |
default_program(72): error: identifier "t1_buf" is undefined | |
default_program(94): error: name followed by "::" must be a class or namespace name | |
default_program(94): error: expected an identifier | |
default_program(94): error: expected a ";" | |
default_program(96): error: identifier "n0" is undefined | |
default_program(98): error: name followed by "::" must be a class or namespace name | |
default_program(98): error: expected an identifier | |
default_program(98): error: expected a ";" | |
default_program(99): error: identifier "t1" is undefined | |
default_program(99): error: identifier "n4" is undefined | |
38 errors detected in the compilation of "default_program". | |
====================================================================== | |
ERROR: test_nvfuser_correctness_nn_functional_tanhshrink_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 81, in _script_method_graph_for | |
dbs = parent.get_debug_state() | |
RuntimeError: optimized_plan_INTERNAL ASSERT FAILED at "../../torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp":625, please report a bug to PyTorch. | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 381, in instantiated_test | |
raise rte | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 369, in traced_fn | |
traced_fn.last_graph = traced.graph_for(*inputs_tensors) # type: ignore[attr-defined] | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 77, in _graph_for | |
return _script_method_graph_for(self, self, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/jit/_fuser.py", line 106, in _script_method_graph_for | |
self(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 418, in prof_func_call | |
return prof_callable(func_call, *args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 415, in prof_callable | |
return callable(*args, **kwargs) | |
RuntimeError: The following operation failed in the TorchScript interpreter. | |
Traceback of TorchScript (most recent call last): | |
RuntimeError: default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(30): error: name followed by "::" must be a class or namespace name | |
default_program(30): error: expected an identifier | |
default_program(30): error: invalid combination of type specifiers | |
default_program(30): error: too few arguments for class template "TensorInfo" | |
default_program(30): error: expected a type specifier | |
default_program(34): error: name followed by "::" must be a class or namespace name | |
default_program(34): error: expected an identifier | |
default_program(34): error: expected a ";" | |
default_program(36): error: name followed by "::" must be a class or namespace name | |
default_program(36): error: expected an identifier | |
default_program(36): error: expected a ";" | |
default_program(58): error: identifier "t0_buf" is undefined | |
default_program(58): error: identifier "t0" is undefined | |
default_program(63): error: name followed by "::" must be a class or namespace name | |
default_program(63): error: expected an identifier | |
default_program(63): error: expected a ";" | |
default_program(65): error: name followed by "::" must be a class or namespace name | |
default_program(65): error: expected an identifier | |
default_program(65): error: expected a ";" | |
default_program(66): error: name followed by "::" must be a class or namespace name | |
default_program(66): error: expected an identifier | |
default_program(66): error: expected a ";" | |
default_program(67): error: identifier "t1_buf" is undefined | |
default_program(67): error: identifier "n3" is undefined | |
default_program(64): warning: variable "n1" was declared but never referenced | |
default_program(71): error: identifier "t1" is undefined | |
default_program(71): error: identifier "t1_buf" is undefined | |
default_program(93): error: name followed by "::" must be a class or namespace name | |
default_program(93): error: expected an identifier | |
default_program(93): error: expected a ";" | |
default_program(95): error: name followed by "::" must be a class or namespace name | |
default_program(95): error: expected an identifier | |
default_program(95): error: expected a ";" | |
default_program(96): error: name followed by "::" must be a class or namespace name | |
default_program(96): error: expected an identifier | |
default_program(96): error: expected a ";" | |
default_program(97): error: identifier "t1" is undefined | |
default_program(97): error: identifier "n3" is undefined | |
default_program(94): warning: variable "n1" was declared but never referenced | |
42 errors detected in the compilation of "default_program". | |
====================================================================== | |
ERROR: test_nvfuser_correctness_repeat_interleave_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_repeat_interleave_cuda_complex64! Caching allocator allocated memory was 755712 and is now reported as 756224 on device 0. CUDA driver allocated memory was 2696937472 and is now 2699034624. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_repeat_interleave_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_repeat_interleave_cuda_float32! Caching allocator allocated memory was 756224 and is now reported as 756736 on device 0. CUDA driver allocated memory was 2699034624 and is now 2701131776. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_searchsorted_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_searchsorted_cuda_float32! Caching allocator allocated memory was 756736 and is now reported as 834560 on device 0. CUDA driver allocated memory was 2701131776 and is now 2703228928. | |
====================================================================== | |
ERROR: test_nvfuser_correctness_stft_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1324, in __exit__ | |
raise RuntimeError(msg) | |
RuntimeError: CUDA driver API confirmed a leak in __main__.TestCudaFuserOpInfoCUDA.test_nvfuser_correctness_stft_cuda_float32! Caching allocator allocated memory was 835072 and is now reported as 835584 on device 0. CUDA driver allocated memory was 2703228928 and is now 2705326080. | |
====================================================================== | |
FAIL: test_nvfuser_correctness_empty_like_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 366, in traced_fn | |
self.assertExportImport(traced.graph, inputs_tensors) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 148, in assertExportImport | |
self.assertExportImportModule(m, inputs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 154, in assertExportImportModule | |
self.assertEqual(a, b, "Results of original model and " | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 2078, in assertEqual | |
super().assertTrue(result, msg=self._get_assert_msg(msg, debug_msg=debug_msg)) | |
AssertionError: False is not true : Tensors failed to compare as equal!With rtol=1.3e-06 and atol=1e-05, found 1 element(s) (out of 1) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 13.124701040829311 ((5.323528289794922-4.462103843688965j) vs. (1.7476329803466797+8.166069030761719j)), which occurred at index 0. | |
Results of original model and exported/imported version of model differed | |
====================================================================== | |
FAIL: test_nvfuser_correctness_empty_like_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 366, in traced_fn | |
self.assertExportImport(traced.graph, inputs_tensors) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 148, in assertExportImport | |
self.assertExportImportModule(m, inputs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 154, in assertExportImportModule | |
self.assertEqual(a, b, "Results of original model and " | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 2078, in assertEqual | |
super().assertTrue(result, msg=self._get_assert_msg(msg, debug_msg=debug_msg)) | |
AssertionError: False is not true : Tensors failed to compare as equal!With rtol=1.3e-06 and atol=1e-05, found 1 element(s) (out of 1) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 8.957265615463257 (-6.603706359863281 vs. 2.3535592555999756), which occurred at index 0. | |
Results of original model and exported/imported version of model differed | |
====================================================================== | |
FAIL: test_nvfuser_correctness_new_empty_cuda_complex64 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 366, in traced_fn | |
self.assertExportImport(traced.graph, inputs_tensors) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 148, in assertExportImport | |
self.assertExportImportModule(m, inputs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 154, in assertExportImportModule | |
self.assertEqual(a, b, "Results of original model and " | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 2078, in assertEqual | |
super().assertTrue(result, msg=self._get_assert_msg(msg, debug_msg=debug_msg)) | |
AssertionError: False is not true : Tensors failed to compare as equal!With rtol=1.3e-06 and atol=1e-05, found 1 element(s) (out of 1) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 7.400782869960298 ((-7.351451396942139+0.5127878189086914j) vs. (-0.12964248657226562+2.130521774291992j)), which occurred at index 0. | |
Results of original model and exported/imported version of model differed | |
====================================================================== | |
FAIL: test_nvfuser_correctness_new_empty_cuda_float32 (__main__.TestCudaFuserOpInfoCUDA) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 1553, in wrapper | |
method(*args, **kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 376, in instantiated_test | |
result = test(self, **param_kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_device_type.py", line 753, in test_wrapper | |
return test(*args, **kwargs) | |
File "/data/home/dberard/repos/pytorch/test/test_jit_cuda_fuser.py", line 3226, in test_nvfuser_correctness | |
trace(*clone_inputs((sample.input, *sample.args)), **sample.kwargs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/jit_metaprogramming_utils.py", line 366, in traced_fn | |
self.assertExportImport(traced.graph, inputs_tensors) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 148, in assertExportImport | |
self.assertExportImportModule(m, inputs) | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_jit.py", line 154, in assertExportImportModule | |
self.assertEqual(a, b, "Results of original model and " | |
File "/data/home/dberard/.local/lib/python3.9/site-packages/torch/testing/_internal/common_utils.py", line 2078, in assertEqual | |
super().assertTrue(result, msg=self._get_assert_msg(msg, debug_msg=debug_msg)) | |
AssertionError: False is not true : Tensors failed to compare as equal!With rtol=1.3e-06 and atol=1e-05, found 1 element(s) (out of 1) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 0.8986730575561523 (-0.8986730575561523 vs. 0.0), which occurred at index 0. | |
Results of original model and exported/imported version of model differed | |
---------------------------------------------------------------------- | |
Ran 832 tests in 1222.255s | |
FAILED (failures=4, errors=35, skipped=57) | |
srun: error: dev-st-p4d24xlarge-5: task 0: Exited with exit code 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment