Last active
December 14, 2022 10:18
-
-
Save eldar/97390726588eecd0cffa615dc194d126 to your computer and use it in GitHub Desktop.
Torch Dynamo bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Running DDP training on rank 1. | |
Running DDP training on rank 0. | |
0%| | 0/1000 [00:00<?, ?it/s] | |
0%| | 0/1000 [00:00<?, ?it/s][2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping enable_dynamic /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping inner /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __getitem__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping Optional /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __repr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping enable_dynamic /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _type_check /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _type_convert /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping inner /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __getitem__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __eq__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping Optional /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __hash__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __repr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping Union /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _type_check /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping <genexpr> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _type_convert /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _remove_dups_flatten /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _deduplicate /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __eq__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __hash__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping __setattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping Union /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,736] torch._dynamo.eval_frame: [DEBUG] skipping _is_dunder /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping <genexpr> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping <genexpr> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping _remove_dups_flatten /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping _deduplicate /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __setattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping _is_dunder /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping <genexpr> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/typing.py | |
[2022-12-14 10:11:05,737] torch._dynamo.convert_frame: [DEBUG] skipping because no torch.* _collect_type_vars /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/typing_extensions.py 123 | |
[2022-12-14 10:11:05,737] torch._dynamo.convert_frame: [DEBUG] skipping because no torch.* _should_collect_from_parameters /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/typing_extensions.py 111 | |
[2022-12-14 10:11:05,737] torch._dynamo.convert_frame: [DEBUG] skipping because no torch.* _collect_type_vars /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/typing_extensions.py 123 | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping annotate /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/jit/__init__.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __call__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:05,737] torch._dynamo.convert_frame: [DEBUG] skipping because no torch.* _should_collect_from_parameters /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/typing_extensions.py 111 | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __setattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __instancecheck__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parameter.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping annotate /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/jit/__init__.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping notify_join_context /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/algorithms/join.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __call__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __getattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping _check_sync_bufs_pre_fwd /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping will_sync_module_buffers /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __setattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping _run_ddp_forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,737] torch._dynamo.eval_frame: [DEBUG] skipping __instancecheck__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parameter.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _to_kwargs /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _recursive_to /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping to_map /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping notify_join_context /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/algorithms/join.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _is_namedtuple /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping __getattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping <listcomp> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _check_sync_bufs_pre_fwd /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping will_sync_module_buffers /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _inside_ddp_forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _run_ddp_forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _call_impl /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _to_kwargs /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _recursive_to /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping to_map /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _is_namedtuple /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping <listcomp> /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/utils.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _inside_ddp_forward /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py | |
[2022-12-14 10:11:05,738] torch._dynamo.eval_frame: [DEBUG] skipping _call_impl /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py | |
[2022-12-14 10:11:05,742] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:52 | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL F [] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:52 | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL F [] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR embedding [TorchVariable(<module 'torch.nn.functional' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/functional.py'>)] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR embedding [TorchVariable(<module 'torch.nn.functional' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/functional.py'>)] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST idx [TorchVariable(<function embedding at 0x7f8d2e660670>)] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST self [TorchVariable(<function embedding at 0x7f8d2e660670>), TensorVariable()] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR pose_update_r [TorchVariable(<function embedding at 0x7f8d2e660670>), TensorVariable(), NNModuleVariable()] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST idx [TorchVariable(<function embedding at 0x7f25ea230670>)] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST self [TorchVariable(<function embedding at 0x7f25ea230670>), TensorVariable()] | |
[2022-12-14 10:11:05,743] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR pose_update_r [TorchVariable(<function embedding at 0x7f25ea230670>), TensorVariable(), NNModuleVariable()] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 0.175 [TorchVariable(<function embedding at 0x7f8d2e660670>), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST ('max_norm',) [TorchVariable(<function embedding at 0x7f8d2e660670>), TensorVariable(), TensorVariable(), ConstantVariable(float)] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION_KW 3 [TorchVariable(<function embedding at 0x7f8d2e660670>), TensorVariable(), TensorVariable(), ConstantVariable(float), ConstantVariable(tuple)] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 0.175 [TorchVariable(<function embedding at 0x7f25ea230670>), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST ('max_norm',) [TorchVariable(<function embedding at 0x7f25ea230670>), TensorVariable(), TensorVariable(), ConstantVariable(float)] | |
[2022-12-14 10:11:05,745] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION_KW 3 [TorchVariable(<function embedding at 0x7f25ea230670>), TensorVariable(), TensorVariable(), ConstantVariable(float), ConstantVariable(tuple)] | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST r [TensorVariable()] | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:54 | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL dist [] | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST r [TensorVariable()] | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:54 | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR get_rank [TorchVariable(<module 'torch.distributed' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/__init__.py'>)] | |
[2022-12-14 10:11:05,747] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL dist [] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR get_rank [TorchVariable(<module 'torch.distributed' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/__init__.py'>)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 0 [TorchVariable(<function get_rank at 0x7f8d2e5be050>)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 0 [TorchVariable(<function get_rank at 0x7f25ea18e050>)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 1 [ConstantVariable(int)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE COMPARE_OP == [ConstantVariable(int), ConstantVariable(int)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE POP_JUMP_IF_FALSE 50 [ConstantVariable(bool)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 1 [ConstantVariable(int)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:56 | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL get_matrix [] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE COMPARE_OP == [ConstantVariable(int), ConstantVariable(int)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE POP_JUMP_IF_FALSE 50 [ConstantVariable(bool)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:55 | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL print [] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST r.device before call: [BuiltinVariable(print)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST r [BuiltinVariable(print), ConstantVariable(str)] | |
[2022-12-14 10:11:05,748] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR device [BuiltinVariable(print), ConstantVariable(str), TensorVariable()] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST rank: [BuiltinVariable(print), ConstantVariable(str), TorchVariable(cuda:1)] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL dist [BuiltinVariable(print), ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str)] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST r [UserFunctionVariable()] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 1 [UserFunctionVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR get_rank [BuiltinVariable(print), ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str), TorchVariable(<module 'torch.distributed' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/__init__.py'>)] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 0 [BuiltinVariable(print), ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str), TorchVariable(<function get_rank at 0x7f25ea18e050>)] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] INLINING <code object get_matrix at 0x7f8dd0353b50, file "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 25> | |
26 0 LOAD_GLOBAL 0 (dist) | |
2 LOAD_METHOD 1 (get_rank) | |
4 CALL_METHOD 0 | |
6 LOAD_CONST 1 (1) | |
8 COMPARE_OP 2 (==) | |
10 POP_JUMP_IF_FALSE 16 (to 32) | |
27 12 LOAD_GLOBAL 2 (print) | |
14 LOAD_CONST 2 ('r.device inside call:') | |
16 LOAD_FAST 0 (r) | |
18 LOAD_ATTR 3 (device) | |
20 LOAD_CONST 3 ('rank:') | |
22 LOAD_GLOBAL 0 (dist) | |
24 LOAD_METHOD 1 (get_rank) | |
26 CALL_METHOD 0 | |
28 CALL_FUNCTION 4 | |
30 POP_TOP | |
29 >> 32 LOAD_FAST 0 (r) | |
34 LOAD_METHOD 4 (unbind) | |
36 LOAD_CONST 4 (-1) | |
38 CALL_METHOD 1 | |
40 UNPACK_SEQUENCE 3 | |
42 STORE_FAST 1 (x) | |
44 STORE_FAST 2 (y) | |
46 STORE_FAST 3 (z) | |
30 48 LOAD_GLOBAL 5 (torch) | |
50 LOAD_ATTR 6 (stack) | |
52 LOAD_FAST 3 (z) | |
54 LOAD_FAST 2 (y) | |
31 56 LOAD_FAST 3 (z) | |
58 LOAD_FAST 1 (x) | |
32 60 LOAD_FAST 2 (y) | |
62 LOAD_FAST 1 (x) | |
30 64 BUILD_LIST 6 | |
32 66 LOAD_CONST 4 (-1) | |
30 68 LOAD_CONST 5 (('dim',)) | |
70 CALL_FUNCTION_KW 2 | |
72 STORE_FAST 4 (R) | |
33 74 LOAD_FAST 4 (R) | |
76 LOAD_ATTR 7 (reshape) | |
78 BUILD_LIST 0 | |
80 LOAD_FAST 1 (x) | |
82 LOAD_ATTR 8 (shape) | |
84 LIST_EXTEND 1 | |
86 LOAD_CONST 6 (3) | |
88 LIST_APPEND 1 | |
90 LOAD_CONST 7 (2) | |
92 LIST_APPEND 1 | |
94 LIST_TO_TUPLE | |
96 CALL_FUNCTION_EX 0 | |
98 RETURN_VALUE | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 4 [BuiltinVariable(print), ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str), ConstantVariable(int)] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:26 | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL dist [] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR get_rank [TorchVariable(<module 'torch.distributed' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/distributed/__init__.py'>)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 0 [TorchVariable(<function get_rank at 0x7f8d2e5be050>)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 1 [ConstantVariable(int)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE COMPARE_OP == [ConstantVariable(int), ConstantVariable(int)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE POP_JUMP_IF_FALSE 32 [ConstantVariable(bool)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:29 | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST r [] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR unbind [TensorVariable()] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST -1 [GetAttrVariable(TensorVariable(), unbind)] | |
[2022-12-14 10:11:05,750] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 1 [GetAttrVariable(TensorVariable(), unbind), ConstantVariable(int)] | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE UNPACK_SEQUENCE 3 [TupleVariable()] | |
[2022-12-14 10:11:05,749] torch._dynamo.symbolic_convert: [DEBUG] break_graph_if_unsupported triggered compile | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 306, in wrapper | |
return inner_fn(self, inst) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 965, in CALL_FUNCTION | |
self.call_function(fn, args, {}) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 434, in call_function | |
self.push(fn.call_function(self, args, kwargs)) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/variables/builtin.py", line 375, in call_function | |
return super().call_function(tx, args, kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/variables/base.py", line 230, in call_function | |
unimplemented(f"call_function {self} {args} {kwargs}") | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/exc.py", line 67, in unimplemented | |
raise Unsupported(msg) | |
torch._dynamo.exc.Unsupported: call_function BuiltinVariable(print) [ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str), ConstantVariable(int)] {} | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST x [TensorVariable(), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,752] torch._dynamo.output_graph: [DEBUG] restore_graphstate: removed 0 nodes | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST y [TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,752] torch._dynamo.output_graph: [DEBUG] COMPILING GRAPH due to GraphCompileReason(reason='call_function BuiltinVariable(print) [ConstantVariable(str), TorchVariable(cuda:1), ConstantVariable(str), ConstantVariable(int)] {}', user_stack=[<FrameSummary file /users/eldar/src/hybridrf/train_dynamo_bug.py, line 55 in forward>]) | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST z [TensorVariable()] | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:30 | |
[2022-12-14 10:11:05,752] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_GLOBAL torch [] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR stack [TorchVariable(<module 'torch' from '/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/__init__.py'>)] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST z [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>)] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST y [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:31 | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST z [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST x [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable(), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:32 | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST y [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST x [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:30 | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE BUILD_LIST 6 [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:32 | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST -1 [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), ListVariable()] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:30 | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST ('dim',) [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), ListVariable(), ConstantVariable(int)] | |
[2022-12-14 10:11:05,753] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION_KW 2 [TorchVariable(<built-in method stack of type object at 0x7f8daf0d0b40>), ListVariable(), ConstantVariable(int), ConstantVariable(tuple)] | |
[2022-12-14 10:11:05,754] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function compile_fn | |
[2022-12-14 10:11:05,754] torch._dynamo.optimizations.distributed: [INFO] DDPOptimizer used bucket cap 26214400 and produced the following buckets: | |
[2022-12-14 10:11:05,754] torch._dynamo.optimizations.distributed: [INFO] Please `pip install tabulate` in order to pretty-print ddp bucket sizes | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST R [TensorVariable()] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:33 | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST R [] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR reshape [TensorVariable()] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE BUILD_LIST 0 [GetAttrVariable(TensorVariable(), reshape)] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST x [GetAttrVariable(TensorVariable(), reshape), ListVariable()] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR shape [GetAttrVariable(TensorVariable(), reshape), ListVariable(), TensorVariable()] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LIST_EXTEND 1 [GetAttrVariable(TensorVariable(), reshape), ListVariable(), ShapeVariable()] | |
[2022-12-14 10:11:05,755] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 3 [GetAttrVariable(TensorVariable(), reshape), ListVariable()] | |
[2022-12-14 10:11:05,756] torch._dynamo.symbolic_convert: [DEBUG] TRACE LIST_APPEND 1 [GetAttrVariable(TensorVariable(), reshape), ListVariable(), ConstantVariable(int)] | |
[2022-12-14 10:11:05,756] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_CONST 2 [GetAttrVariable(TensorVariable(), reshape), ListVariable()] | |
[2022-12-14 10:11:05,756] torch._dynamo.symbolic_convert: [DEBUG] TRACE LIST_APPEND 1 [GetAttrVariable(TensorVariable(), reshape), ListVariable(), ConstantVariable(int)] | |
[2022-12-14 10:11:05,756] torch._dynamo.symbolic_convert: [DEBUG] TRACE LIST_TO_TUPLE None [GetAttrVariable(TensorVariable(), reshape), ListVariable()] | |
[2022-12-14 10:11:05,756] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION_EX 0 [GetAttrVariable(TensorVariable(), reshape), TupleVariable()] | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] TRACE RETURN_VALUE None [TensorVariable()] | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] DONE INLINING <code object get_matrix at 0x7f8dd0353b50, file "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 25> | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST T [TensorVariable()] | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /users/eldar/src/hybridrf/train_dynamo_bug.py:57 | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST T [] | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] TRACE RETURN_VALUE None [TensorVariable()] | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo done tracing forward (RETURN_VALUE) | |
[2022-12-14 10:11:05,757] torch._dynamo.symbolic_convert: [DEBUG] RETURN_VALUE triggered compile | |
[2022-12-14 10:11:05,757] torch._dynamo.output_graph: [DEBUG] COMPILING GRAPH due to None | |
[2022-12-14 10:11:05,759] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function compile_fn | |
[2022-12-14 10:11:05,759] torch._dynamo.optimizations.distributed: [INFO] DDPOptimizer used bucket cap 26214400 and produced the following buckets: | |
[2022-12-14 10:11:05,759] torch._dynamo.optimizations.distributed: [INFO] Please `pip install tabulate` in order to pretty-print ddp bucket sizes | |
[2022-12-14 10:11:05,996] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling FORWARDS graph 0 | |
[2022-12-14 10:11:06,003] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling FORWARDS graph 0 | |
[2022-12-14 10:11:06,032] torch._inductor.graph: [WARNING] Creating implicit fallback for: | |
target: aten.embedding_renorm.default | |
args[0]: TensorBox(StorageBox( | |
Pointwise( | |
'cuda', | |
torch.float32, | |
load(primals_1, i1 + 3 * i0), | |
ranges=[150000, 3], | |
origins={clone, primals_1} | |
) | |
)) | |
args[1]: TensorBox(StorageBox( | |
InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[8], stride=[1])) | |
)) | |
args[2]: 0.175 | |
args[3]: 2.0 | |
[2022-12-14 10:11:06,032] torch._inductor.graph: [WARNING] Creating implicit fallback for: | |
target: aten.embedding_renorm.default | |
args[0]: TensorBox(StorageBox( | |
Pointwise( | |
'cuda', | |
torch.float32, | |
load(primals_1, i1 + 3 * i0), | |
ranges=[150000, 3], | |
origins={clone, primals_1} | |
) | |
)) | |
args[1]: TensorBox(StorageBox( | |
InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[8], stride=[1])) | |
)) | |
args[2]: 0.175 | |
args[3]: 2.0 | |
[2022-12-14 10:11:06,037] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.embedding_renorm.default | |
[2022-12-14 10:11:06,038] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.embedding_renorm.default | |
[2022-12-14 10:11:06,513] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling FORWARDS graph 0 | |
[2022-12-14 10:11:06,514] torch._dynamo.output_graph: [INFO] Step 2: done compiler function compile_fn | |
[2022-12-14 10:11:06,515] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling FORWARDS graph 0 | |
[2022-12-14 10:11:06,516] torch._dynamo.output_graph: [INFO] Step 2: done compiler function compile_fn | |
[2022-12-14 10:11:06,679] torch._dynamo.eval_frame: [DEBUG] skipping _fn /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:06,679] torch._dynamo.eval_frame: [DEBUG] skipping nothing /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:06,679] torch._dynamo.eval_frame: [DEBUG] skipping _fn /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:06,679] torch._dynamo.eval_frame: [DEBUG] skipping nothing /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping is_scripting /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_jit_internal.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __getattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:06,682] torch._dynamo.eval_frame: [DEBUG] skipping __call__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
0%| | 0/1000 [00:00<?, ?it/s] | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __exit__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/autograd/profiler.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping is_scripting /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_jit_internal.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __getattr__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
[2022-12-14 10:11:06,683] torch._dynamo.eval_frame: [DEBUG] skipping __call__ /users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_ops.py | |
0%| | 0/1000 [00:00<?, ?it/s] | |
Traceback (most recent call last): | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 81, in <module> | |
mp.spawn(train, | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn | |
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn') | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes | |
while not context.join(): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 160, in join | |
raise ProcessRaisedException(msg, error_index, failed_process.pid) | |
torch.multiprocessing.spawn.ProcessRaisedException: | |
-- Process 0 terminated with the following error: | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap | |
fn(i, *args) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 74, in train | |
T = model_ddp(idx) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 82, in forward | |
return self.dynamo_ctx(self._orig_mod.forward)(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 211, in _fn | |
return fn(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1098, in forward | |
output = self._run_ddp_forward(*inputs, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1051, in _run_ddp_forward | |
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore[index] | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 44, in forward | |
def forward(self, idx): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 211, in _fn | |
return fn(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2343, in forward | |
return compiled_fn(full_args) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 887, in g | |
return f(*args) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1906, in debug_compiled_function | |
return compiled_function(*args) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1802, in compiled_function | |
original_inpt.copy_(updated_inpt) | |
RuntimeError: a leaf Variable that requires grad is being used in an in-place operation. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os

import torch
import torch._dynamo
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn.functional as F
from torch import nn
from torch.nn.parallel import DistributedDataParallel as DDP
from tqdm import tqdm
def setup(rank, world_size, master_addr="localhost", master_port="12355"):
    """Initialize the default NCCL process group for this DDP worker.

    Args:
        rank: Rank of the current process (one per GPU).
        world_size: Total number of participating processes.
        master_addr: Rendezvous host; defaults to the original hard-coded
            "localhost" so existing callers are unaffected.
        master_port: Rendezvous port; defaults to the original "12355".
    """
    os.environ["MASTER_ADDR"] = master_addr
    os.environ["MASTER_PORT"] = master_port
    # initialize the process group
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
def cleanup():
    """Destroy the default process group created by setup()."""
    dist.destroy_process_group()
def get_matrix(r):
    """Assemble a (..., 3, 2) matrix from per-sample 3-vectors.

    For r of shape (..., 3) with components (x, y, z), the result is
    [[-z, y], [z, -x], [-y, x]] per sample. The rank-1 print is debug
    output for the distributed repro.
    """
    if dist.get_rank() == 1:
        print("r.device inside call:", r.device, "rank:", dist.get_rank())
    x, y, z = r.unbind(-1)
    flat = torch.stack((-z, y, z, -x, -y, x), dim=-1)  # (..., 6)
    return flat.reshape(*x.shape, 3, 2)
class Model(nn.Module):
    """Pose-update model: looks up per-view 3-vectors and maps them to
    (B, 3, 2) matrices via get_matrix.

    NOTE(review): F.embedding with max_norm renorms the embedding weight
    in place; the gist's traceback shows this in-place update on a leaf
    Parameter is what trips torch.compile/AOTAutograd here.
    """

    def __init__(self):
        super().__init__()
        num_views = 150000
        # torch.zeros already yields an all-zero parameter; the original
        # redundant nn.init.zeros_ call on the same tensor is removed.
        self.pose_update_r = nn.Parameter(torch.zeros(num_views, 3))

    def forward(self, idx):
        """Return (B, 3, 2) matrices for the view indices in idx (B,)."""
        r = F.embedding(idx, self.pose_update_r, max_norm=0.175)  # (B, 3)
        if dist.get_rank() == 1:
            print("r.device before call:", r.device, "rank:", dist.get_rank())
        T = get_matrix(r)
        return T
def train(rank, world_size):
    """Per-process DDP worker: build, compile, and run the repro loop.

    Args:
        rank: This worker's rank, also used as its CUDA device index.
        world_size: Total number of spawned workers.
    """
    print(f"Running DDP training on rank {rank}.")
    setup(rank, world_size)
    torch.cuda.set_device(rank)
    # BUGFIX: `logging` was referenced here but never imported, raising
    # NameError at runtime; it is now imported at module level.
    torch._dynamo.config.log_level = logging.DEBUG
    # create model and move it to GPU with id rank
    model = Model().to(rank)
    model_ddp = DDP(model, device_ids=[rank], find_unused_parameters=True)
    model_ddp = torch.compile(model_ddp)
    for _ in tqdm(range(1000)):
        idx = torch.randint(1000, (8,)).to(rank)
        # Forward only; the output was bound to an unused local before.
        model_ddp(idx)
    cleanup()
if __name__ == "__main__":
    # Launch one DDP worker per rank; join=True blocks until all exit.
    world_size = 2
    mp.spawn(train, args=(world_size,), nprocs=world_size, join=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment