Created
December 12, 2022 22:50
-
-
Save eldar/d515106872f429d17519810284381dd4 to your computer and use it in GitHub Desktop.
torch.compile bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Running DDP training on rank 1. | |
Running DDP training on rank 0. | |
r.device before call: cuda:1 rank: 1 | |
r.device inside call: cuda:0 rank: 1 | |
[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) | |
Traceback (most recent call last): | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 77, in <module> | |
mp.spawn(train, | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn | |
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn') | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes | |
while not context.join(): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 160, in join | |
raise ProcessRaisedException(msg, error_index, failed_process.pid) | |
torch.multiprocessing.spawn.ProcessRaisedException: | |
-- Process 1 terminated with the following error: | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 76, in preserve_rng_state | |
yield | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2026, in create_aot_dispatcher_function | |
compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe | |
return compiler_fn(flat_fn, leaf_flat_args, aot_config) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd | |
compiled_fw_func = aot_config.fw_compiler( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 369, in fw_compiler | |
return inner_compile( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper | |
compiled_fn = compiler_fn(gm, example_inputs, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/debug.py", line 224, in inner | |
return fn(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py", line 79, in inner | |
return func(*args, **kwds) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner | |
compiled_fn = graph.compile_to_fn() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/graph.py", line 503, in compile_to_fn | |
return self.compile_to_module().call | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/graph.py", line 492, in compile_to_module | |
mod = PyCodeCache.load(code) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 459, in load | |
exec(code, mod.__dict__, mod.__dict__) | |
File "/tmp/torchinductor_eldar/x6/cx6hqno5xt6tqkv23eqbcsqgzlm3vrqwgsu5rx5nooh6ksncvzv7.py", line 129, in <module> | |
async_compile.wait(globals()) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 642, in wait | |
scope[key] = result.result() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 513, in result | |
kernel = self.kernel = _load_kernel(self.source_code) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 493, in _load_kernel | |
kernel.precompile() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 59, in precompile | |
self.launchers = [ | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 60, in <listcomp> | |
self._precompile_config(c, warm_cache_only_with_cc) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 84, in _precompile_config | |
binary = triton.compile( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/triton/compiler.py", line 1268, in compile | |
return CompiledKernel(name, so_cache_manager._make_path(so_name), fn_cache_manager.cache_dir, device) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/triton/compiler.py", line 1301, in __init__ | |
mod, func, n_regs, n_spills = _triton.code_gen.load_binary(metadata["name"], self.asm["cubin"], self.shared, device) | |
RuntimeError: CUDA: Error- illegal address | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 637, in call_user_compiler | |
compiled_fn = compiler_fn(gm, self.fake_example_inputs()) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/optimizations/distributed.py", line 189, in compile_fn | |
return self.backend_compile_fn(gm, example_inputs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper | |
compiled_gm = compiler_fn(gm, example_inputs, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/__init__.py", line 1204, in _compile_fn | |
return compile_fn(model_, inputs_) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 394, in compile_fx | |
return aot_autograd( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn | |
cg = aot_module_simplified(gm, example_inputs, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2329, in aot_module_simplified | |
compiled_fn = create_aot_dispatcher_function( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1980, in create_aot_dispatcher_function | |
with torch.autograd.set_multithreading_enabled( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py", line 153, in __exit__ | |
self.gen.throw(typ, value, traceback) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 80, in preserve_rng_state | |
torch.cuda.set_rng_state(cuda_rng_state) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 64, in set_rng_state | |
_lazy_call(cb) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/__init__.py", line 176, in _lazy_call | |
callable() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 62, in cb | |
default_generator.set_state(new_state_copy) | |
RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037405088 | |
The above exception was the direct cause of the following exception: | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 103, in _fn | |
return fn(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert | |
return _compile( | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 398, in _compile | |
out_code = transform_code_object(code, transform) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object | |
transformations(instructions, code_options) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 385, in transform | |
tracer.run() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 1686, in run | |
super().run() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 537, in run | |
and self.step() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 500, in step | |
getattr(self, inst.opname)(inst) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 1752, in RETURN_VALUE | |
self.output.compile_subgraph(self) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 490, in compile_subgraph | |
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 561, in compile_and_call_fx_graph | |
compiled_fn = self.call_user_compiler(gm) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 642, in call_user_compiler | |
raise BackendCompilerFailed(self.compiler_fn, e) from e | |
torch._dynamo.exc.BackendCompilerFailed: compile_fn raised RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037405088 | |
Set torch._dynamo.config.verbose=True for more information | |
You can suppress this exception and fall back to eager by setting: | |
torch._dynamo.config.suppress_errors = True | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap | |
fn(i, *args) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 70, in train | |
T = model_ddp(idx) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 82, in forward | |
return self.dynamo_ctx(self._orig_mod.forward)(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 211, in _fn | |
return fn(*args, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1098, in forward | |
output = self._run_ddp_forward(*inputs, **kwargs) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1051, in _run_ddp_forward | |
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore[index] | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 54, in forward | |
print("r.device before call:", r.device, "rank:", dist.get_rank()) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 55, in <graph break in forward> | |
T = get_matrix(r) | |
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 25, in get_matrix | |
print("r.device inside call:", r.device, "rank:", dist.get_rank()) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 329, in catch_errors | |
return hijacked_callback(frame, cache_size, hooks) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 479, in _convert_frame | |
result = inner_convert(frame, cache_size, hooks) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 108, in _fn | |
torch.cuda.set_rng_state(cuda_rng_state) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 64, in set_rng_state | |
_lazy_call(cb) | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/__init__.py", line 176, in _lazy_call | |
callable() | |
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 62, in cb | |
default_generator.set_state(new_state_copy) | |
RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037418240 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment