Skip to content

Instantly share code, notes, and snippets.

@eldar
Created December 12, 2022 22:50
Show Gist options
  • Save eldar/d515106872f429d17519810284381dd4 to your computer and use it in GitHub Desktop.
Save eldar/d515106872f429d17519810284381dd4 to your computer and use it in GitHub Desktop.
torch.compile bug
Running DDP training on rank 1.
Running DDP training on rank 0.
r.device before call: cuda:1 rank: 1
r.device inside call: cuda:0 rank: 1
[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
Traceback (most recent call last):
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 77, in <module>
mp.spawn(train,
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 1 terminated with the following error:
Traceback (most recent call last):
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 76, in preserve_rng_state
yield
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2026, in create_aot_dispatcher_function
compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
return compiler_fn(flat_fn, leaf_flat_args, aot_config)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper
r = func(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 369, in fw_compiler
return inner_compile(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/debug.py", line 224, in inner
return fn(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/graph.py", line 503, in compile_to_fn
return self.compile_to_module().call
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper
r = func(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/graph.py", line 492, in compile_to_module
mod = PyCodeCache.load(code)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 459, in load
exec(code, mod.__dict__, mod.__dict__)
File "/tmp/torchinductor_eldar/x6/cx6hqno5xt6tqkv23eqbcsqgzlm3vrqwgsu5rx5nooh6ksncvzv7.py", line 129, in <module>
async_compile.wait(globals())
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 642, in wait
scope[key] = result.result()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 513, in result
kernel = self.kernel = _load_kernel(self.source_code)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 493, in _load_kernel
kernel.precompile()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 59, in precompile
self.launchers = [
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 60, in <listcomp>
self._precompile_config(c, warm_cache_only_with_cc)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 84, in _precompile_config
binary = triton.compile(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/triton/compiler.py", line 1268, in compile
return CompiledKernel(name, so_cache_manager._make_path(so_name), fn_cache_manager.cache_dir, device)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/triton/compiler.py", line 1301, in __init__
mod, func, n_regs, n_spills = _triton.code_gen.load_binary(metadata["name"], self.asm["cubin"], self.shared, device)
RuntimeError: CUDA: Error- illegal address
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 637, in call_user_compiler
compiled_fn = compiler_fn(gm, self.fake_example_inputs())
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/optimizations/distributed.py", line 189, in compile_fn
return self.backend_compile_fn(gm, example_inputs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/__init__.py", line 1204, in _compile_fn
return compile_fn(model_, inputs_)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 394, in compile_fx
return aot_autograd(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
cg = aot_module_simplified(gm, example_inputs, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2329, in aot_module_simplified
compiled_fn = create_aot_dispatcher_function(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper
r = func(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1980, in create_aot_dispatcher_function
with torch.autograd.set_multithreading_enabled(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 80, in preserve_rng_state
torch.cuda.set_rng_state(cuda_rng_state)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 64, in set_rng_state
_lazy_call(cb)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/__init__.py", line 176, in _lazy_call
callable()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 62, in cb
default_generator.set_state(new_state_copy)
RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037405088
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 103, in _fn
return fn(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 90, in time_wrapper
r = func(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
return _compile(
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 398, in _compile
out_code = transform_code_object(code, transform)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
transformations(instructions, code_options)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 385, in transform
tracer.run()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 1686, in run
super().run()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 537, in run
and self.step()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 500, in step
getattr(self, inst.opname)(inst)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 1752, in RETURN_VALUE
self.output.compile_subgraph(self)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 490, in compile_subgraph
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 561, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 642, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fn raised RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037405088
Set torch._dynamo.config.verbose=True for more information
You can suppress this exception and fall back to eager by setting:
torch._dynamo.config.suppress_errors = True
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 70, in train
T = model_ddp(idx)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl
return forward_call(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 82, in forward
return self.dynamo_ctx(self._orig_mod.forward)(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 211, in _fn
return fn(*args, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1098, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1051, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore[index]
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1482, in _call_impl
return forward_call(*args, **kwargs)
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 54, in forward
print("r.device before call:", r.device, "rank:", dist.get_rank())
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 55, in <graph break in forward>
T = get_matrix(r)
File "/users/eldar/src/hybridrf/train_dynamo_bug.py", line 25, in get_matrix
print("r.device inside call:", r.device, "rank:", dist.get_rank())
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 329, in catch_errors
return hijacked_callback(frame, cache_size, hooks)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
result = inner_convert(frame, cache_size, hooks)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 108, in _fn
torch.cuda.set_rng_state(cuda_rng_state)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 64, in set_rng_state
_lazy_call(cb)
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/__init__.py", line 176, in _lazy_call
callable()
File "/users/eldar/apps/anaconda3/envs/ca3d/lib/python3.10/site-packages/torch/cuda/random.py", line 62, in cb
default_generator.set_state(new_state_copy)
RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus2037418240
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment