ezyang/gist:6cad3b202daf5c35f93acbb4d71afa5f Secret

## gistfile0.txt
cuda train BERT_pytorch                       ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 5 ops
cuda train Background_Matting                 [2022-12-12 06:21:23,943] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <function sym_float at 0x7f6cde1a2ca0>
  args[0]: 256.0
ERROR:common:compile_fx raised LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7f6cde1a2ca0>
  args[0]: 256.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_108,), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: sym_float() missing 1 required positional argument: 'a'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7f6cde1a2ca0>
  args[0]: 256.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_108,), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7f6cde1a2ca0>
  args[0]: 256.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_108,), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 183 ops
WARNING:root:DALLE2_pytorch failed to load
Eager model failed to run
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 998, in validate_model
    self.model_iter_fn(model, example_inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in forward_and_backward_pass
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 2010, in run
    ) = runner.load_model(device, model_name, batch_size=batch_size)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 305, in load_model
    self.validate_model(model, example_inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1000, in validate_model
    raise NotImplementedError("Eager model failed to run") from e
NotImplementedError: Eager model failed to run

cuda train LearningToPaint                    ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/LearningToPaint/baseline/DRL/actor.py", line 104, in forward
    def forward(self, x):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/mh/cmhdhkraqea5462gm4idaxjnb3czmtuq2j2l2et3rzcnx6p33x7a.py", line 944, in call
    return (buf1, buf2, buf5, buf6, buf9, buf10, buf12, buf13, buf17, buf18, buf21, buf22, buf25, buf26, buf29, buf30, buf32, buf33, buf37, buf38, buf41, buf42, buf45, buf46, buf49, buf50, buf52, buf53, buf57, buf58, buf61, buf62, buf65, buf66, buf69, buf70, buf72, buf73, buf77, buf78, buf81, buf82, buf86, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_129, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf55, buf56, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf86, as_strided(primals_64, (65, 512), (512, 1)), s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 72 ops
cuda train Super_SloMo                        ERROR:common:compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_113 : [#users=1] = placeholder[target=primals_113]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'SymInt'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in <listcomp>
    size = [sympy.Integer(i) for i in ex.size()]
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 74, in wrapper
    retval = func(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/data/users/ezyang/a/pytorch/torch/__init__.py", line 242, in __int__
    return self.node.int_()
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/symbolic_shapes.py", line 210, in int_
    raise RuntimeError("Trying to extract a concrete int out of a symbolic int")
RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_113 : [#users=1] = placeholder[target=primals_113]
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_113 : [#users=1] = placeholder[target=primals_113]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 374 ops
cuda train alexnet                            ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/alexnet.py", line 47, in forward
    def forward(self, x: torch.Tensor) -> torch.Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/m2/cm2m6q3kj6mp7dnzzrlg3nuw7z7vmhps3uhynbgm4zrdsc2dzwjp.py", line 386, in call
    return (buf21, primals_1, primals_3, primals_5, primals_7, primals_9, primals_17, buf1, buf2, buf3, buf5, buf6, buf7, buf9, buf11, buf13, buf14, buf15, as_strided(buf16, (4, 9216), (9216, 1)), buf18, buf20, as_strided(primals_15, (1000, 4096), (4096, 1)), as_strided(primals_13, (4096, 4096), (4096, 1)), as_strided(primals_11, (4096, 9216), (9216, 1)), s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 22 ops
cuda train attention_is_all_you_need_pytorch  ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 11 ops
cuda train dcgan                              ERROR:common:'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=0] = placeholder[target=tangents_1]
Original traceback:
None
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 233, in placeholder
    sizes, strides = self.symbolic_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 50, in symbolic_sizes_strides
    size, stride = self._shape_env.create_symbolic_sizes_strides(ex)
AttributeError: 'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=0] = placeholder[target=tangents_1]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 13 ops
cuda train densenet121                        ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/densenet.py", line 212, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/ed/cedphlhn6ebmmibptrxt4otj3zfyv4zj57m3eonhhak6fd6ulh45.py", line 6557, in call
    return (buf1, buf2, buf6, buf7, buf10, buf11, buf16, buf17, buf20, buf21, buf28, buf29, buf32, buf33, buf41, buf42, buf45, buf46, buf55, buf56, buf59, buf60, buf70, buf71, buf74, buf75, buf86, buf87, buf91, buf92, buf95, buf96, buf101, buf102, buf105, buf106, buf113, buf114, buf117, buf118, buf126, buf127, buf130, buf131, buf140, buf141, buf144, buf145, buf155, buf156, buf159, buf160, buf171, buf172, buf175, buf176, buf188, buf189, buf192, buf193, buf206, buf207, buf210, buf211, buf225, buf226, buf229, buf230, buf245, buf246, buf249, buf250, buf266, buf267, buf270, buf271, buf288, buf289, buf293, buf294, buf297, buf298, buf303, buf304, buf307, buf308, buf315, buf316, buf319, buf320, buf328, buf329, buf332, buf333, buf342, buf343, buf346, buf347, buf357, buf358, buf361, buf362, buf373, buf374, buf377, buf378, buf390, buf391, buf394, buf395, buf408, buf409, buf412, buf413, buf427, buf428, buf431, buf432, buf447, buf448, buf451, buf452, buf468, buf469, buf472, buf473, buf490, buf491, buf494, buf495, buf513, buf514, buf517, buf518, buf537, buf538, buf541, buf542, buf562, buf563, buf566, buf567, buf588, buf589, buf592, buf593, buf615, buf616, buf619, buf620, buf643, buf644, buf647, buf648, buf672, buf673, buf676, buf677, buf702, buf703, buf706, buf707, buf733, buf734, buf737, buf738, buf765, buf766, buf769, buf770, buf798, buf799, buf802, buf803, buf832, buf833, buf837, buf838, buf841, buf842, buf847, buf848, buf851, buf852, buf859, buf860, buf863, buf864, buf872, buf873, buf876, buf877, buf886, buf887, buf890, buf891, buf901, buf902, buf905, buf906, buf917, buf918, buf921, buf922, buf934, buf935, buf938, buf939, buf952, buf953, buf956, buf957, buf971, buf972, buf975, buf976, buf991, buf992, buf995, buf996, buf1012, buf1013, buf1016, buf1017, buf1034, buf1035, buf1038, buf1039, buf1057, buf1058, buf1061, buf1062, buf1081, buf1082, buf1085, buf1086, buf1106, buf1107, buf1110, buf1111, buf1132, buf1133, buf1137, primals_1, primals_2, primals_4, primals_6, primals_7, primals_9, primals_10, primals_12, primals_13, primals_15, primals_16, primals_18, primals_19, primals_21, primals_22, primals_24, primals_25, primals_27, primals_28, primals_30, primals_31, primals_33, primals_34, primals_36, primals_37, primals_39, primals_40, primals_42, primals_43, primals_45, primals_46, primals_48, primals_49, primals_51, primals_52, primals_54, primals_55, primals_57, primals_58, primals_60, primals_61, primals_63, primals_64, primals_66, primals_67, primals_69, primals_70, primals_72, primals_73, primals_75, primals_76, primals_78, primals_79, primals_81, primals_82, primals_84, primals_85, primals_87, primals_88, primals_90, primals_91, primals_93, primals_94, primals_96, primals_97, primals_99, primals_100, primals_102, primals_103, primals_105, primals_106, primals_108, primals_109, primals_111, primals_112, primals_114, primals_115, primals_117, primals_118, primals_120, primals_121, primals_123, primals_124, primals_126, primals_127, primals_129, primals_130, primals_132, primals_133, primals_135, primals_136, primals_138, primals_139, primals_141, primals_142, primals_144, primals_145, primals_147, primals_148, primals_150, primals_151, primals_153, primals_154, primals_156, primals_157, primals_159, primals_160, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, primals_322, primals_324, primals_325, primals_327, primals_328, primals_330, primals_331, primals_333, primals_334, primals_336, primals_337, primals_339, primals_340, primals_342, primals_343, primals_345, primals_346, primals_348, primals_349, primals_351, primals_352, primals_354, primals_355, primals_357, primals_358, primals_360, primals_361, primals_728, buf0, buf1, buf2, buf3, buf5, buf4, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf54, buf55, buf56, buf57, buf58, buf59, buf60, buf61, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf97, buf100, buf101, buf102, buf103, buf104, buf105, buf106, buf107, buf112, buf113, buf114, buf115, buf116, buf117, buf118, buf119, buf125, buf126, buf127, buf128, buf129, buf130, buf131, buf132, buf139, buf140, buf141, buf142, buf143, buf144, buf145, buf146, buf154, buf155, buf156, buf157, buf158, buf159, buf160, buf161, buf170, buf171, buf172, buf173, buf174, buf175, buf176, buf177, buf187, buf188, buf189, buf190, buf191, buf192, buf193, buf194, buf205, buf206, buf207, buf208, buf209, buf210, buf211, buf212, buf224, buf225, buf226, buf227, buf228, buf229, buf230, buf231, buf244, buf245, buf246, buf247, buf248, buf249, buf250, buf251, buf265, buf266, buf267, buf268, buf269, buf270, buf271, buf272, buf287, buf288, buf289, buf290, buf291, buf292, buf293, buf294, buf295, buf296, buf297, buf298, buf299, buf302, buf303, buf304, buf305, buf306, buf307, buf308, buf309, buf314, buf315, buf316, buf317, buf318, buf319, buf320, buf321, buf327, buf328, buf329, buf330, buf331, buf332, buf333, buf334, buf341, buf342, buf343, buf344, buf345, buf346, buf347, buf348, buf356, buf357, buf358, buf359, buf360, buf361, buf362, buf363, buf372, buf373, buf374, buf375, buf376, buf377, buf378, buf379, buf389, buf390, buf391, buf392, buf393, buf394, buf395, buf396, buf407, buf408, buf409, buf410, buf411, buf412, buf413, buf414, buf426, buf427, buf428, buf429, buf430, buf431, buf432, buf433, buf446, buf447, buf448, buf449, buf450, buf451, buf452, buf453, buf467, buf468, buf469, buf470, buf471, buf472, buf473, buf474, buf489, buf490, buf491, buf492, buf493, buf494, buf495, buf496, buf512, buf513, buf514, buf515, buf516, buf517, buf518, buf519, buf536, buf537, buf538, buf539, buf540, buf541, buf542, buf543, buf561, buf562, buf563, buf564, buf565, buf566, buf567, buf568, buf587, buf588, buf589, buf590, buf591, buf592, buf593, buf594, buf614, buf615, buf616, buf617, buf618, buf619, buf620, buf621, buf642, buf643, buf644, buf645, buf646, buf647, buf648, buf649, buf671, buf672, buf673, buf674, buf675, buf676, buf677, buf678, buf701, buf702, buf703, buf704, buf705, buf706, buf707, buf708, buf732, buf733, buf734, buf735, buf736, buf737, buf738, buf739, buf764, buf765, buf766, buf767, buf768, buf769, buf770, buf771, buf797, buf798, buf799, buf800, buf801, buf802, buf803, buf804, buf831, buf832, buf833, buf834, buf835, buf836, buf837, buf838, buf839, buf840, buf841, buf842, buf843, buf846, buf847, buf848, buf849, buf850, buf851, buf852, buf853, buf858, buf859, buf860, buf861, buf862, buf863, buf864, buf865, buf871, buf872, buf873, buf874, buf875, buf876, buf877, buf878, buf885, buf886, buf887, buf888, buf889, buf890, buf891, buf892, buf900, buf901, buf902, buf903, buf904, buf905, buf906, buf907, buf916, buf917, buf918, buf919, buf920, buf921, buf922, buf923, buf933, buf934, buf935, buf936, buf937, buf938, buf939, buf940, buf951, buf952, buf953, buf954, buf955, buf956, buf957, buf958, buf970, buf971, buf972, buf973, buf974, buf975, buf976, buf977, buf990, buf991, buf992, buf993, buf994, buf995, buf996, buf997, buf1011, buf1012, buf1013, buf1014, buf1015, buf1016, buf1017, buf1018, buf1033, buf1034, buf1035, buf1036, buf1037, buf1038, buf1039, buf1040, buf1056, buf1057, buf1058, buf1059, buf1060, buf1061, buf1062, buf1063, buf1080, buf1081, buf1082, buf1083, buf1084, buf1085, buf1086, buf1087, buf1105, buf1106, buf1107, buf1108, buf1109, buf1110, buf1111, buf1112, buf1131, buf1132, buf1133, as_strided(buf1136, (4, 1024), (1024, 1)), as_strided(primals_363, (1000, 1024), (1024, 1)), buf1138, s0, 7, 7, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 431 ops
WARNING:root:detectron2_fcos_r_50_fpn failed to load
FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 2010, in run
    ) = runner.load_model(device, model_name, batch_size=batch_size)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 268, in load_model
    benchmark = benchmark_cls(
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/model.py", line 18, in __call__
    obj = type.__call__(cls, *args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/detectron2_fcos_r_50_fpn/__init__.py", line 15, in __init__
    super().__init__(variant="COCO-Detection/fcos_R_50_FPN_1x.py", test=test, device=device,
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 100, in __init__
    loader = self.setup_train(cfg, args)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 110, in setup_train
    raise NotImplementedError("FCOS train is not supported by upstream detectron2. " \
NotImplementedError: FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.

WARNING:root:detectron2_maskrcnn_r_50_c4 failed to load
Eager model failed to run
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 998, in validate_model
    self.model_iter_fn(model, example_inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 360, in forward_and_backward_pass
    loss = self.compute_loss(pred)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 350, in compute_loss
    return reduce_to_scalar_loss(pred)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 97, in reduce_to_scalar_loss
    return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 97, in <listcomp>
    return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 107, in reduce_to_scalar_loss
    return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 107, in <listcomp>
    return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 110, in reduce_to_scalar_loss
    raise NotImplementedError("Don't know how to reduce", type(out))
NotImplementedError: ("Don't know how to reduce", <class 'detectron2.structures.instances.Instances'>)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 2010, in run
    ) = runner.load_model(device, model_name, batch_size=batch_size)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 305, in load_model
    self.validate_model(model, example_inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1000, in validate_model
    raise NotImplementedError("Eager model failed to run") from e
NotImplementedError: Eager model failed to run

cuda train dlrm                               ERROR:common:compile_fx raised NotImplementedError: Cannot access storage of SparseTensorImpl

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1494, in aot_dispatch_autograd
    fx_g = make_fx(joint_forward_backward, aot_config.decompositions)(
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/proxy_tensor.py", line 691, in wrapped
    t = dispatch_trace(wrap_key(func, args, fx_tracer), tracer=fx_tracer, concrete_args=tuple(phs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/proxy_tensor.py", line 441, in dispatch_trace
    graph = tracer.trace(root, concrete_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/fx/_symbolic_trace.py", line 756, in trace
    (self.create_arg(fn(*args)),),
  File "/data/users/ezyang/a/pytorch/torch/fx/_symbolic_trace.py", line 630, in flatten_fn
    tree_out = root_fn(*tree_args)
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/proxy_tensor.py", line 457, in wrapped
    out = f(*tensors)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 822, in functionalized_joint
    outs = joint_forward_backward(f_primals, f_tangents)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 789, in joint_forward_backward
    backward_out = torch.autograd.grad(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 266, in grad
    return handle_torch_function(
  File "/data/users/ezyang/a/pytorch/torch/overrides.py", line 1520, in handle_torch_function
    result = mode.__torch_function__(public_api, types, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 300, in grad
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
NotImplementedError: Cannot access storage of SparseTensorImpl

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised NotImplementedError: Cannot access storage of SparseTensorImpl

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 40 ops
/data/users/ezyang/a/pytorch/torch/utils/tensorboard/__init__.py:4: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
  if not hasattr(tensorboard, "__version__") or LooseVersion(
/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/gym/core.py:317: DeprecationWarning: [33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
cuda train drq                                ERROR:common:'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=1] = placeholder[target=tangents_1]
Original traceback:
None
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 233, in placeholder
    sizes, strides = self.symbolic_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 50, in symbolic_sizes_strides
    size, stride = self._shape_env.create_symbolic_sizes_strides(ex)
AttributeError: 'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=1] = placeholder[target=tangents_1]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 34 ops
cuda train fastNLP_Bert                       ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/fastNLP/models/bert.py", line 265, in forward
    sequence_output = self.bert(words)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/fastNLP/embeddings/bert_embedding.py", line 137, in forward
    outputs = self.model(words)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 13 ops
cuda train functorch_dp_cifar10               ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/resnet.py", line 284, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/lj/cljg56pbqgfgxf7ybhk5sr2tdonkcbp33cyblginj3jibxqstila.py", line 1185, in call
    return (buf105, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_63, buf0, as_strided(buf106, (4, 32), (32, 1)), as_strided(buf107, (4, 32), (32, 1)), buf4, buf5, buf6, buf7, as_strided(buf108, (4, 32), (32, 1)), as_strided(buf109, (4, 32), (32, 1)), buf11, buf12, as_strided(buf110, (4, 32), (32, 1)), as_strided(buf111, (4, 32), (32, 1)), buf16, buf17, as_strided(buf112, (4, 32), (32, 1)), as_strided(buf113, (4, 32), (32, 1)), buf21, buf22, as_strided(buf114, (4, 32), (32, 1)), as_strided(buf115, (4, 32), (32, 1)), buf26, buf27, as_strided(buf116, (4, 32), (32, 1)), as_strided(buf117, (4, 32), (32, 1)), buf31, buf32, as_strided(buf118, (4, 32), (32, 1)), as_strided(buf119, (4, 32), (32, 1)), buf36, as_strided(buf120, (4, 32), (32, 1)), as_strided(buf121, (4, 32), (32, 1)), buf41, buf42, as_strided(buf122, (4, 32), (32, 1)), as_strided(buf123, (4, 32), (32, 1)), buf46, buf47, as_strided(buf124, (4, 32), (32, 1)), as_strided(buf125, (4, 32), (32, 1)), buf51, buf52, as_strided(buf126, (4, 32), (32, 1)), as_strided(buf127, (4, 32), (32, 1)), buf56, buf57, as_strided(buf128, (4, 32), (32, 1)), as_strided(buf129, (4, 32), (32, 1)), buf61, as_strided(buf130, (4, 32), (32, 1)), as_strided(buf131, (4, 32), (32, 1)), buf66, buf67, as_strided(buf132, (4, 32), (32, 1)), as_strided(buf133, (4, 32), (32, 1)), buf71, buf72, as_strided(buf134, (4, 32), (32, 1)), as_strided(buf135, (4, 32), (32, 1)), buf76, buf77, as_strided(buf136, (4, 32), (32, 1)), as_strided(buf137, (4, 32), (32, 1)), buf81, buf82, as_strided(buf138, (4, 32), (32, 1)), as_strided(buf139, (4, 32), (32, 1)), buf86, as_strided(buf140, (4, 32), (32, 1)), as_strided(buf141, (4, 32), (32, 1)), buf91, buf92, as_strided(buf142, (4, 32), (32, 1)), as_strided(buf143, (4, 32), (32, 1)), buf96, buf97, buf101, buf102, as_strided(buf104, (4, 512), (512, 1)), as_strided(primals_61, (1000, 512), (512, 1)), buf144, s0, 16, 16, 256, 8, 8, 64, 8, 8, 64, 8, 8, 64, 8, 8, 64, 4, 4, 16, 4, 4, 16, 4, 4, 16, 4, 4, 16, 4, 4, 16, 2, 2, 4, 2, 2, 4, 2, 2, 4, 2, 2, 4, 2, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 69 ops
cuda train functorch_maml_omniglot            ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/container.py", line 202, in forward
    def forward(self, input):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/65/c65vfsttcdarnn6xnicpy4ochel42nvco2kf73ju2lz63hq6kt6u.py", line 543, in call
    return (buf21, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_15, buf1, as_strided(buf22, (64, ), (1, )), buf5, buf6, buf23, buf8, as_strided(buf24, (64, ), (1, )), buf12, buf13, buf25, buf15, as_strided(buf26, (64, ), (1, )), buf19, buf27, as_strided(buf20, (5, 64), (64, 1)), as_strided(primals_13, (5, 64), (64, 1)), as_strided(buf28, (1, 64, 1, 1), (0, 1, 0, 0)), as_strided(buf29, (1, 64, 1, 1), (0, 1, 0, 0)), as_strided(buf30, (1, 64, 1, 1), (0, 1, 0, 0)), s0, 1, 1, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 14 ops
cuda train hf_Albert                          ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 990, in forward
    outputs = self.albert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 737, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 560 ops
cuda train hf_Bart                            ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1353, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1222, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 846, in forward
    layer_outputs = encoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 323, in forward
    hidden_states, attn_weights, _ = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 26 ops
cuda train hf_Bert                            ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1351, in forward
    outputs = self.bert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1018, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 552 ops
cuda train hf_BigBird                         ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2462, in forward
    outputs = self.bert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 5 ops
cuda train hf_DistilBert                      ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 649, in forward
    dlbrt_output = self.distilbert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 566, in forward
    inputs_embeds = self.embeddings(input_ids)  # (bs, seq_length, dim)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 213 ops
cuda train hf_GPT2                            ERROR:common:compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 493, in codegen
    assert shape in added, f"{shape} is needed but not added"
AssertionError: s1 is needed but not added

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1048, in forward
    transformer_outputs = self.transformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 891, in forward
    outputs = block(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 391, in forward
    attn_outputs = self.attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 332, in forward
    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 33 ops
cuda train hf_GPT2_large                      PASS
Dynamo produced 0 graph(s) covering 0 ops
cuda train hf_Longformer                      [2022-12-12 06:41:50,594] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1813, in forward
    outputs = self.longformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1696, in forward
    padding_len, input_ids, attention_mask, token_type_ids, position_ids, inputs_embeds = self._pad_to_window_size(
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1715, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 30 ops
cuda train hf_Reformer                        [2022-12-12 06:42:02,721] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.native_dropout
[2022-12-12 06:42:02,724] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: aten.rand_like.default
  args[0]: TensorBox(StorageBox(
    Pointwise(
      'cuda',
      torch.float32,
      constant(0, torch.float32),
      ranges=[s0, 64, 1, 1],
      origins={empty}
    )
  ))
  kwargs: {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0), 'pin_memory': False}
[2022-12-12 06:42:02,729] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.rand_like.default
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2397, in forward
    reformer_outputs = self.reformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2063, in forward
    least_common_mult_chunk_length = _get_least_common_mult_chunk_len(self.config)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2100, in <graph break in forward>
    embedding_output = self.embeddings(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 239, in forward
    def forward(self, input_ids=None, position_ids=None, inputs_embeds=None, start_idx_pos_encodings=0):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/cu/ccu6uydvpuo5fmjs2tzpghbvbsqwlt63k7pgemkgfr6f3dt6ipj2.py", line 218, in call
    return (buf11, buf3, buf10, buf12, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 33 ops
cuda train hf_T5                              WARNING:common:fp64 golden ref were not generated for hf_T5. Setting accuracy check to cosine
ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 881 ops
cuda train hf_T5_base                         WARNING:common:fp64 golden ref were not generated for hf_T5_base. Setting accuracy check to cosine
ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s1
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': "<class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>", 'self_model_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_model_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_model_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
 |   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 1643 ops
cuda train hf_T5_large                        PASS
Dynamo produced 0 graph(s) covering 0 ops
cuda train lennard_jones                      ERROR:common:'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=3] = placeholder[target=tangents_1]
Original traceback:
None
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 233, in placeholder
    sizes, strides = self.symbolic_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 50, in symbolic_sizes_strides
    size, stride = self._shape_env.create_symbolic_sizes_strides(ex)
AttributeError: 'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=3] = placeholder[target=tangents_1]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 9 ops
cuda train maml_omniglot                      ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/container.py", line 202, in forward
    def forward(self, input):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/6y/c6y6azkuvza4smupdfxq5ea47bfzgx7beluww2iqslxfm7ndhouu.py", line 330, in call
    return (buf2, buf3, buf8, buf9, buf14, buf15, buf18, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_24, buf1, buf2, buf3, buf4, buf5, buf19, buf7, buf8, buf9, buf10, buf11, buf20, buf13, buf14, buf15, buf16, buf21, as_strided(buf17, (5, 64), (64, 1)), as_strided(primals_13, (5, 64), (64, 1)), s0, 1, 1, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 14 ops
cuda train mnasnet1_0                         ERROR:common:'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=0] = placeholder[target=tangents_1]
Original traceback:
None
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 233, in placeholder
    sizes, strides = self.symbolic_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 50, in symbolic_sizes_strides
    size, stride = self._shape_env.create_symbolic_sizes_strides(ex)
AttributeError: 'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=0] = placeholder[target=tangents_1]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 152 ops
cuda train mobilenet_v2                       ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/mobilenetv2.py", line 173, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/in/cingril2qoagsybomwj7uvkez6oszpatzuvrmxongfcvpcdh6zd3.py", line 1783, in call
    return (buf1, buf2, buf6, buf7, buf11, buf12, buf15, buf16, buf20, buf21, buf25, buf26, buf29, buf30, buf34, buf35, buf39, buf40, buf43, buf44, buf48, buf49, buf53, buf54, buf57, buf58, buf62, buf63, buf67, buf68, buf71, buf72, buf76, buf77, buf81, buf82, buf85, buf86, buf90, buf91, buf95, buf96, buf99, buf100, buf104, buf105, buf109, buf110, buf113, buf114, buf118, buf119, buf123, buf124, buf127, buf128, buf132, buf133, buf137, buf138, buf141, buf142, buf146, buf147, buf151, buf152, buf155, buf156, buf160, buf161, buf165, buf166, buf169, buf170, buf174, buf175, buf179, buf180, buf183, buf184, buf188, buf189, buf193, buf194, buf197, buf198, buf202, buf203, buf207, buf208, buf211, buf212, buf216, buf217, buf221, buf222, buf225, buf226, buf230, buf231, buf235, buf236, buf239, buf240, buf244, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_315, buf0, buf1, buf2, buf4, buf5, buf6, buf7, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf18, buf19, buf20, buf21, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf32, buf33, buf34, buf35, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf46, buf47, buf48, buf49, buf51, buf52, buf53, buf54, buf55, buf56, buf57, buf58, buf60, buf61, buf62, buf63, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf74, buf75, buf76, buf77, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf88, buf89, buf90, buf91, buf93, buf94, buf95, buf96, buf97, buf98, buf99, buf100, buf102, buf103, buf104, buf105, buf107, buf108, buf109, buf110, buf111, buf112, buf113, buf114, buf116, buf117, buf118, buf119, buf121, buf122, buf123, buf124, buf125, buf126, buf127, buf128, buf130, buf131, buf132, buf133, buf135, buf136, buf137, buf138, buf139, buf140, buf141, buf142, buf144, buf145, buf146, buf147, buf149, buf150, buf151, buf152, buf153, buf154, buf155, buf156, buf158, buf159, buf160, buf161, buf163, buf164, buf165, buf166, buf167, buf168, buf169, buf170, buf172, buf173, buf174, buf175, buf177, buf178, buf179, buf180, buf181, buf182, buf183, buf184, buf186, buf187, buf188, buf189, buf191, buf192, buf193, buf194, buf195, buf196, buf197, buf198, buf200, buf201, buf202, buf203, buf205, buf206, buf207, buf208, buf209, buf210, buf211, buf212, buf214, buf215, buf216, buf217, buf219, buf220, buf221, buf222, buf223, buf224, buf225, buf226, buf228, buf229, buf230, buf231, buf233, buf234, buf235, buf236, buf237, buf238, buf239, buf240, as_strided(buf243, (4, 1280), (1280, 1)), as_strided(primals_157, (1000, 1280), (1280, 1)), buf245, buf246, buf247, buf248, buf249, buf250, buf251, buf252, buf253, buf254, buf255, buf256, buf257, buf258, buf259, buf260, buf261, buf262, buf263, buf264, buf265, buf266, buf267, buf268, buf269, buf270, buf271, buf272, buf273, buf274, buf275, buf276, buf277, buf278, buf279, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 153 ops
cuda train mobilenet_v2_quantized_qat         WARNING:common:fp64 golden ref were not generated for mobilenet_v2_quantized_qat. Setting accuracy check to cosine
[2022-12-12 06:47:40,143] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,154] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,175] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,180] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,188] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,205] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,210] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,217] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,236] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,243] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,262] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,267] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,274] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,294] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,300] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,307] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,327] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,334] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,353] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,359] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,366] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,382] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,388] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,395] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,415] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,419] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,426] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,443] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,448] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,454] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,474] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,480] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,487] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,506] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,513] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,533] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,539] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,546] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,564] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,570] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,577] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,595] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,599] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,606] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,624] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,630] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,637] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,655] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,661] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,672] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,690] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,695] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,701] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,719] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,726] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,732] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,752] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,758] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,766] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,786] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,793] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,814] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,821] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,828] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,847] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,854] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,862] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,881] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,885] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,892] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,912] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,919] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,927] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,946] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,961] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,968] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,988] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:40,993] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,001] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,022] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,030] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,037] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,058] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,066] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,074] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,094] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,100] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,107] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,128] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,136] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,145] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,164] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,173] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,180] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,202] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,209] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,231] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,239] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,257] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,276] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,285] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,293] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,314] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,319] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,326] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,348] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,355] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,364] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,384] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,392] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,399] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,420] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,426] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,434] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,455] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,463] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,470] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,493] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,501] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,510] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,532] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,541] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,563] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,584] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,592] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,615] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,624] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,635] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,655] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,664] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,671] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,694] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,704] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,714] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,735] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,745] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,753] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,775] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,781] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,789] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,813] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,822] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,832] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,853] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,863] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,870] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,895] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,903] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,946] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,956] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,967] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,971] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,976] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,980] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:47:41,987] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
    return self._wrapped_call(self, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 279, in __call__
    raise e
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 269, in __call__
    return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "<eval_with_key>.8", line 4, in forward
    def forward(self, x : torch.Tensor) -> torch.Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/g5/cg5x7byhqrl4rvqdtnu3bwzry6p6bljfufiqhw2xj4i3cbm3ngrs.py", line 7382, in call
    return (buf9, buf10, buf7, buf8, buf26, buf27, buf21, buf22, buf19, buf20, buf46, buf47, buf44, buf45, buf63, buf64, buf58, buf59, buf56, buf57, buf83, buf84, buf81, buf82, buf101, buf102, buf95, buf96, buf93, buf94, buf113, buf114, buf111, buf112, buf131, buf132, buf125, buf126, buf123, buf124, buf151, buf152, buf149, buf150, buf168, buf169, buf163, buf164, buf161, buf162, buf188, buf189, buf186, buf187, buf206, buf207, buf200, buf201, buf198, buf199, buf218, buf219, buf216, buf217, buf236, buf237, buf230, buf231, buf228, buf229, buf256, buf257, buf254, buf255, buf273, buf274, buf268, buf269, buf266, buf267, buf293, buf294, buf291, buf292, buf310, buf311, buf305, buf306, buf303, buf304, buf322, buf323, buf320, buf321, buf334, buf335, buf332, buf333, buf351, buf352, buf346, buf347, buf344, buf345, buf371, buf372, buf369, buf370, buf388, buf389, buf383, buf384, buf381, buf382, buf408, buf409, buf406, buf407, buf425, buf426, buf420, buf421, buf418, buf419, buf437, buf438, buf435, buf436, buf455, buf456, buf449, buf450, buf447, buf448, buf475, buf476, buf473, buf474, buf492, buf493, buf487, buf488, buf485, buf486, buf512, buf513, buf510, buf511, buf529, buf530, buf524, buf525, buf522, buf523, buf541, buf542, buf539, buf540, buf553, buf554, buf551, buf552, buf570, buf571, buf565, buf566, buf563, buf564, buf590, buf591, buf588, buf589, buf607, buf608, buf602, buf603, buf600, buf601, buf627, buf628, buf625, buf626, buf644, buf645, buf639, buf640, buf637, buf638, buf656, buf657, buf654, buf655, buf668, buf669, buf666, buf667, buf685, buf686, buf680, buf681, buf678, buf679, buf705, buf706, buf703, buf704, buf722, buf723, buf717, buf718, buf715, buf716, buf742, buf743, buf740, buf741, buf760, buf761, buf754, buf755, buf752, buf753, buf772, buf773, buf770, buf771, buf790, buf791, buf784, buf785, buf782, buf783, buf810, buf811, buf808, buf809, buf827, buf828, buf822, buf823, buf820, buf821, buf847, buf848, buf845, buf846, buf864, buf865, buf859, buf860, buf857, buf858, buf876, buf877, buf874, buf875, buf888, buf889, buf886, buf887, buf905, buf906, buf900, buf901, buf898, buf899, buf925, buf926, buf923, buf924, buf942, buf943, buf937, buf938, buf935, buf936, buf962, buf963, buf960, buf961, buf979, buf980, buf974, buf975, buf972, buf973, buf991, buf992, buf989, buf990, buf1003, buf1004, buf1001, buf1002, buf1020, buf1021, buf1015, buf1016, buf1013, buf1014, buf1040, buf1041, buf1038, buf1039, buf1057, buf1058, buf1052, buf1053, buf1050, buf1051, buf1077, buf1078, buf1075, buf1076, buf1094, buf1095, buf1089, buf1090, buf1087, buf1088, buf1106, buf1107, buf1104, buf1105, buf1118, buf1119, buf1116, buf1117, buf1135, buf1136, buf1130, buf1131, buf1128, buf1129, buf1155, buf1156, buf1153, buf1154, buf1172, buf1173, buf1167, buf1168, buf1165, buf1166, buf1192, buf1193, buf1190, buf1191, buf1209, buf1210, buf1204, buf1205, buf1202, buf1203, buf1221, buf1222, buf1219, buf1220, buf1239, buf1240, buf1233, buf1234, buf1231, buf1232, buf1259, buf1260, buf1257, buf1258, buf1276, buf1277, buf1271, buf1272, buf1269, buf1270, buf1296, buf1297, buf1294, buf1295, buf1313, buf1314, buf1308, buf1309, buf1306, buf1307, buf1325, buf1326, buf1323, buf1324, buf1337, buf1338, buf1335, buf1336, buf1354, buf1355, buf1349, buf1350, buf1347, buf1348, buf1374, buf1375, buf1372, buf1373, buf1391, buf1392, buf1386, buf1387, buf1384, buf1385, buf1411, buf1412, buf1409, buf1410, buf1428, buf1429, buf1423, buf1424, buf1421, buf1422, buf1440, buf1441, buf1438, buf1439, buf1452, buf1453, buf1450, buf1451, buf1469, buf1470, buf1464, buf1465, buf1462, buf1463, buf1489, buf1490, buf1487, buf1488, buf1506, buf1507, buf1501, buf1502, buf1499, buf1500, buf1526, buf1527, buf1524, buf1525, buf1544, buf1545, buf1538, buf1539, buf1536, buf1537, buf1556, buf1557, buf1554, buf1555, buf1574, buf1575, buf1568, buf1569, buf1566, buf1567, buf1594, buf1595, buf1592, buf1593, buf1611, buf1612, buf1606, buf1607, buf1604, buf1605, buf1631, buf1632, buf1629, buf1630, buf1648, buf1649, buf1643, buf1644, buf1641, buf1642, buf1660, buf1661, buf1658, buf1659, buf1672, buf1673, buf1670, buf1671, buf1689, buf1690, buf1684, buf1685, buf1682, buf1683, buf1709, buf1710, buf1707, buf1708, buf1726, buf1727, buf1721, buf1722, buf1719, buf1720, buf1746, buf1747, buf1744, buf1745, buf1763, buf1764, buf1758, buf1759, buf1756, buf1757, buf1775, buf1776, buf1773, buf1774, buf1787, buf1788, buf1785, buf1786, buf1804, buf1805, buf1799, buf1800, buf1797, buf1798, buf1824, buf1825, buf1822, buf1823, buf1841, buf1842, buf1836, buf1837, buf1834, buf1835, buf1861, buf1862, buf1859, buf1860, buf1879, buf1880, buf1873, buf1874, buf1871, buf1872, buf1891, buf1892, buf1889, buf1890, buf1909, buf1910, buf1903, buf1904, buf1901, buf1902, buf1945, buf1946, buf1943, buf1944, buf1956, buf1957, buf1954, buf1955, buf1967, buf1968, buf1965, buf1966, buf1979, buf1980, buf1977, buf1978, buf1975, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_167, primals_184, primals_201, primals_218, primals_235, primals_252, primals_269, primals_286, primals_303, primals_327, primals_344, primals_361, primals_378, primals_395, primals_412, primals_436, primals_453, primals_470, primals_494, primals_511, primals_528, primals_545, primals_562, primals_579, primals_603, primals_620, primals_637, primals_661, primals_678, primals_695, primals_719, primals_736, primals_753, primals_770, primals_787, primals_804, primals_828, primals_845, primals_862, primals_886, primals_903, primals_920, primals_937, primals_954, primals_971, primals_995, primals_1012, primals_1029, primals_1053, primals_1070, primals_1087, primals_1104, buf5, buf17, buf18, buf25, buf26, buf35, buf42, buf43, buf60, buf55, buf62, buf63, buf72, buf79, buf80, buf97, buf92, buf100, buf101, buf109, buf110, buf127, buf122, buf130, buf131, buf140, buf147, buf148, buf165, buf160, buf167, buf168, buf177, buf184, buf185, buf202, buf197, buf205, buf206, buf214, buf215, buf232, buf227, buf235, buf236, buf245, buf252, buf253, buf270, buf265, buf272, buf273, buf282, buf289, buf290, buf307, buf302, buf309, buf310, buf319, buf330, buf331, buf348, buf343, buf350, buf351, buf360, buf367, buf368, buf385, buf380, buf387, buf388, buf397, buf404, buf405, buf422, buf417, buf424, buf425, buf433, buf434, buf451, buf446, buf454, buf455, buf464, buf471, buf472, buf489, buf484, buf491, buf492, buf501, buf508, buf509, buf526, buf521, buf528, buf529, buf538, buf549, buf550, buf567, buf562, buf569, buf570, buf579, buf586, buf587, buf604, buf599, buf606, buf607, buf616, buf623, buf624, buf641, buf636, buf643, buf644, buf653, buf664, buf665, buf682, buf677, buf684, buf685, buf694, buf701, buf702, buf719, buf714, buf721, buf722, buf731, buf738, buf739, buf756, buf751, buf759, buf760, buf768, buf769, buf786, buf781, buf789, buf790, buf799, buf806, buf807, buf824, buf819, buf826, buf827, buf836, buf843, buf844, buf861, buf856, buf863, buf864, buf873, buf884, buf885, buf902, buf897, buf904, buf905, buf914, buf921, buf922, buf939, buf934, buf941, buf942, buf951, buf958, buf959, buf976, buf971, buf978, buf979, buf988, buf999, buf1000, buf1017, buf1012, buf1019, buf1020, buf1029, buf1036, buf1037, buf1054, buf1049, buf1056, buf1057, buf1066, buf1073, buf1074, buf1091, buf1086, buf1093, buf1094, buf1103, buf1114, buf1115, buf1132, buf1127, buf1134, buf1135, buf1144, buf1151, buf1152, buf1169, buf1164, buf1171, buf1172, buf1181, buf1188, buf1189, buf1206, buf1201, buf1208, buf1209, buf1217, buf1218, buf1235, buf1230, buf1238, buf1239, buf1248, buf1255, buf1256, buf1273, buf1268, buf1275, buf1276, buf1285, buf1292, buf1293, buf1310, buf1305, buf1312, buf1313, buf1322, buf1333, buf1334, buf1351, buf1346, buf1353, buf1354, buf1363, buf1370, buf1371, buf1388, buf1383, buf1390, buf1391, buf1400, buf1407, buf1408, buf1425, buf1420, buf1427, buf1428, buf1437, buf1448, buf1449, buf1466, buf1461, buf1468, buf1469, buf1478, buf1485, buf1486, buf1503, buf1498, buf1505, buf1506, buf1515, buf1522, buf1523, buf1540, buf1535, buf1543, buf1544, buf1552, buf1553, buf1570, buf1565, buf1573, buf1574, buf1583, buf1590, buf1591, buf1608, buf1603, buf1610, buf1611, buf1620, buf1627, buf1628, buf1645, buf1640, buf1647, buf1648, buf1657, buf1668, buf1669, buf1686, buf1681, buf1688, buf1689, buf1698, buf1705, buf1706, buf1723, buf1718, buf1725, buf1726, buf1735, buf1742, buf1743, buf1760, buf1755, buf1762, buf1763, buf1772, buf1783, buf1784, buf1801, buf1796, buf1803, buf1804, buf1813, buf1820, buf1821, buf1838, buf1833, buf1840, buf1841, buf1850, buf1857, buf1858, buf1875, buf1870, buf1878, buf1879, buf1887, buf1888, buf1905, buf1900, buf1908, buf1909, buf1918, buf1926, buf1935, buf1942, buf1952, buf1953, buf1964, buf1976, as_strided(buf1963, (1000, 1280), (1280, 1)), buf1981, buf1982, buf1983, buf1984, buf1985, buf1986, buf1987, buf1988, buf1989, buf1990, buf1991, buf1992, buf1993, buf1994, buf1995, buf1996, buf1997, buf1998, buf1999, buf2000, buf2001, buf2002, buf2003, buf2004, buf2005, buf2006, buf2007, buf2008, buf2009, buf2010, buf2011, buf2012, buf2013, buf2014, buf2015, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 203 ops
cuda train mobilenet_v3_large                 ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/mobilenetv3.py", line 219, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/bb/cbb6eyb435nax2dw5el7ux4pqvdo3hiiovjwiopdwipnb752ak45.py", line 2733, in call
    return (buf1, buf2, buf6, buf7, buf10, buf11, buf14, buf15, buf18, buf19, buf22, buf23, buf26, buf27, buf30, buf31, buf34, buf35, buf38, buf39, buf42, buf43, buf53, buf54, buf57, buf58, buf61, buf62, buf72, buf73, buf76, buf77, buf80, buf81, buf91, buf92, buf95, buf96, buf100, buf101, buf105, buf106, buf109, buf110, buf114, buf115, buf119, buf120, buf123, buf124, buf128, buf129, buf133, buf134, buf137, buf138, buf142, buf143, buf147, buf148, buf151, buf152, buf156, buf157, buf168, buf169, buf172, buf173, buf177, buf178, buf189, buf190, buf193, buf194, buf198, buf199, buf210, buf211, buf214, buf215, buf219, buf220, buf231, buf232, buf235, buf236, buf240, buf241, buf252, buf253, buf256, buf257, buf263, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_36, primals_38, primals_39, primals_41, primals_42, primals_44, primals_45, primals_47, primals_49, primals_51, primals_52, primals_54, primals_55, primals_57, primals_58, primals_60, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_111, primals_113, primals_114, primals_116, primals_117, primals_119, primals_120, primals_122, primals_124, primals_126, primals_127, primals_129, primals_130, primals_132, primals_133, primals_135, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_150, primals_152, primals_153, primals_155, primals_156, primals_158, primals_159, primals_161, primals_163, primals_165, primals_166, primals_168, primals_169, primals_313, buf0, buf1, buf2, buf264, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf46, buf48, buf50, buf52, buf53, buf54, buf55, buf56, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf65, buf67, buf69, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf84, buf86, buf88, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf265, buf98, buf99, buf100, buf101, buf266, buf103, buf104, buf105, buf106, buf107, buf108, buf109, buf110, buf267, buf112, buf113, buf114, buf115, buf268, buf117, buf118, buf119, buf120, buf121, buf122, buf123, buf124, buf269, buf126, buf127, buf128, buf129, buf270, buf131, buf132, buf133, buf134, buf135, buf136, buf137, buf138, buf271, buf140, buf141, buf142, buf143, buf272, buf145, buf146, buf147, buf148, buf149, buf150, buf151, buf152, buf273, buf154, buf155, buf156, buf157, buf274, buf159, buf161, buf163, buf165, buf167, buf168, buf169, buf170, buf171, buf172, buf173, buf275, buf175, buf176, buf177, buf178, buf276, buf180, buf182, buf184, buf186, buf188, buf189, buf190, buf191, buf192, buf193, buf194, buf277, buf196, buf197, buf198, buf199, buf278, buf201, buf203, buf205, buf207, buf209, buf210, buf211, buf212, buf213, buf214, buf215, buf279, buf217, buf218, buf219, buf220, buf280, buf222, buf224, buf226, buf228, buf230, buf231, buf232, buf233, buf234, buf235, buf236, buf281, buf238, buf239, buf240, buf241, buf282, buf243, buf245, buf247, buf249, buf251, buf252, buf253, buf254, buf255, buf256, buf257, buf283, as_strided(buf260, (4, 960), (960, 1)), buf261, buf262, as_strided(primals_173, (1000, 1280), (1280, 1)), as_strided(primals_171, (1280, 960), (960, 1)), buf284, buf285, buf286, buf287, buf288, buf289, buf290, buf291, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 187 ops
devgpu019:3716426:3716426 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0
devgpu019:3716426:3716426 [0] NCCL INFO NCCL_SOCKET_IFNAME set to eth0
devgpu019:3716426:3716426 [0] NCCL INFO Bootstrap : Using eth0:2803:6080:6188:70b4::1<0>
devgpu019:3716426:3716426 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
devgpu019:3716426:3716426 [0] NCCL INFO cudaDriverVersion 11040
NCCL version 2.14.3+cuda11.4
devgpu019:3716426:3718755 [0] NCCL INFO NCCL_IB_DISABLE set by environment to 1.
devgpu019:3716426:3718755 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0
devgpu019:3716426:3718755 [0] NCCL INFO NET/Socket : Using [0]eth0:2803:6080:6188:70b4::1<0>
devgpu019:3716426:3718755 [0] NCCL INFO Using network Socket
devgpu019:3716426:3718755 [0] NCCL INFO NET/Socket : GPU Direct RDMA Disabled for HCA 0 'eth0'
devgpu019:3716426:3718755 [0] NCCL INFO === System : maxBw 5000.0 totalBw 0.0 ===
devgpu019:3716426:3718755 [0] NCCL INFO CPU/0 (1/1/2)
devgpu019:3716426:3718755 [0] NCCL INFO + PCI[12.0] - PCI/D000 (11f840001d9bfbe1)
devgpu019:3716426:3718755 [0] NCCL INFO               + PCI[24.0] - PCI/F000 (11f840001d9bfbe0)
devgpu019:3716426:3718755 [0] NCCL INFO                             + PCI[24.0] - GPU/11000 (0)
devgpu019:3716426:3718755 [0] NCCL INFO + PCI[12.0] - NIC/30000
devgpu019:3716426:3718755 [0] NCCL INFO ==========================================
devgpu019:3716426:3718755 [0] NCCL INFO GPU/11000 :GPU/11000 (0/5000.000000/LOC) CPU/0 (3/12.000000/PHB)
devgpu019:3716426:3718755 [0] NCCL INFO Setting affinity for GPU 0 to ffffff,00000000,00000000,00ffffff
devgpu019:3716426:3718755 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 16, bw 44.000000/44.000000, type LOC/PIX, sameChannels 1
devgpu019:3716426:3718755 [0] NCCL INFO  0 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  1 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  2 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  3 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  4 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  5 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  6 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  7 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  8 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  9 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 10 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 11 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 12 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 13 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 14 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 15 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO Pattern 3, crossNic 0, nChannels 16, bw 44.000000/44.000000, type LOC/PIX, sameChannels 1
devgpu019:3716426:3718755 [0] NCCL INFO  0 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  1 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  2 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  3 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  4 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  5 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  6 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  7 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  8 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  9 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 10 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 11 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 12 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 13 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 14 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 15 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO Pattern 3, crossNic 0, nChannels 16, bw 44.000000/44.000000, type LOC/PIX, sameChannels 1
devgpu019:3716426:3718755 [0] NCCL INFO  0 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  1 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  2 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  3 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  4 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  5 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  6 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  7 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  8 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO  9 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 10 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 11 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 12 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 13 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 14 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO 15 : GPU/0
devgpu019:3716426:3718755 [0] NCCL INFO Tree 0 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 16 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 1 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 17 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 2 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 18 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 3 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 19 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 4 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 20 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 5 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 21 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 6 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 22 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 7 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 23 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 8 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 24 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 9 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 25 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 10 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 26 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 11 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 27 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 12 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 28 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 13 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 29 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 14 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 30 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 15 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Tree 31 : -1 -> 0 -> -1/-1/-1
devgpu019:3716426:3718755 [0] NCCL INFO Channel 00/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 01/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 02/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 03/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 04/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 05/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 06/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 07/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 08/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 09/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 10/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 11/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 12/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 13/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 14/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 15/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 16/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 17/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 18/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 19/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 20/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 21/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 22/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 23/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 24/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 25/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 26/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 27/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 28/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 29/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 30/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Channel 31/32 :    0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 00 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 01 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 02 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 03 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 04 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 05 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 06 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 07 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 08 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 09 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 10 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 11 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 12 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 13 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 14 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 15 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 16 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 17 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 18 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 19 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 20 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 21 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 22 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 23 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 24 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 25 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 26 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 27 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 28 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 29 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 30 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Ring 31 : 0 -> 0 -> 0
devgpu019:3716426:3718755 [0] NCCL INFO Trees [0] -1/-1/-1->0->-1 [1] -1/-1/-1->0->-1 [2] -1/-1/-1->0->-1 [3] -1/-1/-1->0->-1 [4] -1/-1/-1->0->-1 [5] -1/-1/-1->0->-1 [6] -1/-1/-1->0->-1 [7] -1/-1/-1->0->-1 [8] -1/-1/-1->0->-1 [9] -1/-1/-1->0->-1 [10] -1/-1/-1->0->-1 [11] -1/-1/-1->0->-1 [12] -1/-1/-1->0->-1 [13] -1/-1/-1->0->-1 [14] -1/-1/-1->0->-1 [15] -1/-1/-1->0->-1 [16] -1/-1/-1->0->-1 [17] -1/-1/-1->0->-1 [18] -1/-1/-1->0->-1 [19] -1/-1/-1->0->-1 [20] -1/-1/-1->0->-1 [21] -1/-1/-1->0->-1 [22] -1/-1/-1->0->-1 [23] -1/-1/-1->0->-1 [24] -1/-1/-1->0->-1 [25] -1/-1/-1->0->-1 [26] -1/-1/-1->0->-1 [27] -1/-1/-1->0->-1 [28] -1/-1/-1->0->-1 [29] -1/-1/-1->0->-1 [30] -1/-1/-1->0->-1 [31] -1/-1/-1->0->-1
devgpu019:3716426:3718755 [0] NCCL INFO Connected all rings
devgpu019:3716426:3718755 [0] NCCL INFO Connected all trees
devgpu019:3716426:3718755 [0] NCCL INFO 32 coll channels, 32 p2p channels, 32 p2p channels per peer
devgpu019:3716426:3718772 [0] NCCL INFO New proxy send connection 0 from local rank 0, transport 2
devgpu019:3716426:3718755 [0] NCCL INFO Connection to proxy localRank 0 -> connection 0x7fbf58002e80
devgpu019:3716426:3718755 [0] NCCL INFO comm 0x71c1fc0 rank 0 nranks 1 cudaDev 0 busId 11000 - Init COMPLETE
cuda train moco                               [2022-12-12 06:49:45,255] torch._dynamo.convert_frame: [WARNING] torch._dynamo hit config.cache_size_limit (64)
   function: '<graph break in _momentum_update_key_encoder>' (/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py:50)
   reasons:  ___tuple_iterator_len(___stack0) == 160
to diagnose recompilation issues, see https://github.com/pytorch/torchdynamo/blob/main/TROUBLESHOOTING.md.
ERROR:common:

from user code:
   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 172, in concat_all_gather
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/distributed/distributed_c10d.py", line 1346, in wrapper
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/distributed/distributed_c10d.py", line 2341, in all_gather
    work = default_pg.allgather([tensor_list], [tensor])
  File "/data/users/ezyang/a/pytorch/torch/_subclasses/fake_tensor.py", line 896, in __torch_dispatch__
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Tensors must be CUDA and dense

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1046, in get_fake_value
    return wrap_fake_exception(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 712, in wrap_fake_exception
    return fn()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1047, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <function all_gather at 0x7fc85908dee0>(*([FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0)], FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0)), **{'async_op': False}):
Tensors must be CUDA and dense
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/parallel/distributed.py", line 1098, in forward
    output = self._run_ddp_forward(*inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/parallel/distributed.py", line 1051, in _run_ddp_forward
    return module_to_run(*inputs[0], **kwargs[0])  # type: ignore[index]
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 130, in forward
    self._momentum_update_key_encoder()  # update the key encoder
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 133, in <graph break in forward>
    im_k, idx_unshuffle = self._batch_shuffle_ddp(im_k)
  File "/data/users/ezyang/a/pytorch/torch/autograd/grad_mode.py", line 34, in decorate_context
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 76, in _batch_shuffle_ddp
    x_gather = concat_all_gather(x)
  File "/data/users/ezyang/a/pytorch/torch/autograd/grad_mode.py", line 34, in decorate_context
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 329, in catch_errors
    return hijacked_callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1005, in CALL_FUNCTION_KW
    self.call_function(fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/torch.py", line 444, in call_function
    tensor_variable = wrap_fx_proxy(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 729, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 766, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 172, in concat_all_gather
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 68 graph(s) covering 507 ops
devgpu019:3716426:3718772 [0] NCCL INFO [Service thread] Connection closed by localRank 0
devgpu019:3716426:3716426 [0] NCCL INFO comm 0x71c1fc0 rank 0 nranks 1 cudaDev 0 busId 11000 - Abort COMPLETE
cuda train nvidia_deeprecommender             ERROR:common:'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=2] = placeholder[target=tangents_1]
Original traceback:
None
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 233, in placeholder
    sizes, strides = self.symbolic_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 50, in symbolic_sizes_strides
    size, stride = self._shape_env.create_symbolic_sizes_strides(ex)
AttributeError: 'ShapeEnv' object has no attribute 'create_symbolic_sizes_strides'

While executing %tangents_1 : [#users=2] = placeholder[target=tangents_1]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 13 ops
--dataroot /data/users/ezyang/a/torchbenchmark/torchbenchmark/data/.data/pytorch_CycleGAN_and_pix2pix_inputs/datasets/horse2zebra --name horse2zebra --model cycle_gan --display_id 0 --n_epochs 3 --n_epochs_decay 3 --gpu_ids 0 --checkpoints_dir /data/users/ezyang/a/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/.data/checkpoints
cuda train pytorch_CycleGAN_and_pix2pix       ERROR:common:'int' object has no attribute 'size'

While executing %sym_size : [#users=74] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_model': "<class 'torch.nn.modules.container.Sequential'>", 'self_model_2': "<class 'torch.nn.modules.instancenorm.InstanceNorm2d'>"}
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py", line 372, in forward
    return self.model(input)
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 361, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'

While executing %sym_size : [#users=74] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_model': "<class 'torch.nn.modules.container.Sequential'>", 'self_model_2': "<class 'torch.nn.modules.instancenorm.InstanceNorm2d'>"}
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py", line 372, in forward
    return self.model(input)

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 91 ops
cuda train pytorch_stargan                    ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 60 ops
cuda train pytorch_struct                     ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 47 ops
cuda train pytorch_unet                       [2022-12-12 06:51:55,058] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <function sym_float at 0x7fdd4f725ca0>
  args[0]: 80.0
ERROR:common:compile_fx raised LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7fdd4f725ca0>
  args[0]: 80.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_30,), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: sym_float() missing 1 required positional argument: 'a'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7fdd4f725ca0>
  args[0]: 80.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_30,), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: sym_float() missing 1 required positional argument: 'a'
  target: <function sym_float at 0x7fdd4f725ca0>
  args[0]: 80.0

While executing %sym_float : [#users=1] = call_function[target=torch.fx.experimental.symbolic_shapes.sym_float](args = (%mul_30,), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 135 ops
cuda train resnet152                          ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/resnet.py", line 284, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/52/c52pgv3xeogyipebdwld3wxarzy7mb4c2fu7hartsogr4dmvjvzs.py", line 2691, in call
    return (buf1, buf2, buf7, buf8, buf11, buf12, buf15, buf16, buf18, buf19, buf23, buf24, buf27, buf28, buf31, buf32, buf35, buf36, buf39, buf40, buf43, buf44, buf47, buf48, buf51, buf52, buf55, buf56, buf58, buf59, buf63, buf64, buf67, buf68, buf71, buf72, buf75, buf76, buf79, buf80, buf83, buf84, buf87, buf88, buf91, buf92, buf95, buf96, buf99, buf100, buf103, buf104, buf107, buf108, buf111, buf112, buf115, buf116, buf119, buf120, buf123, buf124, buf127, buf128, buf131, buf132, buf135, buf136, buf139, buf140, buf143, buf144, buf147, buf148, buf151, buf152, buf155, buf156, buf158, buf159, buf163, buf164, buf167, buf168, buf171, buf172, buf175, buf176, buf179, buf180, buf183, buf184, buf187, buf188, buf191, buf192, buf195, buf196, buf199, buf200, buf203, buf204, buf207, buf208, buf211, buf212, buf215, buf216, buf219, buf220, buf223, buf224, buf227, buf228, buf231, buf232, buf235, buf236, buf239, buf240, buf243, buf244, buf247, buf248, buf251, buf252, buf255, buf256, buf259, buf260, buf263, buf264, buf267, buf268, buf271, buf272, buf275, buf276, buf279, buf280, buf283, buf284, buf287, buf288, buf291, buf292, buf295, buf296, buf299, buf300, buf303, buf304, buf307, buf308, buf311, buf312, buf315, buf316, buf319, buf320, buf323, buf324, buf327, buf328, buf331, buf332, buf335, buf336, buf339, buf340, buf343, buf344, buf347, buf348, buf351, buf352, buf355, buf356, buf359, buf360, buf363, buf364, buf367, buf368, buf371, buf372, buf375, buf376, buf379, buf380, buf383, buf384, buf387, buf388, buf391, buf392, buf395, buf396, buf399, buf400, buf403, buf404, buf407, buf408, buf411, buf412, buf415, buf416, buf419, buf420, buf423, buf424, buf427, buf428, buf431, buf432, buf435, buf436, buf439, buf440, buf443, buf444, buf447, buf448, buf451, buf452, buf455, buf456, buf459, buf460, buf463, buf464, buf467, buf468, buf471, buf472, buf475, buf476, buf479, buf480, buf483, buf484, buf487, buf488, buf491, buf492, buf495, buf496, buf499, buf500, buf503, buf504, buf507, buf508, buf511, buf512, buf515, buf516, buf519, buf520, buf523, buf524, buf527, buf528, buf531, buf532, buf535, buf536, buf539, buf540, buf543, buf544, buf547, buf548, buf551, buf552, buf555, buf556, buf559, buf560, buf563, buf564, buf567, buf568, buf571, buf572, buf575, buf576, buf579, buf580, buf583, buf584, buf587, buf588, buf591, buf592, buf594, buf595, buf599, buf600, buf603, buf604, buf607, buf608, buf611, buf612, buf615, buf616, buf619, buf620, buf624, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_160, primals_161, primals_163, primals_164, primals_166, primals_167, primals_169, primals_170, primals_172, primals_173, primals_175, primals_176, primals_178, primals_179, primals_181, primals_182, primals_184, primals_185, primals_187, primals_188, primals_190, primals_191, primals_193, primals_194, primals_196, primals_197, primals_199, primals_200, primals_202, primals_203, primals_205, primals_206, primals_208, primals_209, primals_211, primals_212, primals_214, primals_215, primals_217, primals_218, primals_220, primals_221, primals_223, primals_224, primals_226, primals_227, primals_229, primals_230, primals_232, primals_233, primals_235, primals_236, primals_238, primals_239, primals_241, primals_242, primals_244, primals_245, primals_247, primals_248, primals_250, primals_251, primals_253, primals_254, primals_256, primals_257, primals_259, primals_260, primals_262, primals_263, primals_265, primals_266, primals_268, primals_269, primals_271, primals_272, primals_274, primals_275, primals_277, primals_278, primals_280, primals_281, primals_283, primals_284, primals_286, primals_287, primals_289, primals_290, primals_292, primals_293, primals_295, primals_296, primals_298, primals_299, primals_301, primals_302, primals_304, primals_305, primals_307, primals_308, primals_310, primals_311, primals_313, primals_314, primals_316, primals_317, primals_319, primals_320, primals_322, primals_323, primals_325, primals_326, primals_328, primals_329, primals_331, primals_332, primals_334, primals_335, primals_337, primals_338, primals_340, primals_341, primals_343, primals_344, primals_346, primals_347, primals_349, primals_350, primals_352, primals_353, primals_355, primals_356, primals_358, primals_359, primals_361, primals_362, primals_364, primals_365, primals_367, primals_368, primals_370, primals_371, primals_373, primals_374, primals_376, primals_377, primals_379, primals_380, primals_382, primals_383, primals_385, primals_386, primals_388, primals_389, primals_391, primals_392, primals_394, primals_395, primals_397, primals_398, primals_400, primals_401, primals_403, primals_404, primals_406, primals_407, primals_409, primals_410, primals_412, primals_413, primals_415, primals_416, primals_418, primals_419, primals_421, primals_422, primals_424, primals_425, primals_427, primals_428, primals_430, primals_431, primals_433, primals_434, primals_436, primals_437, primals_439, primals_440, primals_442, primals_443, primals_445, primals_446, primals_448, primals_449, primals_451, primals_452, primals_454, primals_455, primals_457, primals_458, primals_460, primals_461, primals_463, primals_464, primals_933, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf56, buf57, buf58, buf59, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, buf107, buf108, buf109, buf110, buf111, buf112, buf113, buf114, buf115, buf116, buf117, buf118, buf119, buf120, buf121, buf122, buf123, buf124, buf125, buf126, buf127, buf128, buf129, buf130, buf131, buf132, buf133, buf134, buf135, buf136, buf137, buf138, buf139, buf140, buf141, buf142, buf143, buf144, buf145, buf146, buf147, buf148, buf149, buf150, buf151, buf152, buf153, buf154, buf155, buf156, buf157, buf158, buf159, buf161, buf162, buf163, buf164, buf165, buf166, buf167, buf168, buf169, buf170, buf171, buf172, buf173, buf174, buf175, buf176, buf177, buf178, buf179, buf180, buf181, buf182, buf183, buf184, buf185, buf186, buf187, buf188, buf189, buf190, buf191, buf192, buf193, buf194, buf195, buf196, buf197, buf198, buf199, buf200, buf201, buf202, buf203, buf204, buf205, buf206, buf207, buf208, buf209, buf210, buf211, buf212, buf213, buf214, buf215, buf216, buf217, buf218, buf219, buf220, buf221, buf222, buf223, buf224, buf225, buf226, buf227, buf228, buf229, buf230, buf231, buf232, buf233, buf234, buf235, buf236, buf237, buf238, buf239, buf240, buf241, buf242, buf243, buf244, buf245, buf246, buf247, buf248, buf249, buf250, buf251, buf252, buf253, buf254, buf255, buf256, buf257, buf258, buf259, buf260, buf261, buf262, buf263, buf264, buf265, buf266, buf267, buf268, buf269, buf270, buf271, buf272, buf273, buf274, buf275, buf276, buf277, buf278, buf279, buf280, buf281, buf282, buf283, buf284, buf285, buf286, buf287, buf288, buf289, buf290, buf291, buf292, buf293, buf294, buf295, buf296, buf297, buf298, buf299, buf300, buf301, buf302, buf303, buf304, buf305, buf306, buf307, buf308, buf309, buf310, buf311, buf312, buf313, buf314, buf315, buf316, buf317, buf318, buf319, buf320, buf321, buf322, buf323, buf324, buf325, buf326, buf327, buf328, buf329, buf330, buf331, buf332, buf333, buf334, buf335, buf336, buf337, buf338, buf339, buf340, buf341, buf342, buf343, buf344, buf345, buf346, buf347, buf348, buf349, buf350, buf351, buf352, buf353, buf354, buf355, buf356, buf357, buf358, buf359, buf360, buf361, buf362, buf363, buf364, buf365, buf366, buf367, buf368, buf369, buf370, buf371, buf372, buf373, buf374, buf375, buf376, buf377, buf378, buf379, buf380, buf381, buf382, buf383, buf384, buf385, buf386, buf387, buf388, buf389, buf390, buf391, buf392, buf393, buf394, buf395, buf396, buf397, buf398, buf399, buf400, buf401, buf402, buf403, buf404, buf405, buf406, buf407, buf408, buf409, buf410, buf411, buf412, buf413, buf414, buf415, buf416, buf417, buf418, buf419, buf420, buf421, buf422, buf423, buf424, buf425, buf426, buf427, buf428, buf429, buf430, buf431, buf432, buf433, buf434, buf435, buf436, buf437, buf438, buf439, buf440, buf441, buf442, buf443, buf444, buf445, buf446, buf447, buf448, buf449, buf450, buf451, buf452, buf453, buf454, buf455, buf456, buf457, buf458, buf459, buf460, buf461, buf462, buf463, buf464, buf465, buf466, buf467, buf468, buf469, buf470, buf471, buf472, buf473, buf474, buf475, buf476, buf477, buf478, buf479, buf480, buf481, buf482, buf483, buf484, buf485, buf486, buf487, buf488, buf489, buf490, buf491, buf492, buf493, buf494, buf495, buf496, buf497, buf498, buf499, buf500, buf501, buf502, buf503, buf504, buf505, buf506, buf507, buf508, buf509, buf510, buf511, buf512, buf513, buf514, buf515, buf516, buf517, buf518, buf519, buf520, buf521, buf522, buf523, buf524, buf525, buf526, buf527, buf528, buf529, buf530, buf531, buf532, buf533, buf534, buf535, buf536, buf537, buf538, buf539, buf540, buf541, buf542, buf543, buf544, buf545, buf546, buf547, buf548, buf549, buf550, buf551, buf552, buf553, buf554, buf555, buf556, buf557, buf558, buf559, buf560, buf561, buf562, buf563, buf564, buf565, buf566, buf567, buf568, buf569, buf570, buf571, buf572, buf573, buf574, buf575, buf576, buf577, buf578, buf579, buf580, buf581, buf582, buf583, buf584, buf585, buf586, buf587, buf588, buf589, buf590, buf591, buf592, buf593, buf594, buf595, buf597, buf598, buf599, buf600, buf601, buf602, buf603, buf604, buf605, buf606, buf607, buf608, buf609, buf610, buf611, buf612, buf613, buf614, buf615, buf616, buf617, buf618, buf619, buf620, as_strided(buf623, (4, 2048), (2048, 1)), as_strided(primals_466, (1000, 2048), (2048, 1)), buf625, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 515 ops
cuda train resnet18                           ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/resnet.py", line 284, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/h6/ch65dnxynpy6oom7cpioz6zksofxnbtklvtwq6zhytd2xx3ntzfm.py", line 971, in call
    return (buf1, buf2, buf7, buf8, buf11, buf12, buf15, buf16, buf19, buf20, buf23, buf24, buf27, buf28, buf30, buf31, buf35, buf36, buf39, buf40, buf43, buf44, buf47, buf48, buf50, buf51, buf55, buf56, buf59, buf60, buf63, buf64, buf67, buf68, buf70, buf71, buf75, buf76, buf79, buf80, buf84, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_123, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf53, buf54, buf55, buf56, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, as_strided(buf83, (4, 512), (512, 1)), as_strided(primals_61, (1000, 512), (512, 1)), buf85, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 69 ops
cuda train resnet50                           ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/resnet.py", line 284, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/je/cje5fxxx2qqxbrfyj6vwpnkg25cht4gt5uzndo44m45r4vpioeb7.py", line 1569, in call
    return (buf1, buf2, buf7, buf8, buf11, buf12, buf15, buf16, buf18, buf19, buf23, buf24, buf27, buf28, buf31, buf32, buf35, buf36, buf39, buf40, buf43, buf44, buf47, buf48, buf51, buf52, buf55, buf56, buf58, buf59, buf63, buf64, buf67, buf68, buf71, buf72, buf75, buf76, buf79, buf80, buf83, buf84, buf87, buf88, buf91, buf92, buf95, buf96, buf99, buf100, buf103, buf104, buf107, buf108, buf110, buf111, buf115, buf116, buf119, buf120, buf123, buf124, buf127, buf128, buf131, buf132, buf135, buf136, buf139, buf140, buf143, buf144, buf147, buf148, buf151, buf152, buf155, buf156, buf159, buf160, buf163, buf164, buf167, buf168, buf171, buf172, buf175, buf176, buf179, buf180, buf183, buf184, buf186, buf187, buf191, buf192, buf195, buf196, buf199, buf200, buf203, buf204, buf207, buf208, buf211, buf212, buf216, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_321, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf56, buf57, buf58, buf59, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, buf107, buf108, buf109, buf110, buf111, buf113, buf114, buf115, buf116, buf117, buf118, buf119, buf120, buf121, buf122, buf123, buf124, buf125, buf126, buf127, buf128, buf129, buf130, buf131, buf132, buf133, buf134, buf135, buf136, buf137, buf138, buf139, buf140, buf141, buf142, buf143, buf144, buf145, buf146, buf147, buf148, buf149, buf150, buf151, buf152, buf153, buf154, buf155, buf156, buf157, buf158, buf159, buf160, buf161, buf162, buf163, buf164, buf165, buf166, buf167, buf168, buf169, buf170, buf171, buf172, buf173, buf174, buf175, buf176, buf177, buf178, buf179, buf180, buf181, buf182, buf183, buf184, buf185, buf186, buf187, buf189, buf190, buf191, buf192, buf193, buf194, buf195, buf196, buf197, buf198, buf199, buf200, buf201, buf202, buf203, buf204, buf205, buf206, buf207, buf208, buf209, buf210, buf211, buf212, as_strided(buf215, (4, 2048), (2048, 1)), as_strided(primals_160, (1000, 2048), (2048, 1)), buf217, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 175 ops
cuda train resnet50_quantized_qat             WARNING:common:fp64 golden ref were not generated for resnet50_quantized_qat. Setting accuracy check to cosine
[2022-12-12 06:55:23,622] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,633] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,658] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,684] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,692] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,713] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,721] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,740] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,746] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,766] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,771] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,789] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,794] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,801] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,820] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,827] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,847] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,853] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,871] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,877] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,883] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,902] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,909] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,928] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,934] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,952] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,958] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,965] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,986] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:23,994] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,017] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,025] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,046] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,052] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,072] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,077] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,085] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,109] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,116] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,135] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,141] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,160] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,165] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,172] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,191] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,198] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,217] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,224] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,242] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,248] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,255] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,275] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,283] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,303] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,309] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,329] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,335] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,342] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,363] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,379] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,402] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,802] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,824] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,830] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,850] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,856] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,865] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,886] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,894] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,915] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,922] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,943] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,950] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,957] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,979] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:24,986] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,008] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,016] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,036] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,043] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,051] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,082] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,089] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,112] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,120] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,140] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,147] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,154] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,176] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,184] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,205] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,213] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,234] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,240] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,247] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,270] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,278] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,300] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,309] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,331] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,339] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,347] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,383] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,395] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,420] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,430] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,454] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,462] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,486] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,493] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,505] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,529] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,537] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,561] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,569] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,594] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,603] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,614] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,638] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,646] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,671] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,678] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,719] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,728] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,735] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,740] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,744] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-12-12 06:55:25,750] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
    return self._wrapped_call(self, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 279, in __call__
    raise e
  File "/data/users/ezyang/a/pytorch/torch/fx/graph_module.py", line 269, in __call__
    return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "<eval_with_key>.8", line 4, in forward
    def forward(self, x : torch.Tensor) -> torch.Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/ow/cowkjznhz3xsve5n5cxxr2yswzyg4lhbxds63lcrst2t7ykvrwxq.py", line 6536, in call
    return (buf9, buf10, buf7, buf8, buf26, buf27, buf21, buf22, buf19, buf20, buf47, buf48, buf45, buf46, buf64, buf65, buf59, buf60, buf57, buf58, buf76, buf77, buf74, buf75, buf92, buf93, buf88, buf89, buf86, buf87, buf104, buf105, buf102, buf103, buf122, buf123, buf116, buf117, buf114, buf115, buf134, buf135, buf132, buf133, buf151, buf152, buf146, buf147, buf144, buf145, buf163, buf164, buf161, buf162, buf175, buf176, buf173, buf174, buf192, buf193, buf187, buf188, buf185, buf186, buf204, buf205, buf202, buf203, buf220, buf221, buf216, buf217, buf214, buf215, buf232, buf233, buf230, buf231, buf249, buf250, buf244, buf245, buf242, buf243, buf261, buf262, buf259, buf260, buf273, buf274, buf271, buf272, buf290, buf291, buf285, buf286, buf283, buf284, buf302, buf303, buf300, buf301, buf318, buf319, buf314, buf315, buf312, buf313, buf330, buf331, buf328, buf329, buf347, buf348, buf342, buf343, buf340, buf341, buf359, buf360, buf357, buf358, buf371, buf372, buf369, buf370, buf389, buf390, buf383, buf384, buf381, buf382, buf401, buf402, buf399, buf400, buf417, buf418, buf413, buf414, buf411, buf412, buf429, buf430, buf427, buf428, buf447, buf448, buf441, buf442, buf439, buf440, buf459, buf460, buf457, buf458, buf476, buf477, buf471, buf472, buf469, buf470, buf488, buf489, buf486, buf487, buf500, buf501, buf498, buf499, buf517, buf518, buf512, buf513, buf510, buf511, buf529, buf530, buf527, buf528, buf545, buf546, buf541, buf542, buf539, buf540, buf557, buf558, buf555, buf556, buf574, buf575, buf569, buf570, buf567, buf568, buf586, buf587, buf584, buf585, buf598, buf599, buf596, buf597, buf615, buf616, buf610, buf611, buf608, buf609, buf627, buf628, buf625, buf626, buf643, buf644, buf639, buf640, buf637, buf638, buf655, buf656, buf653, buf654, buf672, buf673, buf667, buf668, buf665, buf666, buf684, buf685, buf682, buf683, buf696, buf697, buf694, buf695, buf713, buf714, buf708, buf709, buf706, buf707, buf725, buf726, buf723, buf724, buf741, buf742, buf737, buf738, buf735, buf736, buf753, buf754, buf751, buf752, buf770, buf771, buf765, buf766, buf763, buf764, buf782, buf783, buf780, buf781, buf794, buf795, buf792, buf793, buf811, buf812, buf806, buf807, buf804, buf805, buf823, buf824, buf821, buf822, buf839, buf840, buf835, buf836, buf833, buf834, buf851, buf852, buf849, buf850, buf869, buf870, buf863, buf864, buf861, buf862, buf881, buf882, buf879, buf880, buf898, buf899, buf893, buf894, buf891, buf892, buf910, buf911, buf908, buf909, buf922, buf923, buf920, buf921, buf939, buf940, buf934, buf935, buf932, buf933, buf951, buf952, buf949, buf950, buf967, buf968, buf963, buf964, buf961, buf962, buf979, buf980, buf977, buf978, buf996, buf997, buf991, buf992, buf989, buf990, buf1008, buf1009, buf1006, buf1007, buf1020, buf1021, buf1018, buf1019, buf1037, buf1038, buf1032, buf1033, buf1030, buf1031, buf1049, buf1050, buf1047, buf1048, buf1065, buf1066, buf1061, buf1062, buf1059, buf1060, buf1077, buf1078, buf1075, buf1076, buf1094, buf1095, buf1089, buf1090, buf1087, buf1088, buf1106, buf1107, buf1104, buf1105, buf1118, buf1119, buf1116, buf1117, buf1135, buf1136, buf1130, buf1131, buf1128, buf1129, buf1147, buf1148, buf1145, buf1146, buf1163, buf1164, buf1159, buf1160, buf1157, buf1158, buf1175, buf1176, buf1173, buf1174, buf1192, buf1193, buf1187, buf1188, buf1185, buf1186, buf1204, buf1205, buf1202, buf1203, buf1216, buf1217, buf1214, buf1215, buf1233, buf1234, buf1228, buf1229, buf1226, buf1227, buf1245, buf1246, buf1243, buf1244, buf1261, buf1262, buf1257, buf1258, buf1255, buf1256, buf1273, buf1274, buf1271, buf1272, buf1290, buf1291, buf1285, buf1286, buf1283, buf1284, buf1302, buf1303, buf1300, buf1301, buf1314, buf1315, buf1312, buf1313, buf1331, buf1332, buf1326, buf1327, buf1324, buf1325, buf1343, buf1344, buf1341, buf1342, buf1359, buf1360, buf1355, buf1356, buf1353, buf1354, buf1371, buf1372, buf1369, buf1370, buf1388, buf1389, buf1383, buf1384, buf1381, buf1382, buf1400, buf1401, buf1398, buf1399, buf1412, buf1413, buf1410, buf1411, buf1429, buf1430, buf1424, buf1425, buf1422, buf1423, buf1441, buf1442, buf1439, buf1440, buf1457, buf1458, buf1453, buf1454, buf1451, buf1452, buf1469, buf1470, buf1467, buf1468, buf1487, buf1488, buf1481, buf1482, buf1479, buf1480, buf1499, buf1500, buf1497, buf1498, buf1516, buf1517, buf1511, buf1512, buf1509, buf1510, buf1528, buf1529, buf1526, buf1527, buf1540, buf1541, buf1538, buf1539, buf1557, buf1558, buf1552, buf1553, buf1550, buf1551, buf1569, buf1570, buf1567, buf1568, buf1585, buf1586, buf1581, buf1582, buf1579, buf1580, buf1597, buf1598, buf1595, buf1596, buf1614, buf1615, buf1609, buf1610, buf1607, buf1608, buf1626, buf1627, buf1624, buf1625, buf1638, buf1639, buf1636, buf1637, buf1655, buf1656, buf1650, buf1651, buf1648, buf1649, buf1667, buf1668, buf1665, buf1666, buf1683, buf1684, buf1679, buf1680, buf1677, buf1678, buf1695, buf1696, buf1693, buf1694, buf1712, buf1713, buf1707, buf1708, buf1705, buf1706, buf1724, buf1725, buf1722, buf1723, buf1752, buf1753, buf1750, buf1751, buf1763, buf1764, buf1761, buf1762, buf1775, buf1776, buf1773, buf1774, buf1771, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_170, primals_187, primals_204, primals_221, primals_238, primals_262, primals_279, primals_296, primals_320, primals_337, primals_354, primals_378, primals_395, primals_412, primals_429, primals_453, primals_470, primals_487, primals_511, primals_528, primals_545, primals_569, primals_586, primals_603, primals_627, primals_644, primals_661, primals_678, primals_702, primals_719, primals_736, primals_760, primals_777, primals_794, primals_818, primals_835, primals_852, primals_876, primals_893, primals_910, primals_934, primals_951, primals_968, primals_992, primals_1009, primals_1026, primals_1043, primals_1067, primals_1084, primals_1101, primals_1125, primals_1142, primals_1159, buf5, buf17, buf18, buf25, buf26, buf34, buf35, buf41, buf43, buf44, buf61, buf56, buf63, buf64, buf72, buf73, buf84, buf85, buf91, buf92, buf100, buf101, buf118, buf113, buf121, buf122, buf131, buf148, buf143, buf150, buf151, buf160, buf171, buf172, buf189, buf184, buf191, buf192, buf200, buf201, buf212, buf213, buf219, buf220, buf228, buf229, buf246, buf241, buf248, buf249, buf258, buf269, buf270, buf287, buf282, buf289, buf290, buf298, buf299, buf310, buf311, buf317, buf318, buf326, buf327, buf344, buf339, buf346, buf347, buf356, buf367, buf368, buf385, buf380, buf388, buf389, buf397, buf398, buf409, buf410, buf416, buf417, buf425, buf426, buf443, buf438, buf446, buf447, buf456, buf473, buf468, buf475, buf476, buf485, buf496, buf497, buf514, buf509, buf516, buf517, buf525, buf526, buf537, buf538, buf544, buf545, buf553, buf554, buf571, buf566, buf573, buf574, buf583, buf594, buf595, buf612, buf607, buf614, buf615, buf623, buf624, buf635, buf636, buf642, buf643, buf651, buf652, buf669, buf664, buf671, buf672, buf681, buf692, buf693, buf710, buf705, buf712, buf713, buf721, buf722, buf733, buf734, buf740, buf741, buf749, buf750, buf767, buf762, buf769, buf770, buf779, buf790, buf791, buf808, buf803, buf810, buf811, buf819, buf820, buf831, buf832, buf838, buf839, buf847, buf848, buf865, buf860, buf868, buf869, buf878, buf895, buf890, buf897, buf898, buf907, buf918, buf919, buf936, buf931, buf938, buf939, buf947, buf948, buf959, buf960, buf966, buf967, buf975, buf976, buf993, buf988, buf995, buf996, buf1005, buf1016, buf1017, buf1034, buf1029, buf1036, buf1037, buf1045, buf1046, buf1057, buf1058, buf1064, buf1065, buf1073, buf1074, buf1091, buf1086, buf1093, buf1094, buf1103, buf1114, buf1115, buf1132, buf1127, buf1134, buf1135, buf1143, buf1144, buf1155, buf1156, buf1162, buf1163, buf1171, buf1172, buf1189, buf1184, buf1191, buf1192, buf1201, buf1212, buf1213, buf1230, buf1225, buf1232, buf1233, buf1241, buf1242, buf1253, buf1254, buf1260, buf1261, buf1269, buf1270, buf1287, buf1282, buf1289, buf1290, buf1299, buf1310, buf1311, buf1328, buf1323, buf1330, buf1331, buf1339, buf1340, buf1351, buf1352, buf1358, buf1359, buf1367, buf1368, buf1385, buf1380, buf1387, buf1388, buf1397, buf1408, buf1409, buf1426, buf1421, buf1428, buf1429, buf1437, buf1438, buf1449, buf1450, buf1456, buf1457, buf1465, buf1466, buf1483, buf1478, buf1486, buf1487, buf1496, buf1513, buf1508, buf1515, buf1516, buf1525, buf1536, buf1537, buf1554, buf1549, buf1556, buf1557, buf1565, buf1566, buf1577, buf1578, buf1584, buf1585, buf1593, buf1594, buf1611, buf1606, buf1613, buf1614, buf1623, buf1634, buf1635, buf1652, buf1647, buf1654, buf1655, buf1663, buf1664, buf1675, buf1676, buf1682, buf1683, buf1691, buf1692, buf1709, buf1704, buf1711, buf1712, buf1721, buf1733, buf1742, buf1748, buf1749, buf1760, buf1772, as_strided(buf1759, (1000, 2048), (2048, 1)), buf1777, buf1778, buf1779, buf1780, buf1781, buf1782, buf1783, buf1784, buf1785, buf1786, buf1787, buf1788, buf1789, buf1790, buf1791, buf1792, buf1793, buf1794, buf1795, buf1796, buf1797, buf1798, buf1799, buf1800, buf1801, buf1802, buf1803, buf1804, buf1805, buf1806, buf1807, buf1808, buf1809, buf1810, buf1811, buf1812, buf1813, buf1814, buf1815, buf1816, buf1817, buf1818, buf1819, buf1820, buf1821, buf1822, buf1823, buf1824, buf1825, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 163 ops
cuda train resnext50_32x4d                    ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/resnet.py", line 284, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/zx/czxdjqzfafoubcx2lj4sneafh74fnf7zsvzshykslm335hitakww.py", line 1569, in call
    return (buf1, buf2, buf7, buf8, buf11, buf12, buf15, buf16, buf18, buf19, buf23, buf24, buf27, buf28, buf31, buf32, buf35, buf36, buf39, buf40, buf43, buf44, buf47, buf48, buf51, buf52, buf55, buf56, buf58, buf59, buf63, buf64, buf67, buf68, buf71, buf72, buf75, buf76, buf79, buf80, buf83, buf84, buf87, buf88, buf91, buf92, buf95, buf96, buf99, buf100, buf103, buf104, buf107, buf108, buf110, buf111, buf115, buf116, buf119, buf120, buf123, buf124, buf127, buf128, buf131, buf132, buf135, buf136, buf139, buf140, buf143, buf144, buf147, buf148, buf151, buf152, buf155, buf156, buf159, buf160, buf163, buf164, buf167, buf168, buf171, buf172, buf175, buf176, buf179, buf180, buf183, buf184, buf186, buf187, buf191, buf192, buf195, buf196, buf199, buf200, buf203, buf204, buf207, buf208, buf211, buf212, buf216, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_321, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf56, buf57, buf58, buf59, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, buf107, buf108, buf109, buf110, buf111, buf113, buf114, buf115, buf116, buf117, buf118, buf119, buf120, buf121, buf122, buf123, buf124, buf125, buf126, buf127, buf128, buf129, buf130, buf131, buf132, buf133, buf134, buf135, buf136, buf137, buf138, buf139, buf140, buf141, buf142, buf143, buf144, buf145, buf146, buf147, buf148, buf149, buf150, buf151, buf152, buf153, buf154, buf155, buf156, buf157, buf158, buf159, buf160, buf161, buf162, buf163, buf164, buf165, buf166, buf167, buf168, buf169, buf170, buf171, buf172, buf173, buf174, buf175, buf176, buf177, buf178, buf179, buf180, buf181, buf182, buf183, buf184, buf185, buf186, buf187, buf189, buf190, buf191, buf192, buf193, buf194, buf195, buf196, buf197, buf198, buf199, buf200, buf201, buf202, buf203, buf204, buf205, buf206, buf207, buf208, buf209, buf210, buf211, buf212, as_strided(buf215, (4, 2048), (2048, 1)), as_strided(primals_160, (1000, 2048), (2048, 1)), buf217, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 175 ops
cuda train shufflenet_v2_x1_0                 ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/shufflenetv2.py", line 165, in forward
    def forward(self, x: Tensor) -> Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/wa/cwachfmlmnf5dzhuk226njm66oymuij2c4yd5cfwogyjcqeb2zlz.py", line 1833, in call
    return (buf1, buf2, buf7, buf8, buf11, buf12, buf15, buf16, buf19, buf20, buf23, buf24, buf29, buf30, buf33, buf34, buf37, buf38, buf44, buf45, buf48, buf49, buf52, buf53, buf59, buf60, buf63, buf64, buf67, buf68, buf74, buf75, buf78, buf79, buf82, buf83, buf86, buf87, buf90, buf91, buf96, buf97, buf100, buf101, buf104, buf105, buf111, buf112, buf115, buf116, buf119, buf120, buf126, buf127, buf130, buf131, buf134, buf135, buf141, buf142, buf145, buf146, buf149, buf150, buf156, buf157, buf160, buf161, buf164, buf165, buf171, buf172, buf175, buf176, buf179, buf180, buf186, buf187, buf190, buf191, buf194, buf195, buf201, buf202, buf205, buf206, buf209, buf210, buf213, buf214, buf217, buf218, buf223, buf224, buf227, buf228, buf231, buf232, buf238, buf239, buf242, buf243, buf246, buf247, buf253, buf254, buf257, buf258, buf261, buf262, buf268, buf269, buf273, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_160, primals_161, primals_163, primals_164, primals_166, primals_167, primals_339, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf94, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf109, buf110, buf111, buf112, buf113, buf114, buf115, buf116, buf117, buf118, buf119, buf120, buf124, buf125, buf126, buf127, buf128, buf129, buf130, buf131, buf132, buf133, buf134, buf135, buf139, buf140, buf141, buf142, buf143, buf144, buf145, buf146, buf147, buf148, buf149, buf150, buf154, buf155, buf156, buf157, buf158, buf159, buf160, buf161, buf162, buf163, buf164, buf165, buf169, buf170, buf171, buf172, buf173, buf174, buf175, buf176, buf177, buf178, buf179, buf180, buf184, buf185, buf186, buf187, buf188, buf189, buf190, buf191, buf192, buf193, buf194, buf195, buf199, buf200, buf201, buf202, buf203, buf204, buf205, buf206, buf208, buf209, buf210, buf211, buf212, buf213, buf214, buf215, buf216, buf217, buf218, buf221, buf222, buf223, buf224, buf225, buf226, buf227, buf228, buf229, buf230, buf231, buf232, buf236, buf237, buf238, buf239, buf240, buf241, buf242, buf243, buf244, buf245, buf246, buf247, buf251, buf252, buf253, buf254, buf255, buf256, buf257, buf258, buf259, buf260, buf261, buf262, buf266, buf267, buf268, buf269, buf272, as_strided(primals_169, (1000, 1024), (1024, 1)), buf274, buf275, buf276, buf277, buf278, buf279, buf280, buf281, buf282, buf283, buf284, buf285, buf286, buf287, buf288, buf289, buf290, buf291, buf292, buf293, s0, 28, 28, 14, 14, 7, 7, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 367 ops
/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/gym/core.py:317: DeprecationWarning: [33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: [33mWARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
cuda train soft_actor_critic                  ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 360, in <graph break in forward_and_backward_pass>
    loss = self.compute_loss(pred)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 350, in compute_loss
    return reduce_to_scalar_loss(pred)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/testing.py", line 105, in reduce_to_scalar_loss
    return out.mean.sum()
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/soft_actor_critic/nets.py", line 255, in mean
    mu = tr(mu)
  File "/data/users/ezyang/a/pytorch/torch/distributions/transforms.py", line 156, in __call__
    y = self._call(x)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 20 ops
cuda train speech_transformer                 ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/transformer.py", line 28, in forward
    encoder_padded_outputs, *_ = self.encoder(padded_input, input_lengths)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 48, in forward
    non_pad_mask = get_non_pad_mask(padded_input, input_lengths=input_lengths)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 50, in <graph break in forward>
    slf_attn_mask = get_attn_pad_mask(padded_input, input_lengths, length)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 11 ops
cuda train squeezenet1_1                      ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/squeezenet.py", line 94, in forward
    def forward(self, x: torch.Tensor) -> torch.Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/xk/cxkeuuj4s5eu77f77phnxynep4k5uigmbjpwkjhppcyvtzsxjzmi.py", line 678, in call
    return (as_strided(buf66, (4, 1000), (1000, 1)), primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, primals_49, primals_51, primals_53, buf1, buf2, buf3, buf5, buf10, buf12, buf17, buf18, buf19, buf21, buf26, buf28, buf33, buf34, buf35, buf37, buf42, buf44, buf49, buf51, buf56, buf58, buf63, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, s0, 13, 13, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 66 ops
cuda train tacotron2                          ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 4 ops
cuda train timm_efficientdet                  ERROR:common:

from user code:
   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/effdet/efficientdet.py", line 211, in forward
    input_node = resample(input_node)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/effdet/efficientdet.py", line 134, in forward
    return F.interpolate(

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/functional.py", line 3924, in interpolate
    return torch._C._nn.upsample_nearest2d(input, output_size, scale_factors)
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1046, in get_fake_value
    return wrap_fake_exception(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 712, in wrap_fake_exception
    return fn()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1047, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <function interpolate at 0x7f1cbc6a6ee0>(*(FakeTensor(FakeTensor(..., device='meta',
           size=(s0, 88, ceiling(ceiling(ceiling(ceiling(ceiling(ceiling(ceiling(s2/2)/2)/2)/2)/2)/2)/2), ceiling(ceiling(ceiling(ceiling(ceiling(ceiling(ceiling(s2/2)/2)/2)/2)/2)/2)/2)),
           grad_fn=<MaxPool2DWithIndicesBackward0>), cuda:0), (10, 10), None, 'nearest', None), **{'recompute_scale_factor': False}):
Cannot call sizes() on tensor with symbolic sizes/strides
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 182, in call_function
    tx.call_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 220, in call_function
    return tx.inline_user_function_return(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 466, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1750, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1805, in inline_call_
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1005, in CALL_FUNCTION_KW
    self.call_function(fn, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/torch.py", line 444, in call_function
    tensor_variable = wrap_fx_proxy(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 729, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 766, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/effdet/efficientdet.py", line 211, in forward
    input_node = resample(input_node)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/effdet/efficientdet.py", line 134, in forward
    return F.interpolate(

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 0 ops
cuda train timm_efficientnet                  ERROR:common:compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_214 : [#users=1] = placeholder[target=primals_214]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'SymInt'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in <listcomp>
    size = [sympy.Integer(i) for i in ex.size()]
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 74, in wrapper
    retval = func(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/data/users/ezyang/a/pytorch/torch/__init__.py", line 242, in __int__
    return self.node.int_()
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/symbolic_shapes.py", line 210, in int_
    raise RuntimeError("Trying to extract a concrete int out of a symbolic int")
RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_214 : [#users=1] = placeholder[target=primals_214]
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_214 : [#users=1] = placeholder[target=primals_214]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 313 ops
cuda train timm_regnet                        ERROR:common:compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_265 : [#users=1] = placeholder[target=primals_265]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'SymInt'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in <listcomp>
    size = [sympy.Integer(i) for i in ex.size()]
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 74, in wrapper
    retval = func(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/data/users/ezyang/a/pytorch/torch/__init__.py", line 242, in __int__
    return self.node.int_()
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/symbolic_shapes.py", line 210, in int_
    raise RuntimeError("Trying to extract a concrete int out of a symbolic int")
RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_265 : [#users=1] = placeholder[target=primals_265]
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_265 : [#users=1] = placeholder[target=primals_265]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 458 ops
cuda train timm_resnest                       ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/timm/models/resnet.py", line 716, in forward
    def forward(self, x):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/k2/ck2u24qupvv2picybie2m4mekfm6wvvpboaxlvb76jq2jikkuwka.py", line 2178, in call
    return (buf1, buf2, buf5, buf6, buf9, buf10, buf15, buf16, buf19, buf20, buf26, buf27, buf34, buf35, buf37, buf38, buf42, buf43, buf46, buf47, buf53, buf54, buf62, buf63, buf66, buf67, buf71, buf72, buf75, buf76, buf82, buf83, buf91, buf92, buf95, buf96, buf100, buf101, buf104, buf105, buf111, buf112, buf120, buf121, buf124, buf125, buf129, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_18, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_36, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_54, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_72, primals_74, primals_76, primals_77, primals_79, primals_80, primals_153, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf23, buf25, buf26, buf27, buf28, buf31, buf33, buf34, buf35, buf36, buf37, buf38, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf50, buf52, buf53, buf54, buf55, buf58, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf79, buf81, buf82, buf83, buf84, buf87, buf89, buf90, buf91, buf92, buf93, buf94, buf95, buf96, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, buf108, buf110, buf111, buf112, buf113, buf116, buf118, buf119, buf120, buf121, buf122, buf123, buf124, buf125, as_strided(buf128, (4, 2048), (2048, 1)), as_strided(primals_82, (1000, 2048), (2048, 1)), buf130, s0, 56, 56, 128, 56, 56, 256, 28, 28, 512, 14, 14, 1024, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 180 ops
cuda train timm_vision_transformer            ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/timm/models/vision_transformer.py", line 449, in forward
    def forward(self, x):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/d5/cd5yxyrdmu3enl64j7qamsdiqvonep4pu6qxgfruo7s6dwa3oz7w.py", line 1690, in call
    return (buf304, primals_2, primals_3, primals_5, primals_6, primals_11, primals_12, primals_17, primals_18, primals_23, primals_24, primals_29, primals_30, primals_35, primals_36, primals_41, primals_42, primals_47, primals_48, primals_53, primals_54, primals_59, primals_60, primals_65, primals_66, primals_71, primals_72, primals_77, primals_78, primals_83, primals_84, primals_89, primals_90, primals_95, primals_96, primals_101, primals_102, primals_107, primals_108, primals_113, primals_114, primals_119, primals_120, primals_125, primals_126, primals_131, primals_132, primals_137, primals_138, primals_143, primals_144, primals_149, primals_153, buf3, buf7, buf8, as_strided(buf11, (24, 197, 64), (12608, 64, 1)), buf16, as_strided(buf19, (788, 384), (384, 1)), buf24, as_strided(buf26, (4, 197, 1536), (302592, 1536, 1)), buf32, as_strided(buf35, (24, 197, 64), (12608, 64, 1)), buf40, as_strided(buf43, (788, 384), (384, 1)), buf49, as_strided(buf51, (4, 197, 1536), (302592, 1536, 1)), buf57, as_strided(buf60, (24, 197, 64), (12608, 64, 1)), buf65, as_strided(buf68, (788, 384), (384, 1)), buf73, as_strided(buf75, (4, 197, 1536), (302592, 1536, 1)), buf81, as_strided(buf84, (24, 197, 64), (12608, 64, 1)), buf89, as_strided(buf92, (788, 384), (384, 1)), buf98, as_strided(buf100, (4, 197, 1536), (302592, 1536, 1)), buf106, as_strided(buf109, (24, 197, 64), (12608, 64, 1)), buf114, as_strided(buf117, (788, 384), (384, 1)), buf122, as_strided(buf124, (4, 197, 1536), (302592, 1536, 1)), buf130, as_strided(buf133, (24, 197, 64), (12608, 64, 1)), buf138, as_strided(buf141, (788, 384), (384, 1)), buf147, as_strided(buf149, (4, 197, 1536), (302592, 1536, 1)), buf155, as_strided(buf158, (24, 197, 64), (12608, 64, 1)), buf163, as_strided(buf166, (788, 384), (384, 1)), buf171, as_strided(buf173, (4, 197, 1536), (302592, 1536, 1)), buf179, as_strided(buf182, (24, 197, 64), (12608, 64, 1)), buf187, as_strided(buf190, (788, 384), (384, 1)), buf196, as_strided(buf198, (4, 197, 1536), (302592, 1536, 1)), buf204, as_strided(buf207, (24, 197, 64), (12608, 64, 1)), buf212, as_strided(buf215, (788, 384), (384, 1)), buf220, as_strided(buf222, (4, 197, 1536), (302592, 1536, 1)), buf228, as_strided(buf231, (24, 197, 64), (12608, 64, 1)), buf236, as_strided(buf239, (788, 384), (384, 1)), buf245, as_strided(buf247, (4, 197, 1536), (302592, 1536, 1)), buf253, as_strided(buf256, (24, 197, 64), (12608, 64, 1)), buf261, as_strided(buf264, (788, 384), (384, 1)), buf269, as_strided(buf271, (4, 197, 1536), (302592, 1536, 1)), buf277, as_strided(buf280, (24, 197, 64), (12608, 64, 1)), buf285, as_strided(buf288, (788, 384), (384, 1)), buf294, as_strided(buf296, (4, 197, 1536), (302592, 1536, 1)), buf302, as_strided(buf303, (4, 384), (75648, 1)), as_strided(primals_151, (1000, 384), (384, 1)), buf305, as_strided(primals_147, (384, 1536), (1536, 1)), as_strided(primals_145, (1536, 384), (384, 1)), buf306, as_strided(primals_141, (384, 384), (384, 1)), as_strided(buf286, (24, 64, 197), (12608, 1, 64)), as_strided(buf281, (24, 197, 64), (12608, 1, 197)), as_strided(primals_139, (1152, 384), (384, 1)), buf307, as_strided(primals_135, (384, 1536), (1536, 1)), as_strided(primals_133, (1536, 384), (384, 1)), buf308, as_strided(primals_129, (384, 384), (384, 1)), as_strided(buf262, (24, 64, 197), (12608, 1, 64)), as_strided(buf257, (24, 197, 64), (12608, 1, 197)), as_strided(primals_127, (1152, 384), (384, 1)), buf309, as_strided(primals_123, (384, 1536), (1536, 1)), as_strided(primals_121, (1536, 384), (384, 1)), buf310, as_strided(primals_117, (384, 384), (384, 1)), as_strided(buf237, (24, 64, 197), (12608, 1, 64)), as_strided(buf232, (24, 197, 64), (12608, 1, 197)), as_strided(primals_115, (1152, 384), (384, 1)), buf311, as_strided(primals_111, (384, 1536), (1536, 1)), as_strided(primals_109, (1536, 384), (384, 1)), buf312, as_strided(primals_105, (384, 384), (384, 1)), as_strided(buf213, (24, 64, 197), (12608, 1, 64)), as_strided(buf208, (24, 197, 64), (12608, 1, 197)), as_strided(primals_103, (1152, 384), (384, 1)), buf313, as_strided(primals_99, (384, 1536), (1536, 1)), as_strided(primals_97, (1536, 384), (384, 1)), buf314, as_strided(primals_93, (384, 384), (384, 1)), as_strided(buf188, (24, 64, 197), (12608, 1, 64)), as_strided(buf183, (24, 197, 64), (12608, 1, 197)), as_strided(primals_91, (1152, 384), (384, 1)), buf315, as_strided(primals_87, (384, 1536), (1536, 1)), as_strided(primals_85, (1536, 384), (384, 1)), buf316, as_strided(primals_81, (384, 384), (384, 1)), as_strided(buf164, (24, 64, 197), (12608, 1, 64)), as_strided(buf159, (24, 197, 64), (12608, 1, 197)), as_strided(primals_79, (1152, 384), (384, 1)), buf317, as_strided(primals_75, (384, 1536), (1536, 1)), as_strided(primals_73, (1536, 384), (384, 1)), buf318, as_strided(primals_69, (384, 384), (384, 1)), as_strided(buf139, (24, 64, 197), (12608, 1, 64)), as_strided(buf134, (24, 197, 64), (12608, 1, 197)), as_strided(primals_67, (1152, 384), (384, 1)), buf319, as_strided(primals_63, (384, 1536), (1536, 1)), as_strided(primals_61, (1536, 384), (384, 1)), buf320, as_strided(primals_57, (384, 384), (384, 1)), as_strided(buf115, (24, 64, 197), (12608, 1, 64)), as_strided(buf110, (24, 197, 64), (12608, 1, 197)), as_strided(primals_55, (1152, 384), (384, 1)), buf321, as_strided(primals_51, (384, 1536), (1536, 1)), as_strided(primals_49, (1536, 384), (384, 1)), buf322, as_strided(primals_45, (384, 384), (384, 1)), as_strided(buf90, (24, 64, 197), (12608, 1, 64)), as_strided(buf85, (24, 197, 64), (12608, 1, 197)), as_strided(primals_43, (1152, 384), (384, 1)), buf323, as_strided(primals_39, (384, 1536), (1536, 1)), as_strided(primals_37, (1536, 384), (384, 1)), buf324, as_strided(primals_33, (384, 384), (384, 1)), as_strided(buf66, (24, 64, 197), (12608, 1, 64)), as_strided(buf61, (24, 197, 64), (12608, 1, 197)), as_strided(primals_31, (1152, 384), (384, 1)), buf325, as_strided(primals_27, (384, 1536), (1536, 1)), as_strided(primals_25, (1536, 384), (384, 1)), buf326, as_strided(primals_21, (384, 384), (384, 1)), as_strided(buf41, (24, 64, 197), (12608, 1, 64)), as_strided(buf36, (24, 197, 64), (12608, 1, 197)), as_strided(primals_19, (1152, 384), (384, 1)), buf327, as_strided(primals_15, (384, 1536), (1536, 1)), as_strided(primals_13, (1536, 384), (384, 1)), buf328, as_strided(primals_9, (384, 384), (384, 1)), as_strided(buf17, (24, 64, 197), (12608, 1, 64)), as_strided(buf12, (24, 197, 64), (12608, 1, 197)), as_strided(primals_7, (1152, 384), (384, 1)), s0, 14, 14, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, 197, 6, 6*s0, 384, 197, 197*s0, s0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 196, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 441 ops
cuda train timm_vision_transformer_large      PASS
Dynamo produced 0 graph(s) covering 0 ops
cuda train timm_vovnet                        ERROR:common:compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_120 : [#users=1] = placeholder[target=primals_120]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'SymInt'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in <listcomp>
    size = [sympy.Integer(i) for i in ex.size()]
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 74, in wrapper
    retval = func(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/data/users/ezyang/a/pytorch/torch/__init__.py", line 242, in __int__
    return self.node.int_()
  File "/data/users/ezyang/a/pytorch/torch/fx/experimental/symbolic_shapes.py", line 210, in int_
    raise RuntimeError("Trying to extract a concrete int out of a symbolic int")
RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_120 : [#users=1] = placeholder[target=primals_120]
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised RuntimeError: Trying to extract a concrete int out of a symbolic int

While executing %primals_120 : [#users=1] = placeholder[target=primals_120]
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 169 ops
cuda train tts_angular                        ERROR:common:

from user code:
   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/tts_angular/model.py", line 18, in <graph break in forward>
    o, (_, _) = self.lstm(x)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1092, in run_node
    return nnmodule(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/rnn.py", line 776, in forward
    result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1046, in get_fake_value
    return wrap_fake_exception(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 712, in wrap_fake_exception
    return fn()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1047, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_module self_lstm(*(FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, 40)), cuda:0),), **{}):
Cannot call sizes() on tensor with symbolic sizes/strides
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/tts_angular/model.py", line 59, in forward
    d = self.layers(x)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/container.py", line 204, in forward
    input = module(input)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/tts_angular/model.py", line 17, in forward
    self.lstm.flatten_parameters()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 305, in wrapper
    return inner_fn(self, inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 956, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 430, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/nn_module.py", line 201, in call_function
    return wrap_fx_proxy(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 729, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/variables/builder.py", line 766, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/tts_angular/model.py", line 18, in <graph break in forward>
    o, (_, _) = self.lstm(x)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 0 ops
cuda train vgg16                              ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchvision/torchvision/models/vgg.py", line 65, in forward
    def forward(self, x: torch.Tensor) -> torch.Tensor:
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/4b/c4bucu2bcj7r2kirrua5nbo4simoi6tw43w5hasphd5ltxvo7gjj.py", line 461, in call
    return (buf36, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_33, buf1, buf3, buf4, buf37, buf6, buf8, buf9, buf38, buf11, buf13, buf15, buf16, buf39, buf18, buf20, buf22, buf23, buf40, buf25, buf27, buf29, buf30, buf41, as_strided(buf31, (4, 25088), (25088, 1)), buf33, buf35, as_strided(primals_31, (1000, 4096), (4096, 1)), as_strided(primals_29, (4096, 4096), (4096, 1)), as_strided(primals_27, (4096, 25088), (25088, 1)), s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 40 ops
cuda train vision_maskrcnn                    Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 373, in <module>
    main(TorchBenchmarkRunner(), original_dir)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1670, in main
    return maybe_fresh_cache(run, args.cold_start_latency and args.only)(
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 842, in inner
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 2044, in run
    runner.run_one_model(
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1333, in run_one_model
    status = self.check_accuracy(
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1139, in check_accuracy
    if not same(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 771, in same
    return len(ref) == len(res) and all(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 772, in <genexpr>
    same(ai, bi, fp64_refi, cos_similarity, tol, equal_nan, exact_dtype)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 771, in same
    return len(ref) == len(res) and all(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 772, in <genexpr>
    same(ai, bi, fp64_refi, cos_similarity, tol, equal_nan, exact_dtype)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 782, in same
    same(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 839, in same
    ref_error = rmse(fp64_ref, ref).item()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 754, in rmse
    return torch.sqrt(torch.mean(torch.square(ref - res)))
RuntimeError: The size of tensor a (28) must match the size of tensor b (29) at non-singleton dimension 0
cuda train yolov3                             [2022-12-12 07:03:14,612] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <built-in function truediv>
  args[0]: 25165824
  args[1]: 32
ERROR:common:compile_fx raised LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 25165824
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: truediv expected 2 arguments, got 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 25165824
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 356, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 357, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/torchbench.py", line 359, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/yolov3/yolo_models.py", line 238, in forward
    return self.forward_once(x)
  File "/data/users/ezyang/a/torchbenchmark/torchbenchmark/models/yolov3/yolo_models.py", line 292, in forward_once
    x = module(x)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 25165824
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 3 ops
ERROR
cuda train AlbertForMaskedLM                  ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 990, in forward
    outputs = self.albert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 737, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 560 ops
cuda train AlbertForQuestionAnswering         ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 1274, in forward
    outputs = self.albert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 737, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 560 ops
cuda train AllenaiLongformerBase              [2022-12-12 07:05:06,965] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1813, in forward
    outputs = self.longformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1696, in forward
    padding_len, input_ids, attention_mask, token_type_ids, position_ids, inputs_embeds = self._pad_to_window_size(
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1715, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 30 ops
cuda train BartForCausalLM                    [2022-12-12 07:05:26,654] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1839, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1098, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 418, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 49 ops
cuda train BartForConditionalGeneration       ERROR:common:compile_fx raised LoweringException: TypeError: Argument of Integer should be of numeric type, got s0.
  target: aten.new_zeros.default
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='arg0_1', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: [1, s0]
  kwargs: {'dtype': torch.int64, 'layout': torch.strided, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %new_zeros : [#users=5] = call_function[target=torch.ops.aten.new_zeros.default](args = (%arg0_1, [%sym_size, %sym_size_1]), kwargs = {dtype: torch.int64, layout: torch.strided, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 79, in shift_tokens_right
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1672, in _new_constant
    size = [sympy.Integer(s) for s in size]
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1672, in <listcomp>
    size = [sympy.Integer(s) for s in size]
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/numbers.py", line 2097, in __new__
    raise TypeError(
TypeError: Argument of Integer should be of numeric type, got s0.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: Argument of Integer should be of numeric type, got s0.
  target: aten.new_zeros.default
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='arg0_1', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: [1, s0]
  kwargs: {'dtype': torch.int64, 'layout': torch.strided, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %new_zeros : [#users=5] = call_function[target=torch.ops.aten.new_zeros.default](args = (%arg0_1, [%sym_size, %sym_size_1]), kwargs = {dtype: torch.int64, layout: torch.strided, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 79, in shift_tokens_right
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1349, in forward
    decoder_input_ids = shift_tokens_right(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: Argument of Integer should be of numeric type, got s0.
  target: aten.new_zeros.default
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='arg0_1', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: [1, s0]
  kwargs: {'dtype': torch.int64, 'layout': torch.strided, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %new_zeros : [#users=5] = call_function[target=torch.ops.aten.new_zeros.default](args = (%arg0_1, [%sym_size, %sym_size_1]), kwargs = {dtype: torch.int64, layout: torch.strided, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 79, in shift_tokens_right
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 8 ops
cuda train BertForMaskedLM                    ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1351, in forward
    outputs = self.bert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1018, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 552 ops
cuda train BertForQuestionAnswering           ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1847, in forward
    outputs = self.bert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1018, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 552 ops
cuda train BlenderbotForCausalLM              PASS
Dynamo produced 0 graph(s) covering 0 ops
cuda train BlenderbotSmallForCausalLM         [2022-12-12 07:08:41,319] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 1529, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 1033, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 407, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 38 ops
cuda train BlenderbotSmallForConditionalGeneration ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 1292, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 1155, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 780, in forward
    layer_outputs = encoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/blenderbot_small/modeling_blenderbot_small.py", line 311, in forward
    hidden_states, attn_weights, _ = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 25 ops
cuda train CamemBert                          ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 1095, in forward
    outputs = self.roberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 5 ops
cuda train DebertaForMaskedLM                 ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 1041, in forward
    outputs = self.deberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 946, in forward
    embedding_output = self.embeddings(
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 954, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 414, in forward
    attention_mask = self.get_attention_mask(attention_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 24 ops
cuda train DebertaForQuestionAnswering        ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 1369, in forward
    outputs = self.deberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 946, in forward
    embedding_output = self.embeddings(
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 954, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta/modeling_deberta.py", line 414, in forward
    attention_mask = self.get_attention_mask(attention_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 24 ops
cuda train DebertaV2ForMaskedLM               PASS
Dynamo produced 0 graph(s) covering 0 ops
cuda train DebertaV2ForQuestionAnswering      ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta_v2/modeling_deberta_v2.py", line 1469, in forward
    outputs = self.deberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta_v2/modeling_deberta_v2.py", line 1042, in forward
    embedding_output = self.embeddings(
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta_v2/modeling_deberta_v2.py", line 1050, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/deberta_v2/modeling_deberta_v2.py", line 465, in forward
    attention_mask = self.get_attention_mask(attention_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 12 ops
WARNING:__main__:Sequence Length not defined for DistilBertForMaskedLM. Choosing 128 arbitrarily
cuda train DistilBertForMaskedLM              ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 649, in forward
    dlbrt_output = self.distilbert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 2 ops
WARNING:__main__:Sequence Length not defined for DistilBertForQuestionAnswering. Choosing 128 arbitrarily
cuda train DistilBertForQuestionAnswering     ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 868, in forward
    distilbert_output = self.distilbert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 2 ops
cuda train DistillGPT2                        ERROR:common:compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 493, in codegen
    assert shape in added, f"{shape} is needed but not added"
AssertionError: s1 is needed but not added

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1048, in forward
    transformer_outputs = self.transformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 891, in forward
    outputs = block(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 391, in forward
    attn_outputs = self.attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 332, in forward
    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 33 ops
If you want to use `ElectraForCausalLM` as a standalone, add `is_decoder=True.`
cuda train ElectraForCausalLM                 ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/electra/modeling_electra.py", line 1621, in forward
    outputs = self.electra(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/electra/modeling_electra.py", line 917, in forward
    hidden_states = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 551 ops
cuda train ElectraForQuestionAnswering        ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/electra/modeling_electra.py", line 1390, in forward
    discriminator_hidden_states = self.electra(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/electra/modeling_electra.py", line 917, in forward
    hidden_states = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 551 ops
cuda train GPT2ForSequenceClassification      ERROR:common:compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 493, in codegen
    assert shape in added, f"{shape} is needed but not added"
AssertionError: s1 is needed but not added

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1378, in forward
    transformer_outputs = self.transformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 891, in forward
    outputs = block(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 391, in forward
    attn_outputs = self.attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 332, in forward
    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: s1 is needed but not added

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 33 ops
cuda train GoogleFnet                         ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/fnet/modeling_fnet.py", line 763, in forward
    outputs = self.fnet(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/fnet/modeling_fnet.py", line 604, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/fnet/modeling_fnet.py", line 308, in forward
    layer_outputs = layer_module(hidden_states)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/fnet/modeling_fnet.py", line 267, in forward
    self_fourier_outputs = self.fourier(hidden_states)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/fnet/modeling_fnet.py", line 220, in forward
    self_outputs = self.self(hidden_states)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 8 ops
cuda train LayoutLMForMaskedLM                ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/layoutlm/modeling_layoutlm.py", line 935, in forward
    outputs = self.layoutlm(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/layoutlm/modeling_layoutlm.py", line 820, in forward
    embedding_output = self.embeddings(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 14 ops
cuda train LayoutLMForSequenceClassification  ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/layoutlm/modeling_layoutlm.py", line 1057, in forward
    outputs = self.layoutlm(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/layoutlm/modeling_layoutlm.py", line 820, in forward
    embedding_output = self.embeddings(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 14 ops
WARNING:__main__:Sequence Length not defined for M2M100ForConditionalGeneration. Choosing 128 arbitrarily
cuda train M2M100ForConditionalGeneration     [2022-12-12 07:14:30,771] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: aten.sym_stride
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: 0
[2022-12-12 07:14:30,775] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.sym_stride
ERROR:common:compile_fx raised LoweringException: AssertionError: FallbackKernel output type is not supported
  target: aten.sym_stride
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: 0

While executing %sym_stride : [#users=1] = call_function[target=torch.ops.aten.sym_stride](args = (%primals_2, 0), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3048, in create
    return generate_output(example_output)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3045, in generate_output
    assert output is None, "FallbackKernel output type is not supported"
AssertionError: FallbackKernel output type is not supported

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: AssertionError: FallbackKernel output type is not supported
  target: aten.sym_stride
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: 0

While executing %sym_stride : [#users=1] = call_function[target=torch.ops.aten.sym_stride](args = (%primals_2, 0), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/m2m_100/modeling_m2m_100.py", line 1317, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/m2m_100/modeling_m2m_100.py", line 1190, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: AssertionError: FallbackKernel output type is not supported
  target: aten.sym_stride
  args[0]: TensorBox(StorageBox(
    InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.int64, size=[1, s0], stride=[s0, 1]))
  ))
  args[1]: 0

While executing %sym_stride : [#users=1] = call_function[target=torch.ops.aten.sym_stride](args = (%primals_2, 0), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 5 ops
cuda train MBartForCausalLM                   [2022-12-12 07:14:49,201] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 1836, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 1095, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 426, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 4 graph(s) covering 49 ops
cuda train MBartForConditionalGeneration      [2022-12-12 07:15:46,834] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 1346, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 1229, in forward
    decoder_outputs = self.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 1095, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mbart/modeling_mbart.py", line 426, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 42 graph(s) covering 455 ops
WARNING:__main__:Sequence Length not defined for MT5ForConditionalGeneration. Choosing 128 arbitrarily
cuda train MT5ForConditionalGeneration        WARNING:common:fp64 golden ref were not generated for MT5ForConditionalGeneration. Setting accuracy check to cosine
ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 1282 ops
If you want to use `MegatronBertForCausalLM` as a standalone, add `is_decoder=True.`
cuda train MegatronBertForCausalLM            ERROR:common:'int' object has no attribute 'size'

While executing %sym_size : [#users=3] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_cls': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertOnlyMLMHead'>", 'self_cls_predictions': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLMPredictionHead'>", 'self_cls_predictions_transform': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertPredictionHeadTransform'>", 'self_cls_predictions_transform_dense': "<class 'torch.nn.modules.linear.Linear'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 638, in forward
    hidden_states = self.dense(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 660, in forward
    hidden_states = self.transform(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 672, in forward
    prediction_scores = self.predictions(sequence_output)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 1218, in <graph break in forward>
    prediction_scores = self.cls(sequence_output)

Gradient addition node due to multiple use of tensor around:
Module stack: {'self_encoder': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertEncoder'>", 'self_encoder_layer_0': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLayer'>", 'self_encoder_layer_0_attention': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertAttention'>", 'self_encoder_layer_0_attention_ln': "<class 'torch.nn.modules.normalization.LayerNorm'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 370, in forward
    ln_outputs = self.ln(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 443, in forward
    self_attention_outputs = self.attention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 562, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 976, in <graph break in forward>
    encoder_outputs = self.encoder(
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 485, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'

While executing %sym_size : [#users=3] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_cls': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertOnlyMLMHead'>", 'self_cls_predictions': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLMPredictionHead'>", 'self_cls_predictions_transform': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertPredictionHeadTransform'>", 'self_cls_predictions_transform_dense': "<class 'torch.nn.modules.linear.Linear'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 638, in forward
    hidden_states = self.dense(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 660, in forward
    hidden_states = self.transform(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 672, in forward
    prediction_scores = self.predictions(sequence_output)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 1218, in <graph break in forward>
    prediction_scores = self.cls(sequence_output)

Gradient addition node due to multiple use of tensor around:
Module stack: {'self_encoder': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertEncoder'>", 'self_encoder_layer_0': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLayer'>", 'self_encoder_layer_0_attention': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertAttention'>", 'self_encoder_layer_0_attention_ln': "<class 'torch.nn.modules.normalization.LayerNorm'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 370, in forward
    ln_outputs = self.ln(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 443, in forward
    self_attention_outputs = self.attention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 562, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 976, in <graph break in forward>
    encoder_outputs = self.encoder(

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 4 graph(s) covering 1105 ops
cuda train MegatronBertForQuestionAnswering   ERROR:common:'int' object has no attribute 'size'

While executing %sym_size : [#users=1] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_qa_outputs': "<class 'torch.nn.modules.linear.Linear'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 1813, in <graph break in forward>
    logits = self.qa_outputs(sequence_output)

Gradient addition node due to multiple use of tensor around:
Module stack: {'self_encoder': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertEncoder'>", 'self_encoder_layer_0': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLayer'>", 'self_encoder_layer_0_attention': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertAttention'>", 'self_encoder_layer_0_attention_ln': "<class 'torch.nn.modules.normalization.LayerNorm'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 370, in forward
    ln_outputs = self.ln(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 443, in forward
    self_attention_outputs = self.attention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 562, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 976, in <graph break in forward>
    encoder_outputs = self.encoder(
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 485, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/data/users/ezyang/a/pytorch/torch/_tensor.py", line 484, in backward
    torch.autograd.backward(
  File "/data/users/ezyang/a/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 272, in apply
    return user_fn(self, *args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1683, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 68, in _wrapped_bw_compiler
    return eval_frame.disable(eval_frame.disable(bw_compiler)(*args, **kwargs))
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 380, in bw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 231, in placeholder
    sizes, strides = self.static_sizes_strides(example)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 62, in static_sizes_strides
    size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'

While executing %sym_size : [#users=1] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_qa_outputs': "<class 'torch.nn.modules.linear.Linear'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 1813, in <graph break in forward>
    logits = self.qa_outputs(sequence_output)

Gradient addition node due to multiple use of tensor around:
Module stack: {'self_encoder': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertEncoder'>", 'self_encoder_layer_0': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertLayer'>", 'self_encoder_layer_0_attention': "<class 'transformers.models.megatron_bert.modeling_megatron_bert.MegatronBertAttention'>", 'self_encoder_layer_0_attention_ln': "<class 'torch.nn.modules.normalization.LayerNorm'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 370, in forward
    ln_outputs = self.ln(hidden_states)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 443, in forward
    self_attention_outputs = self.attention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 562, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/megatron_bert/modeling_megatron_bert.py", line 976, in <graph break in forward>
    encoder_outputs = self.encoder(

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 4 graph(s) covering 1111 ops
cuda train MobileBertForMaskedLM              ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mobilebert/modeling_mobilebert.py", line 1089, in forward
    outputs = self.mobilebert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 7 ops
cuda train MobileBertForQuestionAnswering     ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/mobilebert/modeling_mobilebert.py", line 1395, in forward
    outputs = self.mobilebert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 7 ops
cuda train OPTForCausalLM                     [2022-12-12 07:20:19,445] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
[2022-12-12 07:20:20,338] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 952, in aot_dispatch_base
    compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 437, in codegen
    self.init_wrapper_code()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 431, in init_wrapper_code
    self.wrapper_code = WrapperCodeGen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 302, in __init__
    self.write_prefix()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/wrapper.py", line 335, in write_prefix
    V.graph.sizevars.codegen(self.wrapper_call, V.graph.graph_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 494, in codegen
    assert not needed
AssertionError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/opt/modeling_opt.py", line 918, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/opt/modeling_opt.py", line 622, in forward
    attention_mask = self._prepare_decoder_attention_mask(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError:

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 32 ops
cuda train PLBartForCausalLM                  [2022-12-12 07:20:34,579] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 1680, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 1070, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 424, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 39 ops
cuda train PLBartForConditionalGeneration     ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 1314, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 1182, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 817, in forward
    layer_outputs = encoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/plbart/modeling_plbart.py", line 328, in forward
    hidden_states, attn_weights, _ = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 39 ops
WARNING:__main__:Sequence Length not defined for PegasusForCausalLM. Choosing 128 arbitrarily
cuda train PegasusForCausalLM                 ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 84, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 877, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1023, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 84, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 877, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1023, in forward
    attention_mask = self._prepare_decoder_attention_mask(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1659, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 84, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 877, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1023, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 21 ops
WARNING:__main__:Sequence Length not defined for PegasusForConditionalGeneration. Choosing 128 arbitrarily
cuda train PegasusForConditionalGeneration    ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1399, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 1238, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 763, in forward
    embed_pos = self.embed_positions(input_shape)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 807, in <graph break in forward>
    layer_outputs = encoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/pegasus/modeling_pegasus.py", line 329, in forward
    hidden_states, attn_weights, _ = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 32 ops
If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
cuda train RobertaForCausalLM                 [2022-12-12 07:22:10,493] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 971, in forward
    outputs = self.roberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 848, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 565 ops
cuda train RobertaForQuestionAnswering        [2022-12-12 07:22:48,672] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 1513, in forward
    outputs = self.roberta(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 848, in forward
    encoder_outputs = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 565 ops
WARNING:__main__:Sequence Length not defined for Speech2Text2ForCausalLM. Choosing 128 arbitrarily
cuda train Speech2Text2ForCausalLM            ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 53, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 496, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 613, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 53, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 496, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 613, in forward
    attention_mask = self._prepare_decoder_attention_mask(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 910, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 53, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 496, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py", line 613, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 21 ops
cuda train T5ForConditionalGeneration         WARNING:common:fp64 golden ref were not generated for T5ForConditionalGeneration. Setting accuracy check to cosine
ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 885 ops
cuda train T5Small                            WARNING:common:fp64 golden ref were not generated for T5Small. Setting accuracy check to cosine
ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_encoder': "<class 'transformers.models.t5.modeling_t5.T5Stack'>", 'self_encoder_block_0': "<class 'transformers.models.t5.modeling_t5.T5Block'>", 'sub0_0': "<class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>", 'self_encoder_block_0_layer_0_SelfAttention': "<class 'transformers.models.t5.modeling_t5.T5Attention'>"}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
    context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
    layer_outputs = layer_module(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 885 ops
cuda train TrOCRForCausalLM                   [2022-12-12 07:25:07,098] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 136, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 503, in compile_to_fn
    return self.compile_to_module().call
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 488, in compile_to_module
    code = self.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 440, in codegen
    self.scheduler.codegen()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/scheduler.py", line 1129, in codegen
    self.get_backend(device).codegen_nodes(node.get_nodes())
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1284, in codegen_nodes
    return self.codegen_node_schedule(node_schedule, numel, rnumel)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1336, in codegen_node_schedule
    src_code = kernel.codegen_kernel()
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 1035, in codegen_kernel
    "signature": dict(enumerate(map(signature_of, signature))),
  File "/data/users/ezyang/a/pytorch/torch/_inductor/codegen/triton.py", line 57, in signature_of
    return JITFunction._key_of(V.graph.sizevars.size_hint(arg.expr))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/sizevars.py", line 367, in size_hint
    return int(out)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/sympy-1.11.1-py3.9.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/trocr/modeling_trocr.py", line 953, in forward
    outputs = self.model.decoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/trocr/modeling_trocr.py", line 715, in forward
    layer_outputs = decoder_layer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/trocr/modeling_trocr.py", line 381, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 516, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised TypeError: Cannot convert symbols to int

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 40 ops
WARNING:__main__:Sequence Length not defined for XGLMForCausalLM. Choosing 128 arbitrarily
cuda train XGLMForCausalLM                    ERROR:common:compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 124, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 578, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 701, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 1318, in arange
    return fallback_arange(
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/data/users/ezyang/a/pytorch/torch/_inductor/overrides.py", line 37, in __torch_function__
    return func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 124, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 578, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 701, in forward
    attention_mask = self._prepare_decoder_attention_mask(


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 889, in forward
    outputs = self.model(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 332, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s0
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s0 to number

aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)

aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)


  target: aten.arange.default
  args[0]: s0
  kwargs: {'device': device(type='cpu'), 'pin_memory': False}

While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {device: cpu, pin_memory: False})
Original traceback:
Module stack: {}
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 124, in _make_causal_mask
    mask_cond = torch.arange(mask.size(-1))
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 578, in _prepare_decoder_attention_mask
    combined_attention_mask = _make_causal_mask(
 |   File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xglm/modeling_xglm.py", line 701, in forward
    attention_mask = self._prepare_decoder_attention_mask(


Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 21 ops
cuda train XLNetLMHeadModel                   [2022-12-12 07:27:26,608] torch._inductor.ir: [WARNING] DeviceCopy
[2022-12-12 07:27:26,631] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <built-in function sub>
  args[0]: 1024
  args[1]: 1
ERROR:common:compile_fx raised LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 1024
  args[1]: 1

While executing %sub : [#users=1] = call_function[target=operator.sub](args = (%sym_size_9, 1), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: sub expected 2 arguments, got 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 1024
  args[1]: 1

While executing %sub : [#users=1] = call_function[target=operator.sub](args = (%sym_size_9, 1), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xlnet/modeling_xlnet.py", line 1448, in forward
    transformer_outputs = self.transformer(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/xlnet/modeling_xlnet.py", line 1207, in forward
    pos_emb = self.relative_positional_encoding(qlen, klen, bsz=bsz)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 1024
  args[1]: 1

While executing %sub : [#users=1] = call_function[target=operator.sub](args = (%sym_size_9, 1), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 2 graph(s) covering 1009 ops
cuda train YituTechConvBert                   [2022-12-12 07:28:17,885] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <built-in function sub>
  args[0]: 520
  args[1]: 8
ERROR:common:compile_fx raised LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 520
  args[1]: 8

While executing %sub_1 : [#users=2] = call_function[target=operator.sub](args = (%add_2, 8), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: sub expected 2 arguments, got 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 520
  args[1]: 8

While executing %sub_1 : [#users=2] = call_function[target=operator.sub](args = (%add_2, 8), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 480, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 481, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/huggingface.py", line 483, in <graph break in forward_and_backward_pass>
    pred = mod(**cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/convbert/modeling_convbert.py", line 928, in forward
    generator_hidden_states = self.convbert(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/transformers/models/convbert/modeling_convbert.py", line 853, in forward
    hidden_states = self.encoder(
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 500, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: sub expected 2 arguments, got 0
  target: <built-in function sub>
  args[0]: 520
  args[1]: 8

While executing %sub_1 : [#users=2] = call_function[target=operator.sub](args = (%add_2, 8), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 3 graph(s) covering 827 ops
cuda train adv_inception_v3                   [2022-12-12 07:29:42,686] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <built-in function truediv>
  args[0]: 5683456
  args[1]: 32
ERROR:common:compile_fx raised LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 5683456
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: truediv expected 2 arguments, got 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 5683456
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 312, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 313, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 315, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 332, in catch_errors
    return callback(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 479, in _convert_frame
    result = inner_convert(frame, cache_size, hooks)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 103, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 398, in _compile
    out_code = transform_code_object(code, transform)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/convert_frame.py", line 385, in transform
    tracer.run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1676, in run
    super().run()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 528, in run
    and self.step()
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 496, in step
    getattr(self, inst.opname)(inst)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1738, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 476, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 547, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 628, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 5683456
  args[1]: 32

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 32), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 0 graph(s) covering 313 ops
cuda train beit_base_patch16_224              ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 312, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 313, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 315, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/timm/models/beit.py", line 341, in forward
    def forward(self, x):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2340, in forward
    return compiled_fn(full_args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 887, in g
    return f(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1903, in debug_compiled_function
    return compiled_function(*args)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1718, in compiled_function
    all_outs = CompiledFunction.apply(*args_with_synthetic_bases)
  File "/data/users/ezyang/a/pytorch/torch/autograd/function.py", line 419, in apply
    return super().apply(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1581, in forward
    fw_outs = call_func_with_args(
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 912, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 199, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/mw/cmwozk3udaucc75xv6rddb425ftttk25fh6vyeqgzrylvedhek2u.py", line 1424, in call
    return (buf360, primals_2, primals_7, primals_8, primals_13, primals_14, primals_19, primals_20, primals_25, primals_26, primals_31, primals_32, primals_37, primals_38, primals_43, primals_44, primals_49, primals_50, primals_55, primals_56, primals_61, primals_62, primals_67, primals_68, primals_73, primals_74, primals_76, primals_77, primals_80, primals_81, primals_86, primals_87, primals_90, primals_91, primals_96, primals_97, primals_100, primals_101, primals_106, primals_107, primals_110, primals_111, primals_116, primals_117, primals_120, primals_121, primals_126, primals_127, primals_130, primals_131, primals_136, primals_137, primals_140, primals_141, primals_146, primals_147, primals_150, primals_151, primals_156, primals_157, primals_160, primals_161, primals_166, primals_167, primals_170, primals_171, primals_176, primals_177, primals_180, primals_181, primals_186, primals_187, primals_190, primals_191, primals_196, primals_197, primals_224, buf3, buf7, buf8, as_strided(buf15, (96, 197, 64), (12608, 64, 1)), as_strided(primals_201, (38809, ), (1, )), buf20, as_strided(buf23, (1576, 768), (768, 1)), buf24, buf28, as_strided(buf30, (8, 197, 3072), (605184, 3072, 1)), buf32, buf37, as_strided(buf44, (96, 197, 64), (12608, 64, 1)), as_strided(primals_203, (38809, ), (1, )), buf49, as_strided(buf52, (1576, 768), (768, 1)), buf53, buf57, as_strided(buf59, (8, 197, 3072), (605184, 3072, 1)), buf61, buf66, as_strided(buf73, (96, 197, 64), (12608, 64, 1)), as_strided(primals_205, (38809, ), (1, )), buf78, as_strided(buf81, (1576, 768), (768, 1)), buf82, buf86, as_strided(buf88, (8, 197, 3072), (605184, 3072, 1)), buf90, buf95, as_strided(buf102, (96, 197, 64), (12608, 64, 1)), as_strided(primals_207, (38809, ), (1, )), buf107, as_strided(buf110, (1576, 768), (768, 1)), buf111, buf115, as_strided(buf117, (8, 197, 3072), (605184, 3072, 1)), buf119, buf124, as_strided(buf131, (96, 197, 64), (12608, 64, 1)), as_strided(primals_209, (38809, ), (1, )), buf136, as_strided(buf139, (1576, 768), (768, 1)), buf140, buf144, as_strided(buf146, (8, 197, 3072), (605184, 3072, 1)), buf148, buf153, as_strided(buf160, (96, 197, 64), (12608, 64, 1)), as_strided(primals_211, (38809, ), (1, )), buf165, as_strided(buf168, (1576, 768), (768, 1)), buf169, buf173, as_strided(buf175, (8, 197, 3072), (605184, 3072, 1)), buf177, buf182, as_strided(buf189, (96, 197, 64), (12608, 64, 1)), as_strided(primals_213, (38809, ), (1, )), buf194, as_strided(buf197, (1576, 768), (768, 1)), buf198, buf202, as_strided(buf204, (8, 197, 3072), (605184, 3072, 1)), buf206, buf211, as_strided(buf218, (96, 197, 64), (12608, 64, 1)), as_strided(primals_215, (38809, ), (1, )), buf223, as_strided(buf226, (1576, 768), (768, 1)), buf227, buf231, as_strided(buf233, (8, 197, 3072), (605184, 3072, 1)), buf235, buf240, as_strided(buf247, (96, 197, 64), (12608, 64, 1)), as_strided(primals_217, (38809, ), (1, )), buf252, as_strided(buf255, (1576, 768), (768, 1)), buf256, buf260, as_strided(buf262, (8, 197, 3072), (605184, 3072, 1)), buf264, buf269, as_strided(buf276, (96, 197, 64), (12608, 64, 1)), as_strided(primals_219, (38809, ), (1, )), buf281, as_strided(buf284, (1576, 768), (768, 1)), buf285, buf289, as_strided(buf291, (8, 197, 3072), (605184, 3072, 1)), buf293, buf298, as_strided(buf305, (96, 197, 64), (12608, 64, 1)), as_strided(primals_221, (38809, ), (1, )), buf310, as_strided(buf313, (1576, 768), (768, 1)), buf314, buf318, as_strided(buf320, (8, 197, 3072), (605184, 3072, 1)), buf322, buf327, as_strided(buf334, (96, 197, 64), (12608, 64, 1)), as_strided(primals_223, (38809, ), (1, )), buf339, as_strided(buf342, (1576, 768), (768, 1)), buf343, buf347, as_strided(buf349, (8, 197, 3072), (605184, 3072, 1)), buf351, buf358, as_strided(primals_198, (1000, 768), (768, 1)), buf361, as_strided(primals_194, (768, 3072), (3072, 1)), as_strided(primals_192, (3072, 768), (768, 1)), buf362, as_strided(primals_188, (768, 768), (768, 1)), as_strided(buf340, (96, 64, 197), (12608, 1, 64)), as_strided(buf335, (96, 197, 64), (12608, 1, 197)), as_strided(primals_71, (2304, 768), (768, 1)), buf363, as_strided(primals_184, (768, 3072), (3072, 1)), as_strided(primals_182, (3072, 768), (768, 1)), buf364, as_strided(primals_178, (768, 768), (768, 1)), as_strided(buf311, (96, 64, 197), (12608, 1, 64)), as_strided(buf306, (96, 197, 64), (12608, 1, 197)), as_strided(primals_65, (2304, 768), (768, 1)), buf365, as_strided(primals_174, (768, 3072), (3072, 1)), as_strided(primals_172, (3072, 768), (768, 1)), buf366, as_strided(primals_168, (768, 768), (768, 1)), as_strided(buf282, (96, 64, 197), (12608, 1, 64)), as_strided(buf277, (96, 197, 64), (12608, 1, 197)), as_strided(primals_59, (2304, 768), (768, 1)), buf367, as_strided(primals_164, (768, 3072), (3072, 1)), as_strided(primals_162, (3072, 768), (768, 1)), buf368, as_strided(primals_158, (768, 768), (768, 1)), as_strided(buf253, (96, 64, 197), (12608, 1, 64)), as_strided(buf248, (96, 197, 64), (12608, 1, 197)), as_strided(primals_53, (2304, 768), (768, 1)), buf369, as_strided(primals_154, (768, 3072), (3072, 1)), as_strided(primals_152, (3072, 768), (768, 1)), buf370, as_strided(primals_148, (768, 768), (768, 1)), as_strided(buf224, (96, 64, 197), (12608, 1, 64)), as_strided(buf219, (96, 197, 64), (12608, 1, 197)), as_strided(primals_47, (2304, 768), (768, 1)), buf371, as_strided(primals_144, (768, 3072), (3072, 1)), as_strided(primals_142, (3072, 768), (768, 1)), buf372, as_strided(primals_138, (768, 768), (768, 1)), as_strided(buf195, (96, 64, 197), (12608, 1, 64)), as_strided(buf190, (96, 197, 64), (12608, 1, 197)), as_strided(primals_41, (2304, 768), (768, 1)), buf373, as_strided(primals_134, (768, 3072), (3072, 1)), as_strided(primals_132, (3072, 768), (768, 1)), buf374, as_strided(primals_128, (768, 768), (768, 1)), as_strided(buf166, (96, 64, 197), (12608, 1, 64)), as_strided(buf161, (96, 197, 64), (12608, 1, 197)), as_strided(primals_35, (2304, 768), (768, 1)), buf375, as_strided(primals_124, (768, 3072), (3072, 1)), as_strided(primals_122, (3072, 768), (768, 1)), buf376, as_strided(primals_118, (768, 768), (768, 1)), as_strided(buf137, (96, 64, 197), (12608, 1, 64)), as_strided(buf132, (96, 197, 64), (12608, 1, 197)), as_strided(primals_29, (2304, 768), (768, 1)), buf377, as_strided(primals_114, (768, 3072), (3072, 1)), as_strided(primals_112, (3072, 768), (768, 1)), buf378, as_strided(primals_108, (768, 768), (768, 1)), as_strided(buf108, (96, 64, 197), (12608, 1, 64)), as_strided(buf103, (96, 197, 64), (12608, 1, 197)), as_strided(primals_23, (2304, 768), (768, 1)), buf379, as_strided(primals_104, (768, 3072), (3072, 1)), as_strided(primals_102, (3072, 768), (768, 1)), buf380, as_strided(primals_98, (768, 768), (768, 1)), as_strided(buf79, (96, 64, 197), (12608, 1, 64)), as_strided(buf74, (96, 197, 64), (12608, 1, 197)), as_strided(primals_17, (2304, 768), (768, 1)), buf381, as_strided(primals_94, (768, 3072), (3072, 1)), as_strided(primals_92, (3072, 768), (768, 1)), buf382, as_strided(primals_88, (768, 768), (768, 1)), as_strided(buf50, (96, 64, 197), (12608, 1, 64)), as_strided(buf45, (96, 197, 64), (12608, 1, 197)), as_strided(primals_11, (2304, 768), (768, 1)), buf383, as_strided(primals_84, (768, 3072), (3072, 1)), as_strided(primals_82, (3072, 768), (768, 1)), buf384, as_strided(primals_78, (768, 768), (768, 1)), as_strided(buf21, (96, 64, 197), (12608, 1, 64)), as_strided(buf16, (96, 197, 64), (12608, 1, 197)), as_strided(primals_5, (2304, 768), (768, 1)), s0, 14, 14, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, 197, 12, 12*s0, 12, 768, 197, 197*s0, s0, 196, 196, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
FAIL
Dynamo produced 1 graph(s) covering 513 ops
cuda train botnet26t_256                      [2022-12-12 07:30:48,793] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <built-in function truediv>
  args[0]: 3145728
  args[1]: 24
ERROR:common:compile_fx raised LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 3145728
  args[1]: 24

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 24), kwargs = {})
Original traceback:
None

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 272, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 221, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/lowering.py", line 991, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 3011, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/ir.py", line 2406, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
TypeError: truediv expected 2 arguments, got 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/output_graph.py", line 623, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.fake_example_inputs())
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 917, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 394, in compile_fx
    return aot_autograd(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/optimizations/training.py", line 78, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2326, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 2023, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1293, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config)
  File "/data/users/ezyang/a/pytorch/torch/_functorch/aot_autograd.py", line 1540, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 369, in fw_compiler
    return inner_compile(
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/debug_utils.py", line 494, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/debug.py", line 224, in inner
    return fn(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/compile_fx.py", line 135, in compile_fx_inner
    graph.run(*example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 147, in run
    return super().run(*args)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 345, in run_node
    result = super().run_node(n)
  File "/data/users/ezyang/a/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/data/users/ezyang/a/pytorch/torch/_inductor/graph.py", line 275, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: truediv expected 2 arguments, got 0
  target: <built-in function truediv>
  args[0]: 3145728
  args[1]: 24

While executing %truediv : [#users=2] = call_function[target=operator.truediv](args = (%sym_numel, 24), kwargs = {})
Original traceback:
None

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(model_copy, example_inputs)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/eval_frame.py", line 211, in _fn
    return fn(*args, **kwargs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/common.py", line 1040, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 312, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 313, in <graph break in forward_and_backward_pass>
    self.optimizer_zero_grad(mod)
  File "/data/users/ezyang/a/pytorch/benchmarks/dynamo/timm_models.py", line 315, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/data/users/ezyang/a/pytorch/torch/nn/modules/module.py", line 1482, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ezyang/local/a/pytorch-env/lib/python3.9/site-packages/timm/models/byobnet.py", line 1559, in forward
    x = self.forward_features(x)
  File "/data/users/ezyang/a/pytorch/torch/_dynamo/